# File-viewer header captured with the original listing: 55 lines, 2.2 KiB, Python.
"""Download the movie page, keep a raw copy, and export selected fields as JSON.

Running this script writes two files into the current working directory:

* ``movies.html`` - the response body as decoded text.
* ``movies.json`` - the data id plus one record per movie, restricted to
  the keys listed in ``MOVIE_KEYS``.
"""
import json
import sys

import requests

# Target URL.
url = "https://exam.detr.top/exam-b/movies"

# Seconds to wait for the server before giving up; without a timeout
# requests.get() may block forever on an unresponsive host.
REQUEST_TIMEOUT = 30

# Keys kept for every movie record, in output order.
MOVIE_KEYS = (
    "id",
    "title",
    "director",
    "year",
    "rating",
    "duration",
    "genre",
    "actors_count",
)

# Fetch the page content.
response = requests.get(url, timeout=REQUEST_TIMEOUT)
# Abort on 4xx/5xx instead of saving and parsing an error page.
response.raise_for_status()
response.encoding = "utf-8"  # avoid garbled Chinese characters

# 1. Save the raw page source as movies.html.
with open("movies.html", "w", encoding="utf-8") as f:
    f.write(response.text)

# 2. Parse the payload.
# Option 1: the endpoint returns JSON directly.
# Option 2: the endpoint returns HTML, in which case the data has to be
# extracted with BeautifulSoup (only a generic template is provided here).
try:
    data = json.loads(response.text)
except json.JSONDecodeError:
    # Imported lazily so bs4 is only required when the response is not JSON.
    from bs4 import BeautifulSoup

    # NOTE: `soup` is the hook for the real extraction logic; nothing is
    # read from it yet, so the structure below is placeholder data only.
    soup = BeautifulSoup(response.text, "html.parser")
    data = {
        "data_id": "demo_id",  # data id; replace with the value from the page
        "movies": [],  # list of 10 movies, each with the keys in MOVIE_KEYS
    }
    print(
        "Warning: response is not JSON; writing placeholder data to movies.json",
        file=sys.stderr,
    )

if not isinstance(data, dict):
    # A JSON array or scalar has no "data_id"/"movies" keys; fail with a
    # clear message rather than an AttributeError on .get().
    raise TypeError(
        f"Expected a JSON object at the top level, got {type(data).__name__}"
    )

# 3. Keep the data id and, for every movie, only the required keys.
# `or []` also covers an explicit `"movies": null` in the payload.
movies = data.get("movies") or []
result = {
    "data_id": data.get("data_id", ""),
    "movies": [
        {key: movie.get(key) for key in MOVIE_KEYS}
        for movie in movies
    ],
}

# 4. Save as movies.json (ensure_ascii=False keeps non-ASCII text readable).
with open("movies.json", "w", encoding="utf-8") as f:
    json.dump(result, f, ensure_ascii=False, indent=4)