40 lines
1.3 KiB
Python
40 lines
1.3 KiB
Python
import requests
|
||
import json
|
||
|
||
# Request headers (test headers that satisfy the assignment's requirements).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Referer": "https://exam.detr.top/",
}

url = "https://exam.detr.top/exam-b/movies"

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() may block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

# Fields a movie record must contain in order to be kept.
REQUIRED_KEYS = (
    "id",
    "title",
    "director",
    "year",
    "rating",
    "duration",
    "genre",
    "actors_count",
)


def select_complete_movies(movies, required_keys=REQUIRED_KEYS):
    """Return the records in *movies* that contain every required key.

    Args:
        movies: Iterable of decoded JSON values, normally dicts.
        required_keys: Keys that must all be present in a record.

    Returns:
        A new list holding the qualifying records, in their original order.
        Records are returned as-is; extra keys are not stripped.
    """
    # Only dicts are accepted: for a str item, ``key in item`` would be a
    # substring test and could let a non-record through; for None or a
    # number it would raise TypeError.
    return [
        movie
        for movie in movies
        if isinstance(movie, dict) and all(key in movie for key in required_keys)
    ]


def main():
    """Fetch the movie list in one request and write movies.html / movies.json.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (via ``raise_for_status``).
        ValueError: If the response body is not valid JSON.
    """
    # Fetch all data with a single request (the assignment requires one fetch).
    resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()  # turn a 4xx/5xx response into an exception

    # Save the raw response text to movies.html.
    with open("movies.html", "w", encoding="utf-8") as f:
        f.write(resp.text)

    # Parse the JSON returned by the endpoint.
    movie_data = resp.json()

    # Keep only the movies that carry all required fields.
    valid_movies = select_complete_movies(movie_data)

    # Save the movie data to movies.json.
    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(valid_movies, f, ensure_ascii=False, indent=2)

    print("抓取完成:已生成 movies.html 和 movies.json")
    print(f"共抓取到 {len(valid_movies)} 部电影")


if __name__ == "__main__":
    main()
|
||
|
||
|