Files

40 lines
1.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import json
# 1. Request configuration: the target endpoint plus browser-like test headers
#    (the headers are there to satisfy the assignment's requirements).
url = "https://exam.detr.top/exam-b/movies"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Referer": "https://exam.detr.top/",
}
# 2. Fetch all the data with a single request (the assignment requires a
#    one-shot scrape). A timeout is passed explicitly because requests waits
#    indefinitely by default, which would hang the script if the server never
#    answers; requests raises a Timeout exception once the limit is exceeded.
resp = requests.get(url, headers=headers, timeout=10)
# Raise requests.HTTPError on a 4xx/5xx status so an error response is never
# saved or parsed as if it were the movie data.
resp.raise_for_status()
# Save the raw response body, as decoded text, to movies.html.
with open("movies.html", "w", encoding="utf-8") as f:
    f.write(resp.text)
# Parse the JSON payload returned by the endpoint.
movie_data = resp.json()
# Validate the movie records (the assignment expects all 10 movies): each one
# must carry id, title, director, year, rating, duration, genre, actors_count.
# The key list is defined once, outside the filter, because it never changes
# between records.
needed_keys = ["id", "title", "director", "year", "rating", "duration", "genre", "actors_count"]
# Keep only dict records that contain every required key. The isinstance guard
# skips malformed entries instead of misjudging them: on a str, `k in item`
# would be a substring test, and on a number or None it would raise TypeError.
valid_movies = [
    item
    for item in movie_data
    if isinstance(item, dict) and all(k in item for k in needed_keys)
]
# Write the validated movie records to movies.json as indented JSON, keeping
# non-ASCII characters as-is rather than \u-escaping them.
with open("movies.json", "w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(valid_movies, ensure_ascii=False, indent=2))
# Report which files were produced and how many movies were kept.
print("抓取完成:已生成 movies.html 和 movies.json")
print(f"共抓取到 {len(valid_movies)} 部电影")