"""Fetch the exam movie listing, archive the raw page, and export the movies.

Running this file as a script:

1. downloads ``URL``;
2. saves the raw response body to ``movies.html``;
3. extracts the data id and the movie list from the response;
4. writes the selected fields of every movie to ``movies.json``.
"""

import json
import sys

# Target URL.
URL = "https://exam.detr.top/exam-b/movies"

# Output file names.
HTML_PATH = "movies.html"
JSON_PATH = "movies.json"

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` may block forever.
REQUEST_TIMEOUT = 10

# Keys kept for each movie, in output order.
MOVIE_KEYS = (
    "id",
    "title",
    "director",
    "year",
    "rating",
    "duration",
    "genre",
    "actors_count",
)


def fetch_page(url=URL, timeout=REQUEST_TIMEOUT):
    """Return the body of *url* decoded as UTF-8 text.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    # Imported lazily (like bs4 below) so the pure parsing helpers in this
    # file can be imported without the HTTP dependency installed.
    import requests

    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of saving and parsing an error page.
    response.raise_for_status()
    response.encoding = "utf-8"  # avoid garbled Chinese characters
    return response.text


def extract_embedded_payload(html):
    """Return the first JSON object with a ``"movies"`` key found in a <script> tag.

    Returns ``None`` when no <script> tag of *html* holds such an object.
    """
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, "html.parser")
    for script in soup.find_all("script"):
        content = script.string
        if not content:
            continue
        try:
            candidate = json.loads(content)
        except json.JSONDecodeError:
            # Ordinary JavaScript, not a JSON document.
            continue
        if isinstance(candidate, dict) and "movies" in candidate:
            return candidate
    return None


def parse_payload(text):
    """Turn the response body into a dict carrying ``data_id`` and ``movies``.

    The body is tried as JSON first.  If it is not JSON it is treated as
    HTML and searched for an embedded JSON object.  A bare JSON array is
    taken to be the movie list itself.  When nothing usable is found, the
    placeholder ``{"data_id": "demo_id", "movies": []}`` is returned and a
    warning is written to stderr.
    """
    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        payload = extract_embedded_payload(text)

    if isinstance(payload, list):
        payload = {"movies": payload}
    if isinstance(payload, dict):
        return payload

    # NOTE: placeholder only; adapt extract_embedded_payload() to the real
    # page structure if this warning shows up.
    print(
        "warning: no movie data found in the response; "
        "writing placeholder output",
        file=sys.stderr,
    )
    return {
        "data_id": "demo_id",  # data id, to be taken from the real page
        "movies": [],  # movie list, each entry carrying MOVIE_KEYS
    }


def filter_movie(movie):
    """Return a dict holding only ``MOVIE_KEYS``; missing keys map to ``None``."""
    return {key: movie.get(key) for key in MOVIE_KEYS}


def build_result(data):
    """Build the structure that is written to ``movies.json``.

    A missing ``data_id`` becomes ``""``.  A missing or ``null`` movie list
    becomes ``[]``, and entries that are not dicts are skipped rather than
    raising ``AttributeError``.
    """
    movies = data.get("movies") or []
    return {
        "data_id": data.get("data_id", ""),
        "movies": [
            filter_movie(movie) for movie in movies if isinstance(movie, dict)
        ],
    }


def main():
    """Download the page, save ``movies.html``, then write ``movies.json``."""
    text = fetch_page()

    # 1. Save the raw page source.
    with open(HTML_PATH, "w", encoding="utf-8") as f:
        f.write(text)

    # 2. Extract the data id and the movie records.
    result = build_result(parse_payload(text))

    # 3. Save the filtered data.
    with open(JSON_PATH, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()