# simulated-examination/text.py

import requests
import json

# Target URL of the exam page that lists the movies.
url = "https://exam.detr.top/exam-b/movies"

# Fetch the page content.
# - timeout: requests waits indefinitely by default, so without it an
#   unresponsive server would hang the script forever.
# - raise_for_status(): abort on a 4xx/5xx answer instead of saving and then
#   parsing an error page as if it were the movie data.
response = requests.get(url, timeout=10)
response.raise_for_status()
response.encoding = "utf-8"  # decode as UTF-8 so Chinese text is not garbled

# 1. Save the raw page source as movies.html.
with open("movies.html", "w", encoding="utf-8") as f:
    f.write(response.text)

# The page is assumed to return JSON holding a data id and a movie list; if
# the real structure differs, adjust the parsing below accordingly.
# - If the endpoint answers with JSON directly, json.loads is enough.
# - If it is static HTML (or HTML + JS), the data has to be extracted with
#   BeautifulSoup instead.

# Approach 1: the body is JSON. Only a JSON *object* is usable here, because
# the rest of the script reads the payload through .get(); a body that is
# valid JSON but not an object (list, number, string, ...) is therefore
# treated the same way as a body that is not JSON at all.
try:
    payload = json.loads(response.text)
except json.JSONDecodeError:
    payload = None

if isinstance(payload, dict):
    data = payload
else:
    # Approach 2: the body is HTML; extract the data with BeautifulSoup
    # (generic template). Imported here so bs4 is only needed when this
    # branch is actually taken.
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(response.text, "html.parser")
    # TODO: the data presumably lives in a <script> tag or in an HTML
    # table/list; pull it out of `soup` according to the real page structure.
    # Until then a placeholder structure is used so the script still runs.
    data = {
        "data_id": "demo_id",  # data id; take it from the real page
        "movies": []  # list of 10 movies, each with the required keys
    }

# Keys every movie record must carry in the output, in output order.
MOVIE_KEYS = (
    "id",
    "title",
    "director",
    "year",
    "rating",
    "duration",
    "genre",
    "actors_count",
)

# Build the output document: the data id plus one trimmed record per movie.
# A key missing from a movie is written as null (dict.get returns None).
# `or []` also covers a payload whose "movies" key is present but null, where
# data.get("movies", []) would hand back None and break the iteration.
result = {
    "data_id": data.get("data_id", ""),
    "movies": [
        {key: movie.get(key) for key in MOVIE_KEYS}
        for movie in (data.get("movies") or [])
    ],
}

# Write the collected result to movies.json, pretty-printed with a 4-space
# indent and with non-ASCII characters kept as-is rather than \u-escaped.
with open("movies.json", mode="w", encoding="utf-8") as json_file:
    json_file.write(json.dumps(result, ensure_ascii=False, indent=4))