Files
simulated-examination/text.py
2026-06-23 11:02:44 +08:00

55 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import json
# Target URL of the movie listing page.
url = "https://exam.detr.top/exam-b/movies"

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Keys retained for every movie record, in output order.
MOVIE_KEYS = (
    "id",
    "title",
    "director",
    "year",
    "rating",
    "duration",
    "genre",
    "actors_count",
)


def parse_payload(text):
    """Decode the response body into a dict describing the data set.

    Args:
        text: The response body as a string.

    Returns:
        The decoded object when ``text`` is a JSON object; an empty dict when
        ``text`` is valid JSON of any other type (list, number, ...); and a
        placeholder ``{"data_id": "demo_id", "movies": []}`` when ``text`` is
        not JSON at all.
    """
    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        # The page is not JSON (most likely HTML). Real extraction logic
        # (e.g. an HTML parser over a <script> tag, table or list) has to be
        # written against the actual page structure; until then a placeholder
        # with the expected shape is returned.
        # TODO: replace the placeholder with real HTML extraction.
        return {
            "data_id": "demo_id",  # data id, to be read from the page
            "movies": [],  # list of movies, each carrying MOVIE_KEYS
        }
    # Valid JSON that is not an object has no "data_id"/"movies" to read.
    return payload if isinstance(payload, dict) else {}


def build_result(data):
    """Reduce a parsed payload to the data id and the required movie fields.

    Args:
        data: Dict as returned by ``parse_payload``.

    Returns:
        A dict with ``"data_id"`` (empty string when absent) and ``"movies"``,
        a list with one dict per movie holding exactly ``MOVIE_KEYS``; keys a
        movie lacks map to ``None``. A missing, null or non-list ``"movies"``
        value yields an empty list, and non-dict entries are skipped.
    """
    movies = data.get("movies")
    if not isinstance(movies, list):
        movies = []
    return {
        "data_id": data.get("data_id", ""),
        "movies": [
            {key: movie.get(key) for key in MOVIE_KEYS}
            for movie in movies
            if isinstance(movie, dict)
        ],
    }


def main():
    """Download the page, save it as movies.html and write movies.json.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of saving an error page as data.
    response.raise_for_status()
    response.encoding = "utf-8"  # avoid garbled Chinese text

    # 1. Save the raw page source as movies.html.
    with open("movies.html", "w", encoding="utf-8") as f:
        f.write(response.text)

    # 2. Extract the data id and the movie records, then save as movies.json.
    result = build_result(parse_payload(response.text))
    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()