Files
simulated-examination/movie.py
2026-06-23 11:20:09 +08:00

87 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from collections import Counter

import requests
from bs4 import BeautifulSoup
url = "https://exam.detr.top/exam-b/movies"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Column order of the movies table, paired with the converter applied to
# each cell's text.
MOVIE_FIELDS = (
    ("id", int),
    ("title", str),
    ("director", str),
    ("year", int),
    ("rating", float),
    ("duration", int),
    ("genre", str),
    ("actors_count", int),
)


def fetch_html(page_url: str, request_headers: dict, timeout: float = 10) -> str:
    """Download *page_url* and return its decoded body.

    Args:
        page_url: Address of the page to fetch.
        request_headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection problems, a timeout, or an
            HTTP error status.
    """
    # Without a timeout requests waits indefinitely on an unresponsive server.
    resp = requests.get(page_url, headers=request_headers, timeout=timeout)
    # Fail loudly instead of saving and parsing an error page.
    resp.raise_for_status()
    resp.encoding = resp.apparent_encoding
    return resp.text


def extract_data_code(code_text: str) -> str:
    """Return the text after the last ASCII colon in *code_text*, stripped.

    If *code_text* contains no colon, the whole stripped text is returned.
    """
    return code_text.split(":")[-1].strip()


def build_movie_record(cells) -> dict:
    """Convert the cell texts of one table row into a movie dictionary.

    Args:
        cells: Cell texts in table column order (see ``MOVIE_FIELDS``).
            Cells beyond the known columns are ignored.

    Returns:
        A dict keyed by the ``MOVIE_FIELDS`` names; surrounding whitespace is
        removed from every value before conversion.

    Raises:
        ValueError: If the row has fewer cells than ``MOVIE_FIELDS`` or a
            numeric cell cannot be converted.
    """
    if len(cells) < len(MOVIE_FIELDS):
        raise ValueError(
            f"expected at least {len(MOVIE_FIELDS)} cells, got {len(cells)}"
        )
    return {
        name: convert(text.strip())
        for (name, convert), text in zip(MOVIE_FIELDS, cells)
    }


def parse_movies_page(html_source: str) -> dict:
    """Extract the data code and the movie rows from the page HTML.

    Returns:
        ``{"data_code": ..., "movies": [...]}`` ready to be dumped as JSON.

    Raises:
        ValueError: If the page has no ``<code>`` or ``<table>`` element, or
            a data row is malformed.
    """
    soup = BeautifulSoup(html_source, "html.parser")

    code_tag = soup.find("code")
    if code_tag is None:
        raise ValueError("no <code> element found in the page")
    table = soup.find("table")
    if table is None:
        raise ValueError("no <table> element found in the page")

    movie_list = []
    # The first <tr> is treated as the header row and skipped.
    for row in table.find_all("tr")[1:]:
        cells = [td.get_text() for td in row.find_all("td")]
        if not cells:
            # Rows without <td> cells carry no movie data.
            continue
        movie_list.append(build_movie_record(cells))

    # Assemble the complete JSON payload.
    return {
        "data_code": extract_data_code(code_tag.get_text(strip=True)),
        "movies": movie_list,
    }


def scrape_movies() -> None:
    """Fetch the movies page, save it as movies.html and write movies.json."""
    html_source = fetch_html(url, headers)
    with open("movies.html", "w", encoding="utf-8") as f:
        f.write(html_source)
    print("已保存原始网页movies.html")

    json_data = parse_movies_page(html_source)

    # Save movies.json
    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4)
    print("已保存电影数据movies.json")


if __name__ == "__main__":
    scrape_movies()
# ====================== Question 2: read movies.json and analyse the data ======================
def load_movies(path: str = "movies.json") -> list:
    """Read the JSON file at *path* and return its ``"movies"`` list."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)["movies"]


def rating_extremes(movies):
    """Return the ``(lowest, highest)`` rated movies.

    Among equally rated movies the first-listed one is reported as lowest
    and the last-listed one as highest. Returns ``(None, None)`` for an
    empty list.
    """
    if not movies:
        return None, None

    def rating_of(movie):
        return movie["rating"]

    lowest = min(movies, key=rating_of)
    # max() keeps the first maximum it meets, so scan backwards to make the
    # last-listed movie win a tie.
    highest = max(reversed(movies), key=rating_of)
    return lowest, highest


def count_by(movies, field: str) -> dict:
    """Count the movies per distinct value of *field*.

    Keys appear in order of first occurrence in *movies*.
    """
    return dict(Counter(movie[field] for movie in movies))


def count_released_since(movies, year: int = 2020) -> int:
    """Return how many movies have a ``"year"`` of *year* or later."""
    return sum(1 for movie in movies if movie["year"] >= year)


def analyze_movies(path: str = "movies.json") -> None:
    """Load the movies from *path* and print the four statistics."""
    movies = load_movies(path)

    # 1) Highest- and lowest-rated movies
    min_movie, max_movie = rating_extremes(movies)
    print("\n===== ① 评分最高/最低电影 =====")
    if min_movie is None:
        # Nothing to rank when the file holds no movies.
        print("暂无电影数据")
    else:
        print(f"评分最低:{min_movie['title']} {min_movie['rating']}")
        print(f"评分最高:{max_movie['title']} {max_movie['rating']}")

    # 2) Number of movies per genre
    print("\n===== ② 各类型电影数量 =====")
    print(count_by(movies, "genre"))

    # 3) Number of movies per director
    print("\n===== ③ 各导演电影数量 =====")
    print(count_by(movies, "director"))

    # 4) Number of movies released in 2020 or later
    print("\n===== ④ 2020年(含)后上映电影总数 =====")
    print(count_released_since(movies))


if __name__ == "__main__":
    analyze_movies()