From 26b5a6f628c376c8a1ec67435b43a43c76e68ec3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=88=E5=B1=BF=E8=B1=AA?=
 <2509165005@student.example.com>
Date: Tue, 23 Jun 2026 11:20:09 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20/?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 movie.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 movie.py

diff --git a/movie.py b/movie.py
new file mode 100644
index 0000000..e887138
--- /dev/null
+++ b/movie.py
@@ -0,0 +1,87 @@
+import requests
+import json
+from bs4 import BeautifulSoup
+
+url = "https://exam.detr.top/exam-b/movies"
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+}
+# Download the page; fail fast on a stalled connection or an HTTP error status.
+resp = requests.get(url, headers=headers, timeout=10)
+resp.raise_for_status()  # never save or parse a 4xx/5xx error page as movie data
+resp.encoding = resp.apparent_encoding  # decode with the encoding guessed from the body
+html_source = resp.text
+with open("movies.html", "w", encoding="utf-8") as f:
+    f.write(html_source)
+print("已保存原始网页：movies.html")
+
+soup = BeautifulSoup(html_source, "html.parser")
+
+# The data code is whatever follows the last ":" in the page's first <code> element.
+code_text = soup.find("code").get_text(strip=True)
+data_id = code_text.split(":")[-1].strip()
+
+movie_list = []
+for row in soup.find("table").find_all("tr")[1:]:  # [1:] skips the first (presumably header) row
+    cells = [td.get_text().strip() for td in row.find_all("td")]  # drop padding around cell text
+    if not cells:
+        continue  # row has no <td> cells (e.g. an extra header row); nothing to parse
+    movie_list.append({
+        "id": int(cells[0]),
+        "title": cells[1],
+        "director": cells[2],
+        "year": int(cells[3]),
+        "rating": float(cells[4]),
+        "duration": int(cells[5]),
+        "genre": cells[6],
+        "actors_count": int(cells[7]),
+    })
+
+# Bundle the page's data code together with the parsed movie records.
+json_data = dict(data_code=data_id, movies=movie_list)
+
+# Serialise first, then write the text out as movies.json
+# (ensure_ascii=False keeps non-ASCII characters unescaped in the file).
+json_text = json.dumps(json_data, ensure_ascii=False, indent=4)
+with open("movies.json", "w", encoding="utf-8") as f:
+    f.write(json_text)
+
+print("已保存电影数据：movies.json")
+
+# ============ Question 2: analyse the data stored in movies.json ============
+# Load the saved JSON file back from disk.
+with open("movies.json", "r", encoding="utf-8") as json_file:
+    load_data = json.loads(json_file.read())
+movies = load_data["movies"]
+
+# (1) Lowest- and highest-rated movie: order a copy by rating, then take both ends.
+by_rating = list(movies)
+by_rating.sort(key=lambda movie: movie["rating"])
+min_movie, max_movie = by_rating[0], by_rating[-1]
+print("\n===== ① 评分最高/最低电影 =====")
+print(f"评分最低：{min_movie['title']} {min_movie['rating']}")
+print(f"评分最高：{max_movie['title']} {max_movie['rating']}")
+
+# (2) Tally how many movies fall under each genre (keys appear in first-seen order).
+genre_count = {}
+for movie in movies:
+    if movie["genre"] not in genre_count:
+        genre_count[movie["genre"]] = 0
+    genre_count[movie["genre"]] += 1
+print("\n===== ② 各类型电影数量 =====", genre_count, sep="\n")
+
+# (3) Tally how many movies each director has in the list.
+director_count = {}
+for movie in movies:
+    name = movie["director"]
+    director_count.setdefault(name, 0)
+    director_count[name] += 1
+print("\n===== ③ 各导演电影数量 =====", director_count, sep="\n")
+
+# (4) Count the movies released in 2020 or later.
+# The comparison is inclusive, so a movie from 2020 itself is counted.
+recent_movies = [movie for movie in movies if movie["year"] >= 2020]
+cnt_after_2020 = len(recent_movies)
+
+print("\n===== ④ 2020年(含)后上映电影总数 =====")
+print(cnt_after_2020)
\ No newline at end of file