上传文件至 q2_1_crawler

2026-06-23 11:18:32 +08:00
parent c1fa4e65df
commit 5a8416c145
2 changed files with 94 additions and 0 deletions
--- a/q2_1_crawler/q2_2.py
+++ b/q2_1_crawler/q2_2.py
@@ -0,0 +1,55 @@
+import json
+from collections import defaultdict
+
+# Load the movie records from movies.json once, at import time; the path is relative to the current working directory.
+with open("movies.json", "r", encoding="utf-8") as f:
+    movies = json.load(f)
+
+# ① Find the highest- and lowest-rated movies: prints both and returns them as (highest, lowest).
+def get_rating_extreme():
+    # Sort ascending by "rating" so the two extremes sit at the ends of the list
+    sorted_movies = sorted(movies, key=lambda x: x["rating"])
+    lowest = sorted_movies[0]  # raises IndexError when there are no movies
+    highest = sorted_movies[-1]  # sorted() is stable: on ties this is the last top-rated entry in input order
+    print("=== ① 评分极值 ===")
+    print(f"评分最低电影：{lowest['title']}，评分：{lowest['rating']}")
+    print(f"评分最高电影：{highest['title']}，评分：{highest['rating']}")
+    return highest, lowest
+
+# ② Count movies per genre: prints and returns a plain dict. Assumes each movie's "genre" is a list of names - TODO confirm.
+def count_genre():
+    genre_count = defaultdict(int)
+    for movie in movies:
+        genres = movie["genre"]
+        for g in genres:  # a movie with several genres counts once toward each of them
+            genre_count[g] += 1
+    print("\n=== ② 各类型电影数量（字典格式）===")
+    print(dict(genre_count))  # convert so the output is a plain dict rather than a defaultdict repr
+    return dict(genre_count)
+
+# ③ Count movies per director: prints and returns a plain dict keyed by the "director" value.
+def count_director():
+    dir_count = defaultdict(int)
+    for movie in movies:
+        d = movie["director"]  # used as a dict key, so it must be hashable (presumably one name string - confirm)
+        dir_count[d] += 1
+    print("\n=== ③ 各导演电影数量（字典格式）===")
+    print(dict(dir_count))  # convert so the output is a plain dict rather than a defaultdict repr
+    return dict(dir_count)
+
+# ④ Count movies whose "year" is 2020 or later (inclusive): prints and returns the total.
+def count_after_2020():
+    cnt = 0
+    for movie in movies:
+        if movie["year"] >= 2020:  # compared with an int; presumably "year" is stored as a number - confirm
+            cnt += 1
+    print("\n=== ④ 2020年（含）后上映电影数量 ===")
+    print(f"总数：{cnt}")
+    return cnt
+
+# Run all four analyses in order, but only when this file is executed as a script
+if __name__ == "__main__":
+    get_rating_extreme()
+    count_genre()
+    count_director()
+    count_after_2020()