From 8c1d7964dc134a2dca3aac2663db1b3f36a057f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=83=A1=E7=BA=A2=E8=BE=89?= <2509165036@student.example.com> Date: Tue, 23 Jun 2026 11:16:23 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?= =?UTF-8?q?=20q2=5F1=5Fcrawler?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- q2_1_crawler/movies.html | 152 +++++++++++++++++++++++++++++++++++++++ q2_1_crawler/movies.json | 4 ++ q2_1_crawler/q2_2.py | 45 ++++++++++++ 3 files changed, 201 insertions(+) create mode 100644 q2_1_crawler/movies.html create mode 100644 q2_1_crawler/movies.json create mode 100644 q2_1_crawler/q2_2.py diff --git a/q2_1_crawler/movies.html b/q2_1_crawler/movies.html new file mode 100644 index 0000000..aa3d3ba --- /dev/null +++ b/q2_1_crawler/movies.html @@ -0,0 +1,152 @@ + + + + + + + + 电影列表 + + + +

电影列表

+

数据编号:B-20260623-4415

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
编号电影名导演上映年份评分时长(分钟)类型主演数
1放牛班的春天Frank Darabont19946.5141剧情3
2千与千寻陈凯歌20227.2160科幻5
3肖申克的救赎Robert Zemeckis20127.694悬疑4
4星际穿越James Cameron20039.1157喜剧2
5忠犬八公的故事宫崎骏20237.5164喜剧5
6盗梦空间Christopher Nolan19999.198爱情2
7阿甘正传Lasse Hallström20009.3153动画4
8泰坦尼克号Rajkumar Hirani20168.3143剧情3
9霸王别姬Christophe Barratier20027.4149悬疑4
10三傻大闹宝莱坞Christopher Nolan20076.8136科幻2
+
+
\ No newline at end of file
diff --git a/q2_1_crawler/movies.json b/q2_1_crawler/movies.json
new file mode 100644
index 0000000..a5758bd
--- /dev/null
+++ b/q2_1_crawler/movies.json
@@ -0,0 +1,4 @@
+{
+    "actors_count": "0",
+    "movies": []
+}
\ No newline at end of file
diff --git a/q2_1_crawler/q2_2.py b/q2_1_crawler/q2_2.py
new file mode 100644
index 0000000..1ddf6da
--- /dev/null
+++ b/q2_1_crawler/q2_2.py
@@ -0,0 +1,94 @@
+"""Summarise the crawler output stored in movies.json.
+
+Prints four statistics: the lowest/highest rated movies, the number of
+movies per genre, the number of movies per director, and how many movies
+were released in 2020 or later.
+"""
+import json
+from collections import Counter
+
+DATA_FILE = "movies.json"
+RECENT_YEAR = 2020
+
+
+def to_number(value, cast=float):
+    """Return cast(value), or None when value cannot be converted."""
+    try:
+        return cast(value)
+    except (TypeError, ValueError):
+        return None
+
+
+def load_movies(path=DATA_FILE):
+    """Return the "movies" list stored in path.
+
+    A missing or malformed file yields an empty list so the caller can
+    report "no data" instead of crashing with a traceback.
+    """
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError):
+        return []
+    movies = data.get("movies") if isinstance(data, dict) else None
+    return movies or []
+
+
+def rating_extremes(movies):
+    """Return (lowest, highest) rated movies, or None if none is rated.
+
+    Ratings are compared as numbers: comparing the raw values would order
+    strings lexicographically ("10.0" < "9.1"). Records whose rating is
+    not numeric are skipped, as is done for unparseable years.
+    """
+    rated = [m for m in movies if to_number(m["rating"]) is not None]
+    if not rated:
+        return None
+    # sorted() is stable: ties keep file order, first lowest / last highest.
+    ranked = sorted(rated, key=lambda m: float(m["rating"]))
+    return ranked[0], ranked[-1]
+
+
+def count_by(movies, field):
+    """Return {value: occurrences} of field, in first-seen order."""
+    return dict(Counter(m[field] for m in movies))
+
+
+def count_released_since(movies, first_year=RECENT_YEAR):
+    """Count movies released in first_year or later, skipping bad years."""
+    years = (to_number(m["year"], int) for m in movies)
+    return sum(1 for y in years if y is not None and y >= first_year)
+
+
+def main():
+    """Load the crawler output and print the four statistics."""
+    movies = load_movies()
+    # Guard against an empty result so the statistics below never fail.
+    if not movies:
+        print("错误:无电影数据,请先运行爬虫q2_1.py!")
+        return
+
+    # 1. Lowest and highest rated movies.
+    print("① 评分极值:")
+    extremes = rating_extremes(movies)
+    if extremes is None:
+        print("无有效评分数据")
+    else:
+        lowest, highest = extremes
+        print(f"最低分:{lowest['title']} {lowest['rating']}")
+        print(f"最高分:{highest['title']} {highest['rating']}")
+
+    # 2. Number of movies per genre, printed as a dict.
+    print("\n② 电影类型统计字典:")
+    print(count_by(movies, "genre"))
+
+    # 3. Number of movies per director, printed as a dict.
+    print("\n③ 导演影片数量统计字典:")
+    print(count_by(movies, "director"))
+
+    # 4. Number of movies released in 2020 or later.
+    print(f"\n④ 2020年(含)后上映电影总数:{count_released_since(movies)}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file