diff --git a/q2_1_crawler/movies.html b/q2_1_crawler/movies.html
new file mode 100644
index 0000000..ff3d403
--- /dev/null
+++ b/q2_1_crawler/movies.html
@@ -0,0 +1,152 @@
+
+
+
+
+
+
+
+ 电影列表
+
+
+
+ 电影列表
+ 数据编号:B-20260623-4047
+
+
+
+
+ | 编号 |
+ 电影名 |
+ 导演 |
+ 上映年份 |
+ 评分 |
+ 时长(分钟) |
+ 类型 |
+ 主演数 |
+
+
+
+
+
+ | 1 |
+ 盗梦空间 |
+ Frank Darabont |
+ 1996 |
+ 6.2 |
+ 109 |
+ 爱情 |
+ 4 |
+
+
+
+ | 2 |
+ 阿甘正传 |
+ 陈凯歌 |
+ 1994 |
+ 8.1 |
+ 152 |
+ 喜剧 |
+ 2 |
+
+
+
+ | 3 |
+ 千与千寻 |
+ Robert Zemeckis |
+ 2000 |
+ 7.9 |
+ 97 |
+ 科幻 |
+ 4 |
+
+
+
+ | 4 |
+ 泰坦尼克号 |
+ James Cameron |
+ 2000 |
+ 7.1 |
+ 104 |
+ 喜剧 |
+ 2 |
+
+
+
+ | 5 |
+ 肖申克的救赎 |
+ 宫崎骏 |
+ 2015 |
+ 6.6 |
+ 106 |
+ 爱情 |
+ 2 |
+
+
+
+ | 6 |
+ 放牛班的春天 |
+ Christopher Nolan |
+ 2000 |
+ 7.1 |
+ 121 |
+ 喜剧 |
+ 3 |
+
+
+
+ | 7 |
+ 星际穿越 |
+ Lasse Hallström |
+ 1991 |
+ 6.3 |
+ 94 |
+ 剧情 |
+ 3 |
+
+
+
+ | 8 |
+ 霸王别姬 |
+ Rajkumar Hirani |
+ 2016 |
+ 7.6 |
+ 128 |
+ 动画 |
+ 2 |
+
+
+
+ | 9 |
+ 忠犬八公的故事 |
+ Christophe Barratier |
+ 1995 |
+ 8.4 |
+ 143 |
+ 科幻 |
+ 3 |
+
+
+
+ | 10 |
+ 三傻大闹宝莱坞 |
+ Christopher Nolan |
+ 2019 |
+ 6.4 |
+ 137 |
+ 剧情 |
+ 2 |
+
+
+
+
+
+
\ No newline at end of file
diff --git a/q2_1_crawler/movies.json b/q2_1_crawler/movies.json
new file mode 100644
index 0000000..2e61d23
--- /dev/null
+++ b/q2_1_crawler/movies.json
@@ -0,0 +1,4 @@
+{
+ "data_id": "unknown",
+ "movies": []
+}
\ No newline at end of file
diff --git a/q2_1_crawler/q2_1.py b/q2_1_crawler/q2_1.py
new file mode 100644
index 0000000..08f1cc1
--- /dev/null
+++ b/q2_1_crawler/q2_1.py
@@ -0,0 +1,108 @@
+"""Crawl the exam movie list and save it as movies.html and movies.json."""
+import json
+import re
+
+import requests
+from bs4 import BeautifulSoup
+
+# Keys for the eight table columns, in the order of the page's header row:
+# 编号, 电影名, 导演, 上映年份, 评分, 时长(分钟), 类型, 主演数.
+_COLUMNS = ("id", "title", "director", "year", "rating", "duration",
+            "genre", "actors_count")
+
+# The saved page prints the identifier as "数据编号:B-20260623-4047".
+_DATA_ID_RE = re.compile(r"数据编号\s*[::]\s*([^\s|]+)")
+
+# Separators accepted between several genres listed in one cell.
+_GENRE_SPLIT_RE = re.compile(r"[/,,、|]")
+
+# First integer or decimal number inside a cell, e.g. "109" or "6.2".
+_NUMBER_RE = re.compile(r"\d+(?:\.\d+)?")
+
+
+def _to_number(text, cast):
+    """Convert the first number in *text* with *cast* (int or float)."""
+    match = _NUMBER_RE.search(text)
+    if match is None:
+        raise ValueError(f"no number found in {text!r}")
+    return cast(match.group())
+
+
+def _parse_row(cells):
+    """Build one movie dict from the eight cell texts of a table row."""
+    raw = dict(zip(_COLUMNS, cells))
+    genres = [g.strip() for g in _GENRE_SPLIT_RE.split(raw["genre"])]
+    return {
+        "id": raw["id"],
+        "title": raw["title"],
+        "director": raw["director"],
+        "year": _to_number(raw["year"], int),
+        "rating": _to_number(raw["rating"], float),
+        "duration": _to_number(raw["duration"], int),
+        # Always a list, so consumers can iterate over a movie's genres.
+        "genre": [g for g in genres if g],
+        "actors_count": _to_number(raw["actors_count"], int),
+    }
+
+
+def _parse_movies(html):
+    """Extract the data id and the movie rows from the page source.
+
+    NOTE(review): the saved page renders as one eight-column table; the
+    exact tags were not verified, so rows are read from every <tr> and only
+    rows with eight cells whose first cell is a number are kept.
+    """
+    soup = BeautifulSoup(html, "html.parser")
+
+    id_match = _DATA_ID_RE.search(soup.get_text(" ", strip=True))
+    data_id = id_match.group(1) if id_match else "unknown"
+
+    movies = []
+    for row in soup.find_all("tr"):
+        cells = [c.get_text(strip=True) for c in row.find_all(["td", "th"])]
+        # Skips the header row (its first cell is 编号) and malformed rows.
+        if len(cells) == len(_COLUMNS) and cells[0].isdigit():
+            movies.append(_parse_row(cells))
+    return {"data_id": data_id, "movies": movies}
+
+
+def crawl_movies():
+    """Download the movie page, save its source and the parsed data.
+
+    Writes the raw page to movies.html and the extracted data to
+    movies.json in the current working directory.
+
+    Returns:
+        dict: {"data_id": str, "movies": list of movie dicts}.
+
+    Raises:
+        requests.RequestException: on timeout, connection failure or an
+            HTTP error status.
+        ValueError: if a numeric column of a movie row holds no number.
+    """
+    url = "https://exam.detr.top/exam-b/movies"
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+    }
+
+    # A timeout keeps the script from hanging forever; raise_for_status
+    # stops an error page from being saved and parsed as movie data.
+    resp = requests.get(url, headers=headers, timeout=10)
+    resp.raise_for_status()
+    resp.encoding = 'utf-8'  # force UTF-8 decoding of the body
+    html = resp.text
+
+    # Keep the raw page source next to the extracted data.
+    with open("movies.html", "w", encoding="utf-8") as f:
+        f.write(html)
+
+    result = _parse_movies(html)
+    with open("movies.json", "w", encoding="utf-8") as f:
+        json.dump(result, f, ensure_ascii=False, indent=2)
+
+    print("爬取完成,已生成 movies.html 和 movies.json")
+    return result
+
+
+if __name__ == "__main__":
+    crawl_movies()
\ No newline at end of file
diff --git a/q2_1_crawler/q2_2.py b/q2_1_crawler/q2_2.py
new file mode 100644
index 0000000..1b81a38
--- /dev/null
+++ b/q2_1_crawler/q2_2.py
@@ -0,0 +1,50 @@
+"""Print summary statistics for the movies stored in movies.json."""
+import json
+from collections import Counter
+
+
+def _genres_of(movie):
+    """Return the movie's genres as a list; a bare string is one genre."""
+    genre = movie['genre']
+    return [genre] if isinstance(genre, str) else list(genre)
+
+
+def analyze_movies(json_path="movies.json"):
+    """Print rating extremes and per-genre, per-director and 2020+ counts.
+
+    Args:
+        json_path: Path of a JSON file holding a "movies" list whose items
+            have 'title', 'rating', 'genre', 'director' and 'year' keys.
+    """
+    with open(json_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    movies = data["movies"]
+
+    # ① Highest and lowest rating. max()/min() raise ValueError on an empty
+    # list, so an empty data set is reported instead of crashing.
+    if movies:
+        max_movie = max(movies, key=lambda x: x['rating'])
+        min_movie = min(movies, key=lambda x: x['rating'])
+        print(f"最高评分电影:{max_movie['title']},评分:{max_movie['rating']}")
+        print(f"最低评分电影:{min_movie['title']},评分:{min_movie['rating']}")
+    else:
+        print("没有电影数据,无法统计最高和最低评分")
+
+    # ② Number of movies per genre. Iterating a bare string would count its
+    # characters, so every movie's genre is normalised to a list first.
+    genre_counter = Counter(
+        genre for movie in movies for genre in _genres_of(movie)
+    )
+    print("各类型电影数量:", dict(genre_counter))
+
+    # ③ Number of movies per director.
+    director_counter = Counter(movie['director'] for movie in movies)
+    print("各导演电影数量:", dict(director_counter))
+
+    # ④ Number of movies released in 2020 or later.
+    count_2020_plus = sum(1 for movie in movies if movie['year'] >= 2020)
+    print(f"2020年(含)以后上映的电影数量:{count_2020_plus}")
+
+
+if __name__ == "__main__":
+    analyze_movies()
\ No newline at end of file