"""Scrape the exam movie table, save it as JSON, and print summary statistics.

Part 1 downloads the page at ``MOVIES_URL``, stores the raw HTML in
``movies.html`` and writes the page's data code plus the parsed table rows to
``movies.json``.

Part 2 reloads ``movies.json`` and prints the lowest/highest rated movie, the
number of movies per genre and per director, and how many movies were released
in 2020 or later.
"""

import json
from collections import Counter
from operator import itemgetter

MOVIES_URL = "https://exam.detr.top/exam-b/movies"
REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    ),
}
# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Number of <td> cells a data row must provide (id .. actors_count).
EXPECTED_CELLS = 8


def fetch_html(url=MOVIES_URL, timeout=REQUEST_TIMEOUT):
    """Download *url* and return the decoded page text.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so an
            error page is never parsed as if it were the movie table.
    """
    # Imported here so the analysis helpers below remain usable when the
    # scraping dependencies are not installed.
    import requests

    resp = requests.get(url, headers=REQUEST_HEADERS, timeout=timeout)
    resp.raise_for_status()
    # Let requests guess the charset from the body instead of trusting headers.
    resp.encoding = resp.apparent_encoding
    return resp.text


def cells_to_movie(cells):
    """Convert the text cells of one table row into a movie record.

    Args:
        cells: cell texts in column order: id, title, director, year, rating,
            duration, genre, actors_count. Extra trailing cells are ignored.

    Returns:
        A dict with the eight movie fields; numeric columns are converted to
        ``int``/``float`` and text columns are stripped of padding whitespace
        so that equal genres/directors are counted together.

    Raises:
        ValueError: if fewer than eight cells are supplied or a numeric cell
            cannot be converted.
    """
    if len(cells) < EXPECTED_CELLS:
        raise ValueError(
            f"expected at least {EXPECTED_CELLS} cells, got {len(cells)}: {cells!r}"
        )
    movie_id, title, director, year, rating, duration, genre, actors = (
        cell.strip() for cell in cells[:EXPECTED_CELLS]
    )
    return {
        "id": int(movie_id),
        "title": title,
        "director": director,
        "year": int(year),
        "rating": float(rating),
        "duration": int(duration),
        "genre": genre,
        "actors_count": int(actors),
    }


def parse_movies(html_source):
    """Extract the data code and the movie table from the page HTML.

    Returns:
        ``{"data_code": <str>, "movies": [<movie record>, ...]}``.

    Raises:
        ValueError: if the page has no ``<code>`` or ``<table>`` element, or a
            data row is malformed.
    """
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_source, "html.parser")

    code_tag = soup.find("code")
    if code_tag is None:
        raise ValueError("page contains no <code> element with the data code")
    # The data code is whatever follows the last ':' in the <code> text.
    data_code = code_tag.get_text(strip=True).rpartition(":")[2].strip()

    table = soup.find("table")
    if table is None:
        raise ValueError("page contains no <table> element")

    movies = []
    # The first <tr> is the header row.
    for row in table.find_all("tr")[1:]:
        cells = [td.get_text() for td in row.find_all("td")]
        if not cells:
            # Not a data row (e.g. an extra header or spacer row).
            continue
        movies.append(cells_to_movie(cells))

    return {"data_code": data_code, "movies": movies}


def rating_extremes(movies):
    """Return ``(lowest, highest)`` rated movie records.

    Ties are resolved like a stable ascending sort: the lowest is the earliest
    matching entry, the highest is the latest matching entry. Returns
    ``(None, None)`` for an empty list.
    """
    if not movies:
        return None, None
    by_rating = itemgetter("rating")
    lowest = min(movies, key=by_rating)
    # max() keeps the first maximum it sees; scanning in reverse therefore
    # selects the last one in the original order.
    highest = max(reversed(movies), key=by_rating)
    return lowest, highest


def count_by(movies, field):
    """Return a plain dict mapping each value of *field* to its frequency.

    Keys appear in first-seen order.
    """
    return dict(Counter(movie[field] for movie in movies))


def count_released_since(movies, year=2020):
    """Return how many movies have a release year of *year* or later."""
    return sum(1 for movie in movies if movie["year"] >= year)


def main():
    """Run the scrape-and-save step, then the analysis step."""
    # ---- Part 1: download the page and persist it as HTML and JSON ----
    html_source = fetch_html()
    with open("movies.html", "w", encoding="utf-8") as f:
        f.write(html_source)
    print("已保存原始网页:movies.html")

    json_data = parse_movies(html_source)
    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4)
    print("已保存电影数据:movies.json")

    # ---- Part 2: reload movies.json and analyse it ----
    with open("movies.json", "r", encoding="utf-8") as f:
        movies = json.load(f)["movies"]

    # (1) lowest / highest rated movie
    lowest, highest = rating_extremes(movies)
    print("\n===== ① 评分最高/最低电影 =====")
    if lowest is None:
        print("无电影数据")
    else:
        print(f"评分最低:{lowest['title']} {lowest['rating']}")
        print(f"评分最高:{highest['title']} {highest['rating']}")

    # (2) number of movies per genre
    print("\n===== ② 各类型电影数量 =====")
    print(count_by(movies, "genre"))

    # (3) number of movies per director
    print("\n===== ③ 各导演电影数量 =====")
    print(count_by(movies, "director"))

    # (4) number of movies released in 2020 or later
    print("\n===== ④ 2020年(含)后上映电影总数 =====")
    print(count_released_since(movies))


if __name__ == "__main__":
    main()