"""Scrape a movie table from a web page and print summary statistics.

Part 1 downloads the page at ``url``, saves the raw HTML to ``movies.html``,
parses the first ``<table>`` into a list of movie records and writes them,
together with the data code taken from the first ``<code>`` element, to
``movies.json``.

Part 2 reads ``movies.json`` back and prints the lowest/highest rated movie,
the number of movies per genre and per director, and the number of movies
released in 2020 or later.
"""
import json
from collections import Counter

import requests
from bs4 import BeautifulSoup

url = "https://exam.detr.top/exam-b/movies"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being saved and parsed as if
# it were the movie page.
resp = requests.get(url, headers=headers, timeout=10)
resp.raise_for_status()
resp.encoding = resp.apparent_encoding
html_source = resp.text

# Keep a copy of the raw page next to the parsed data.
with open("movies.html", "w", encoding="utf-8") as f:
    f.write(html_source)
print("已保存原始网页:movies.html")

soup = BeautifulSoup(html_source, "html.parser")

# The data code is whatever follows the last ":" in the first <code> element.
code_tag = soup.find("code")
if code_tag is None:
    raise ValueError("no <code> element found in the downloaded page")
code_text = code_tag.get_text(strip=True)
data_id = code_text.split(":")[-1].strip()

table = soup.find("table")
if table is None:
    raise ValueError("no <table> element found in the downloaded page")
# The first <tr> is treated as the header row and skipped.
table_rows = table.find_all("tr")[1:]

movie_list = []
for row in table_rows:
    tds = row.find_all("td")
    if not tds:
        # Rows without any <td> (e.g. additional header rows) carry no data.
        continue
    # Strip every cell so stray whitespace does not leak into the JSON or
    # split otherwise identical genres/directors into separate groups.
    cells = [td.get_text(strip=True) for td in tds]
    movie_info = {
        "id": int(cells[0]),
        "title": cells[1],
        "director": cells[2],
        "year": int(cells[3]),
        "rating": float(cells[4]),
        "duration": int(cells[5]),
        "genre": cells[6],
        "actors_count": int(cells[7]),
    }
    movie_list.append(movie_info)

# Assemble the complete JSON payload.
json_data = {
    "data_code": data_id,
    "movies": movie_list,
}

# Save movies.json.
with open("movies.json", "w", encoding="utf-8") as f:
    json.dump(json_data, f, ensure_ascii=False, indent=4)
print("已保存电影数据:movies.json")

# ============ Part 2: read movies.json and analyse the data ============
# Read the JSON file back.
with open("movies.json", "r", encoding="utf-8") as f:
    load_data = json.load(f)
movies = load_data["movies"]

# (1) Lowest and highest rated movie. sorted() is stable, so on ties the
# lowest is the first such movie in file order and the highest is the last.
sorted_by_rating = sorted(movies, key=lambda x: x["rating"])
print("\n===== ① 评分最高/最低电影 =====")
if sorted_by_rating:
    min_movie = sorted_by_rating[0]
    max_movie = sorted_by_rating[-1]
    print(f"评分最低:{min_movie['title']} {min_movie['rating']}")
    print(f"评分最高:{max_movie['title']} {max_movie['rating']}")
else:
    # Nothing was parsed; there is no lowest/highest movie to report.
    min_movie = max_movie = None

# (2) Number of movies per genre. Converted back to a plain dict so the
# printed representation stays "{...}" in first-seen order.
genre_count = dict(Counter(m["genre"] for m in movies))
print("\n===== ② 各类型电影数量 =====")
print(genre_count)

# (3) Number of movies per director.
director_count = dict(Counter(m["director"] for m in movies))
print("\n===== ③ 各导演电影数量 =====")
print(director_count)

# (4) Number of movies released in 2020 or later (2020 inclusive).
cnt_after_2020 = sum(1 for m in movies if m["year"] >= 2020)
print("\n===== ④ 2020年(含)后上映电影总数 =====")
print(cnt_after_2020)