上传文件至 /
This commit is contained in:
87
movie.py
Normal file
87
movie.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import requests
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Download the movie listing page and keep a raw copy of it on disk.
url = "https://exam.detr.top/exam-b/movies"
headers = {
    # Send a desktop-browser User-Agent instead of the default client one.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Without a timeout requests waits indefinitely on an unresponsive server.
resp = requests.get(url, headers=headers, timeout=10)
# Stop on 4xx/5xx instead of saving and parsing an error page as if it were
# the movie listing.
resp.raise_for_status()
# Decode the body with the charset detected from its content rather than the
# one announced in the response headers.
resp.encoding = resp.apparent_encoding
html_source = resp.text

with open("movies.html", "w", encoding="utf-8") as f:
    f.write(html_source)
print("已保存原始网页:movies.html")
|
||||
|
||||
# Parse the downloaded HTML into a data code plus a list of movie records.
soup = BeautifulSoup(html_source, "html.parser")

# The data code is whatever follows the last ":" in the first <code> element.
code_tag = soup.find("code")
if code_tag is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on None.
    raise ValueError("no <code> element found in the downloaded page")
code_text = code_tag.get_text(strip=True)
data_id = code_text.split(":")[-1].strip()

table = soup.find("table")
if table is None:
    raise ValueError("no <table> element found in the downloaded page")
# The first <tr> is treated as the header row and skipped.
table_rows = table.find_all("tr")[1:]
movie_list = []

for row in table_rows:
    tds = row.find_all("td")
    if len(tds) < 8:
        # Not a complete data row (e.g. a header or spacer row); indexing
        # the eight expected columns below would raise IndexError.
        continue
    # Strip surrounding whitespace from every cell so that text fields such
    # as genre and director compare equal regardless of markup formatting.
    cells = [td.text.strip() for td in tds]
    movie_info = {
        "id": int(cells[0]),
        "title": cells[1],
        "director": cells[2],
        "year": int(cells[3]),
        "rating": float(cells[4]),
        "duration": int(cells[5]),
        "genre": cells[6],
        "actors_count": int(cells[7]),
    }
    movie_list.append(movie_info)
|
||||
|
||||
# Bundle the data code and the parsed movie records into one JSON document.
json_data = dict(data_code=data_id, movies=movie_list)

# Write the document to movies.json (non-ASCII kept as-is, 4-space indent).
with open("movies.json", "w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(json_data, ensure_ascii=False, indent=4))
print("已保存电影数据:movies.json")
|
||||
|
||||
# ====================== Question 2: analyse the data stored in movies.json ======================
# Read the JSON file back from disk and pull out the list of movie records.
with open("movies.json", encoding="utf-8") as json_file:
    load_data = json.loads(json_file.read())
movies = load_data["movies"]
|
||||
|
||||
# (1) Find the highest- and lowest-rated movies.
if movies:
    # min()/max() need a single pass; no full sort is required just to read
    # the two extremes.
    min_movie = min(movies, key=lambda movie: movie["rating"])
    # Scanning in reverse makes the last-listed movie win a tie for the top
    # rating, matching what taking the final element of a stable ascending
    # sort would select.
    max_movie = max(reversed(movies), key=lambda movie: movie["rating"])
else:
    # An empty movie list has no extremes; avoid indexing into nothing.
    min_movie = max_movie = None

print("\n===== ① 评分最高/最低电影 =====")
if min_movie is None:
    print("(无电影数据)")
else:
    print(f"评分最低:{min_movie['title']} {min_movie['rating']}")
    print(f"评分最高:{max_movie['title']} {max_movie['rating']}")
|
||||
|
||||
# (2) Count how many movies fall under each genre.
genre_count = {}
for movie in movies:
    genre = movie["genre"]
    if genre in genre_count:
        genre_count[genre] += 1
    else:
        # First time this genre is seen.
        genre_count[genre] = 1
print("\n===== ② 各类型电影数量 =====")
print(genre_count)
|
||||
|
||||
# (3) Count how many movies each director has in the list.
director_count = {}
for movie in movies:
    director = movie["director"]
    # setdefault() seeds a newly seen director with 0 before incrementing.
    director_count[director] = director_count.setdefault(director, 0) + 1
print("\n===== ③ 各导演电影数量 =====")
print(director_count)
|
||||
|
||||
# (4) Count the movies released in 2020 or later (2020 itself included).
cnt_after_2020 = sum(1 for movie in movies if movie["year"] >= 2020)
print("\n===== ④ 2020年(含)后上映电影总数 =====")
print(cnt_after_2020)
|
||||
Reference in New Issue
Block a user