上传文件至 /
This commit is contained in:
135
2604022509165013.py
Normal file
135
2604022509165013.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import csv
|
||||
import os
|
||||
|
||||
# Browser-like User-Agent sent with every HTTP request made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64; x64) AppleWebKit/537.36(KHTML,like Gecko) Chrome/91.8.4472.124 Safari/537.36'
}

# One dict per scraped movie; the export/statistics/poster sections read it.
movies = []

# Scraping stops as soon as this many movies have been collected.
max_movies = 10

# The list is paginated 25 entries per page through the ``start`` parameter.
for start in range(0, 250, 25):
    url = f'https://movie.douban.com/top250?start={start}'
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() fails loudly on an HTTP error instead
    # of silently parsing an error page that contains no movie items.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    for item in soup.find_all('div', class_='item'):
        # Rank: stored as int when it parses, otherwise the placeholder
        # string is kept (calling int() on the placeholder used to crash).
        rank_tag = item.find('em')
        try:
            rank = int(rank_tag.text) if rank_tag else '未知排名'
        except ValueError:
            rank = '未知排名'

        # Title: first <span class="title"> inside the item.
        title_tag = item.find('span', class_='title')
        if title_tag:
            title = title_tag.text
        else:
            title = "未找到标题"
            print(f"在这个item里没找到标题:{item}")

        # Text of <span class="other">, with a leading "/" separator removed.
        other_tag = item.find('span', class_='other')
        other_title = other_tag.text.strip() if other_tag else ""
        if other_title.startswith('/'):
            other_title = other_title[1:].strip()

        rating_tag = item.find('span', class_='rating_num')
        rating = rating_tag.text if rating_tag else "未知评分"

        inq_tag = item.find('span', class_='inq')
        inq = inq_tag.text.strip() if inq_tag else ""

        # Year: look for <span class="year"> next to the "playable" marker
        # when present, otherwise anywhere in the item.
        # NOTE(review): confirm the page actually exposes a span.year
        # element; if it does not, every entry gets the placeholder.
        playable_tag = item.find('span', class_='playable')
        if playable_tag:
            year_tag = playable_tag.find_previous_sibling('span', class_='year')
        else:
            year_tag = item.find('span', class_='year')
        year = year_tag.text.strip('()') if year_tag else "未知年份"

        # .get() avoids a KeyError for an <img> without a src attribute.
        img_tag = item.find('img')
        poster_url = img_tag.get('src', '') if img_tag else ""

        movies.append({
            "rank": rank,
            "title": title,
            "en_title": other_title,
            "rating": rating,
            "quote": inq,
            "year": year,
            "poster_url": poster_url,
        })
        if len(movies) >= max_movies:
            break

    # Leave the pagination loop too once enough movies were collected.
    if len(movies) >= max_movies:
        break
|
||||
|
||||
# --- Export 1: plain text, one movie title per line -----------------------
with open("movies.txt", "w", encoding="utf-8") as txt_file:
    txt_file.writelines(entry["title"] + "\n" for entry in movies)
print(" 已保存:movies.txt")

# --- Export 2: CSV with a header row ---------------------------------------
# utf-8-sig writes a BOM at the start of the file; newline="" lets the csv
# module control line endings itself.
csv_keys = ("rank", "title", "en_title", "rating", "quote", "year")
with open("movies.csv", "w", encoding="utf-8-sig", newline="") as csv_file:
    csv_out = csv.writer(csv_file)
    csv_out.writerow(["排名", "中文名", "英文名", "评分", "简介", "年份"])
    csv_out.writerows([entry[key] for key in csv_keys] for entry in movies)
print(" 已保存:movies.csv")

# --- Export 3: full records (including poster_url) as indented JSON --------
with open("movies.json", "w", encoding="utf-8") as json_file:
    json.dump(movies, json_file, ensure_ascii=False, indent=4)
print(" 已保存:movies.json")
|
||||
|
||||
# Re-read movies.csv and keep the rows whose rating is strictly above 9.5.
high_rating_movies = []
with open("movies.csv", "r", encoding="utf-8-sig", newline="") as f:
    reader = csv.DictReader(f)
    # Remember the header so the output file can be written even when no
    # row passes the filter (indexing high_rating_movies[0] used to raise
    # IndexError in that case).
    high_rating_fields = reader.fieldnames or []
    for row in reader:
        try:
            score = float(row["评分"])
        except (TypeError, ValueError):
            # Non-numeric rating (e.g. a placeholder value): skip the row
            # instead of aborting the whole script.
            continue
        if score > 9.5:
            high_rating_movies.append(row)

print("\n=== 评分高于9.5的电影 ===")
for m in high_rating_movies:
    print(f"{m['排名']} {m['中文名']} {m['评分']}")

# Write the filtered rows with the same columns as the input CSV.
with open("high_rating.csv", "w", encoding="utf-8-sig", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=high_rating_fields)
    writer.writeheader()
    writer.writerows(high_rating_movies)
print(" 已保存:high_rating.csv")
|
||||
|
||||
# Load the JSON export back and print simple rating statistics.
with open("movies.json", "r", encoding="utf-8") as f:
    movie_data = json.load(f)

# Pair every movie with its numeric rating; entries whose rating cannot be
# parsed as a number (e.g. a placeholder string) are left out of the stats.
rated_movies = []
for m in movie_data:
    try:
        rated_movies.append((float(m["rating"]), m))
    except (TypeError, ValueError):
        continue

ratings = [score for score, _ in rated_movies]

print("\n=== 统计信息 ===")
if ratings:
    avg_rating = sum(ratings) / len(ratings)
    max_rating = max(ratings)
    # Several movies may share the highest rating; list all of them.
    top_movies = [m for score, m in rated_movies if score == max_rating]

    print(f"前10部电影平均分:{avg_rating:.2f}")
    print(f"最高评分:{max_rating}")
    for m in top_movies:
        print(f"评分最高的电影:{m['title']}")
else:
    # Nothing to aggregate: avoid ZeroDivisionError / max() on an empty list.
    avg_rating = None
    max_rating = None
    top_movies = []
    print("没有可用于统计的评分数据")
|
||||
|
||||
# Download every movie poster into a local folder (created if missing).
poster_folder = "movie_posters"
os.makedirs(poster_folder, exist_ok=True)

# Characters that are not allowed in file names on common filesystems;
# each one found in a title is replaced by "_" so open() cannot fail or
# write outside poster_folder because of a "/" in the title.
forbidden_filename_chars = '\\/:*?"<>|'

for movie in movies:
    # Movies without a poster URL are skipped silently.
    if not movie["poster_url"]:
        continue
    try:
        # The timeout prevents an indefinite hang; raise_for_status() keeps
        # an HTTP error body from being saved as if it were an image.
        img_resp = requests.get(movie["poster_url"], headers=headers, timeout=10)
        img_resp.raise_for_status()

        safe_title = "".join(
            "_" if ch in forbidden_filename_chars else ch
            for ch in movie["title"]
        )
        filename = os.path.join(poster_folder, f"{movie['rank']}_{safe_title}.jpg")
        with open(filename, "wb") as f:
            f.write(img_resp.content)
        print(f" 已下载海报:{movie['title']}")
    except (requests.RequestException, OSError) as e:
        # Best effort: report the failure and continue with the next poster.
        print(f" 海报下载失败:{movie['title']},错误:{e}")

print("\n 所有任务全部完成!")
|
||||
Reference in New Issue
Block a user