# 52 lines, 2.4 KiB, Python
import requests
import re
import csv
import json
# Scrape the Douban "Top 250" movie listing and export it as text, CSV and
# JSON, plus a separate CSV containing only the movies rated above 9.5.

# HTTP headers sent with every page request (a desktop-browser User-Agent).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# The listing is paged through the ``start`` query parameter.
TOTAL_MOVIES = 250
PAGE_SIZE = 25

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Column order shared by every CSV export.
CSV_FIELDS = ["排名", "电影名", "英文名", "评分", "评价人数"]

# Compiled once and reused for every page.  Capture groups, in order:
#   1. rank        - digits inside <em class="">
#   2. title       - text of the first <span class="title"> after the rank
#   3. other title - text following the first "/ " inside <span class="other">
#   4. rating      - decimal number inside the "rating_num" span
#   5. vote count  - digits immediately before "人评价"
# re.S lets ".*?" cross line breaks, since one entry spans many lines.
# NOTE(review): group 3 is exported under the key "英文名" but is read from the
# "other" span rather than a "title" span - confirm this is the intended field.
MOVIE_PATTERN = re.compile(
    r'<em class="">(\d+)</em>.*?<span class="title">([^<]+)</span>.*?<span class="other">.*?/ (.*?)</span>.*?<span class="rating_num" property="v:average">(\d+\.\d+)</span>.*?<span>(\d+)人评价</span>',
    re.S,
)


def parse_movies(html):
    """Extract movie records from the HTML of one listing page.

    Args:
        html: Page markup as a string.

    Returns:
        A list of dicts keyed by ``CSV_FIELDS``, one per regex match, in
        document order.  Every value is a string; the "英文名" value has
        surrounding whitespace stripped.  Returns an empty list when nothing
        matches.
    """
    return [
        {
            "排名": rank,
            "电影名": title,
            "英文名": other_title.strip(),
            "评分": rating,
            "评价人数": votes,
        }
        for rank, title, other_title, rating, votes in MOVIE_PATTERN.findall(html)
    ]


def fetch_page(start):
    """Download one listing page and return its body text.

    Args:
        start: Zero-based offset of the first movie on the page.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.  Failing here prevents an error page
            from being parsed into an empty result and reported as success.
    """
    url = f"https://movie.douban.com/top250?start={start}&filter="
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def scrape_movies():
    """Fetch every listing page and return all parsed movie records."""
    movies = []
    for start in range(0, TOTAL_MOVIES, PAGE_SIZE):
        movies.extend(parse_movies(fetch_page(start)))
    return movies


def format_movie_line(movie):
    """Return the one-line summary (rank, title, rating, votes) of a movie."""
    return f"{movie['排名']}. {movie['电影名']} 评分:{movie['评分']} 评价人数:{movie['评价人数']}"


def filter_high_rating(movies, threshold=9.5):
    """Return the movies whose rating is strictly greater than *threshold*."""
    return [m for m in movies if float(m["评分"]) > threshold]


def save_txt(movies, path):
    """Write one summary line per movie to *path* as UTF-8 text."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(format_movie_line(m) + "\n" for m in movies)


def save_csv(movies, path):
    """Write *movies* to *path* as CSV with a header row.

    The "utf-8-sig" encoding prepends a byte-order mark to the file.
    """
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=CSV_FIELDS)
        writer.writeheader()
        writer.writerows(movies)


def save_json(movies, path):
    """Write *movies* to *path* as indented JSON, keeping non-ASCII text."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(movies, f, ensure_ascii=False, indent=2)


def main():
    """Scrape the listing, write all export files and print a summary."""
    movie_list = scrape_movies()

    save_txt(movie_list, "movies.txt")
    print("✅ movies.txt 保存成功(仅电影名+评分+评价人数)")

    save_csv(movie_list, "movies.csv")
    print("✅ movies.csv 保存成功(完整信息)")

    save_json(movie_list, "movies.json")
    print("✅ movies.json 保存成功(完整信息)")

    high_rating_movies = filter_high_rating(movie_list)
    print("\n🎯 评分高于9.5的电影:")
    for m in high_rating_movies:
        print(format_movie_line(m))

    save_csv(high_rating_movies, "high_rating_movies.csv")
    print("\n✅ high_rating_movies.csv 保存成功(评分>9.5的电影)")


if __name__ == "__main__":
    main()