"""Scrape the Douban Top 250 movie list and export it as text, CSV and JSON.

The script downloads the ten listing pages, extracts one record per movie
with a regular expression, and writes:

* ``movies.txt``  - rank, title, rating and vote count, one movie per line
* ``movies.csv``  - every extracted field
* ``movies.json`` - every extracted field
* ``high_rating_movies.csv`` - only the movies rated above 9.5
"""

import csv
import json
import re

import requests

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Column order shared by both CSV exports; the names are also the record keys.
FIELDNAMES = ["排名", "电影名", "英文名", "评分", "评价人数"]

# Compiled once at import time instead of once per downloaded page.
# Groups, in order: rank, title, English title, rating, vote count.
# NOTE(review): the pattern contains no HTML tag anchors, so what each group
# captures depends only on where digits, "/ " and "人评价" fall in the page
# text - verify the captured fields against the live markup.
MOVIE_PATTERN = re.compile(
    r'(\d+).*?([^<]+).*?.*?/ (.*?).*?(\d+\.\d+).*?(\d+)人评价',
    re.S
)


def fetch_page(start, timeout=REQUEST_TIMEOUT):
    """Download one listing page and return its HTML text.

    Args:
        start: Zero-based offset of the first movie on the page.
        timeout: Seconds to wait for the server.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never silently parsed as "no movies".
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = f"https://movie.douban.com/top250?start={start}&filter="
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    return response.text


def parse_movies(html):
    """Extract movie records from the HTML of one listing page.

    Args:
        html: Page source to search with ``MOVIE_PATTERN``.

    Returns:
        A list with one dict per pattern match, keyed by ``FIELDNAMES``.
        All values are strings; the English title has surrounding
        whitespace stripped. The list is empty when nothing matches.
    """
    return [
        {
            "排名": rank,
            "电影名": title,
            "英文名": en_title.strip(),
            "评分": rating,
            "评价人数": vote,
        }
        for rank, title, en_title, rating, vote in MOVIE_PATTERN.findall(html)
    ]


def format_movie(movie):
    """Return the one-line summary used in ``movies.txt`` and on the console."""
    return f"{movie['排名']}. {movie['电影名']} 评分:{movie['评分']} 评价人数:{movie['评价人数']}"


def write_csv(path, movies):
    """Write ``movies`` to ``path`` as CSV with a header row.

    The file is encoded as ``utf-8-sig`` (UTF-8 with a byte-order mark) and
    opened with ``newline=""`` as the ``csv`` module requires.
    """
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(movies)


def main():
    """Scrape all ten pages and write every export file."""
    movie_list = []
    # The list is paginated 25 movies per page: start=0, 25, ..., 225.
    for start in range(0, 250, 25):
        movie_list.extend(parse_movies(fetch_page(start)))

    with open("movies.txt", "w", encoding="utf-8") as f:
        f.writelines(f"{format_movie(movie)}\n" for movie in movie_list)
    print("✅ movies.txt 保存成功(仅电影名+评分+评价人数)")

    write_csv("movies.csv", movie_list)
    print("✅ movies.csv 保存成功(完整信息)")

    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(movie_list, f, ensure_ascii=False, indent=2)
    print("✅ movies.json 保存成功(完整信息)")

    # Ratings are kept as strings in the records, so convert for comparison.
    high_rating_movies = [m for m in movie_list if float(m["评分"]) > 9.5]
    print("\n🎯 评分高于9.5的电影:")
    for m in high_rating_movies:
        print(format_movie(m))

    write_csv("high_rating_movies.csv", high_rating_movies)
    print("\n✅ high_rating_movies.csv 保存成功(评分>9.5的电影)")


if __name__ == "__main__":
    main()