"""Scrape the Douban Top 250 movie list and export it to text, CSV and JSON.

Reconstructed as a plain script from the ``XWL.py`` file added by commit
5dce4f3. Running the script fetches the list pages from movie.douban.com
(25 entries per page, offsets 0..225), then writes ``movies.txt``,
``movies.csv``, ``movies.json`` and ``high_rating_movies.csv`` into the
current working directory.
"""
import csv
import html
import json
import re
import sys
from typing import Dict, List

# One scraped movie: every value is kept as the string found in the page.
Movie = Dict[str, str]

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )
}
PAGE_URL = "https://movie.douban.com/top250?start={start}&filter="
PAGE_SIZE = 25
TOTAL_MOVIES = 250
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the run
HIGH_RATING_THRESHOLD = 9.5
FIELDNAMES = ["排名", "电影名", "英文名", "评分", "评价人数"]

# NOTE(review): the tag/class anchors below are the list markup this script
# assumes (one <div class="item"> per movie, rank in <em>, names in
# <span class="title">, score in class="rating_num") -- confirm against a
# live page before relying on the output.
ITEM_SPLIT_RE = re.compile(r'<div\s+class="item"\s*>')
RANK_RE = re.compile(r"<em[^>]*>\s*(\d+)\s*</em>")
TITLE_RE = re.compile(r'<span[^>]*class="title"[^>]*>([^<]*)</span>')
RATING_RE = re.compile(r'class="rating_num"[^>]*>\s*(\d+(?:\.\d+)?)\s*<')
VOTES_RE = re.compile(r"(\d+)人评价")


def fetch_page(url: str, timeout: float = REQUEST_TIMEOUT) -> str:
    """Download *url* and return the response body as text.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-2xx status (so a blocked request is not mistaken for an
            empty page).
    """
    # Imported here so the parsing/export helpers remain importable on a
    # machine where the third-party ``requests`` package is not installed.
    import requests

    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    return response.text


def parse_movies(page_html: str) -> List[Movie]:
    """Extract the movie entries from one list page.

    Each entry is parsed on its own chunk of markup, so a movie without a
    secondary (English) title gets an empty ``英文名`` instead of pulling
    fields from the following entry. Chunks missing the rank, title, rating
    or vote count are skipped.
    """
    movies: List[Movie] = []
    # Everything before the first item marker is page chrome, not a movie.
    for chunk in ITEM_SPLIT_RE.split(page_html)[1:]:
        rank = RANK_RE.search(chunk)
        titles = TITLE_RE.findall(chunk)
        rating = RATING_RE.search(chunk)
        votes = VOTES_RE.search(chunk)
        if not (rank and titles and rating and votes):
            continue
        # The secondary title is rendered as "&nbsp;/&nbsp;Name"; drop the
        # leading separator and surrounding whitespace.
        en_title = ""
        if len(titles) > 1:
            en_title = html.unescape(titles[1]).strip().lstrip("/").strip()
        movies.append(
            {
                "排名": rank.group(1),
                "电影名": html.unescape(titles[0]).strip(),
                "英文名": en_title,
                "评分": rating.group(1),
                "评价人数": votes.group(1),
            }
        )
    return movies


def scrape_top250() -> List[Movie]:
    """Fetch every list page and return all parsed movies in page order."""
    movies: List[Movie] = []
    for start in range(0, TOTAL_MOVIES, PAGE_SIZE):
        url = PAGE_URL.format(start=start)
        page_movies = parse_movies(fetch_page(url))
        if not page_movies:
            # Make a layout change or an anti-bot page visible instead of
            # silently producing short output files.
            print(f"warning: no movies parsed from {url}", file=sys.stderr)
        movies.extend(page_movies)
    return movies


def format_movie(movie: Movie) -> str:
    """Return the one-line summary used in ``movies.txt`` and on stdout."""
    return (
        f"{movie['排名']}. {movie['电影名']} "
        f"评分:{movie['评分']} 评价人数:{movie['评价人数']}"
    )


def filter_high_rating(
    movies: List[Movie], threshold: float = HIGH_RATING_THRESHOLD
) -> List[Movie]:
    """Return the movies whose rating is strictly greater than *threshold*."""
    return [m for m in movies if float(m["评分"]) > threshold]


def write_text(movies: List[Movie], path: str) -> None:
    """Write one summary line per movie to *path* (UTF-8)."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(format_movie(m) + "\n" for m in movies)


def write_csv(movies: List[Movie], path: str) -> None:
    """Write all fields of *movies* to *path* as CSV with a header row."""
    # utf-8-sig adds a BOM so spreadsheet tools detect the encoding.
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(movies)


def write_json(movies: List[Movie], path: str) -> None:
    """Write *movies* to *path* as indented, non-ASCII-escaped JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(movies, f, ensure_ascii=False, indent=2)


def main() -> None:
    """Scrape the list, write all export files and print the top-rated movies."""
    movie_list = scrape_top250()

    write_text(movie_list, "movies.txt")
    print("✅ movies.txt 保存成功(仅电影名+评分+评价人数)")

    write_csv(movie_list, "movies.csv")
    print("✅ movies.csv 保存成功(完整信息)")

    write_json(movie_list, "movies.json")
    print("✅ movies.json 保存成功(完整信息)")

    high_rating_movies = filter_high_rating(movie_list)
    print("\n🎯 评分高于9.5的电影:")
    for movie in high_rating_movies:
        print(format_movie(movie))

    write_csv(high_rating_movies, "high_rating_movies.csv")
    print("\n✅ high_rating_movies.csv 保存成功(评分>9.5的电影)")


if __name__ == "__main__":
    main()