From 6a5448e1e0eaf5b2a71df417b8b3dd46b41a16b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=83=91=E9=9F=B6=E5=9D=A4?=
 <2509165013@student.example.com>
Date: Thu, 2 Apr 2026 16:02:17 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20/?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 2604022509165013.py | 135 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 2604022509165013.py

diff --git a/2604022509165013.py b/2604022509165013.py
new file mode 100644
index 0000000..42736c6
--- /dev/null
+++ b/2604022509165013.py
@@ -0,0 +1,135 @@
+import requests
+from bs4 import BeautifulSoup
+import json
+import csv
+import os
+
+# Scrape the first MAX_MOVIES entries of the Douban Top 250 list, save them as
+# TXT / CSV / JSON, print rating statistics, and download each poster image.
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64; x64) AppleWebKit/537.36(KHTML,like Gecko) Chrome/91.8.4472.124 Safari/537.36'
+}
+MAX_MOVIES = 10  # stop scraping once this many movies are collected
+TIMEOUT = 10     # seconds; requests waits indefinitely without a timeout
+CSV_FIELDS = ["排名", "中文名", "英文名", "评分", "简介", "年份"]
+
+
+def parse_rating(text):
+    """Return *text* as a float, or None when it is not numeric."""
+    try:
+        return float(text)
+    except (TypeError, ValueError):
+        return None
+
+
+movies = []
+
+for start in range(0, 250, 25):
+    url = f'https://movie.douban.com/top250?start={start}'
+    response = requests.get(url, headers=headers, timeout=TIMEOUT)
+    response.raise_for_status()  # fail loudly instead of parsing an error page
+    soup = BeautifulSoup(response.text, 'html.parser')
+    # Only take as many items from this page as are still needed.
+    for item in soup.find_all('div', class_='item')[:MAX_MOVIES - len(movies)]:
+        rank_tag = item.find('em')
+        rank_text = rank_tag.text.strip() if rank_tag else ""
+        # Keep the placeholder as text; int() on it used to raise ValueError.
+        rank = int(rank_text) if rank_text.isdecimal() else '未知排名'
+
+        title_tag = item.find('span', class_='title')
+        if title_tag:
+            title = title_tag.text
+        else:
+            title = "未找到标题"
+            print(f"在这个item里没找到标题:{item}")
+
+        other_tag = item.find('span', class_='other')
+        other_title = other_tag.text.strip() if other_tag else ""
+        if other_title.startswith('/'):
+            other_title = other_title[1:].strip()
+
+        rating_tag = item.find('span', class_='rating_num')
+        rating = rating_tag.text if rating_tag else "未知评分"
+        inq_tag = item.find('span', class_='inq')
+        inq = inq_tag.text.strip() if inq_tag else ""
+
+        # NOTE(review): confirm the list page really contains span.year;
+        # if it does not, every entry ends up with the placeholder year.
+        playable_tag = item.find('span', class_='playable')
+        year_tag = (playable_tag.find_previous_sibling('span', class_='year')
+                    if playable_tag else item.find('span', class_='year'))
+        year = year_tag.text.strip('()') if year_tag else "未知年份"
+        img_tag = item.find('img')
+        poster_url = img_tag.get('src', '') if img_tag else ""
+
+        movies.append({
+            "rank": rank, "title": title, "en_title": other_title,
+            "rating": rating, "quote": inq, "year": year, "poster_url": poster_url,
+        })
+    if len(movies) >= MAX_MOVIES:
+        break
+
+with open("movies.txt", "w", encoding="utf-8") as f:
+    f.writelines(movie["title"] + "\n" for movie in movies)
+print(" 已保存：movies.txt")
+
+with open("movies.csv", "w", encoding="utf-8-sig", newline="") as f:
+    writer = csv.writer(f)
+    writer.writerow(CSV_FIELDS)
+    for movie in movies:
+        writer.writerow([movie["rank"], movie["title"], movie["en_title"],
+                         movie["rating"], movie["quote"], movie["year"]])
+print(" 已保存：movies.csv")
+
+with open("movies.json", "w", encoding="utf-8") as f:
+    json.dump(movies, f, ensure_ascii=False, indent=4)
+print(" 已保存：movies.json")
+
+# Read the CSV back; rows with a non-numeric rating are skipped, not crashed on.
+with open("movies.csv", "r", encoding="utf-8-sig", newline="") as f:
+    high_rating_movies = [row for row in csv.DictReader(f)
+                          if (parse_rating(row["评分"]) or 0) > 9.5]
+print("\n=== 评分高于9.5的电影 ===")
+for m in high_rating_movies:
+    print(f"{m['排名']} {m['中文名']} {m['评分']}")
+
+# Fixed field names: indexing high_rating_movies[0] failed on an empty result.
+with open("high_rating.csv", "w", encoding="utf-8-sig", newline="") as f:
+    writer = csv.DictWriter(f, fieldnames=CSV_FIELDS)
+    writer.writeheader()
+    writer.writerows(high_rating_movies)
+print(" 已保存：high_rating.csv")
+
+with open("movies.json", "r", encoding="utf-8") as f:
+    movie_data = json.load(f)
+rated = [(parse_rating(m["rating"]), m) for m in movie_data]
+ratings = [r for r, _ in rated if r is not None]
+print("\n=== 统计信息 ===")
+if ratings:  # guard: sum()/len() and max() both fail on an empty list
+    max_rating = max(ratings)
+    print(f"前{MAX_MOVIES}部电影平均分：{sum(ratings) / len(ratings):.2f}")
+    print(f"最高评分：{max_rating}")
+    for r, m in rated:
+        if r == max_rating:
+            print(f"评分最高的电影：{m['title']}")
+else:
+    print("没有可用的评分数据")
+
+poster_folder = "movie_posters"
+os.makedirs(poster_folder, exist_ok=True)
+for movie in movies:
+    if not movie["poster_url"]:
+        continue
+    # Replace characters that are not allowed in file names on common systems.
+    safe_title = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in movie["title"])
+    filename = os.path.join(poster_folder, f"{movie['rank']}_{safe_title}.jpg")
+    try:
+        img_resp = requests.get(movie["poster_url"], headers=headers, timeout=TIMEOUT)
+        img_resp.raise_for_status()  # do not save an HTML error page as .jpg
+        with open(filename, "wb") as f:
+            f.write(img_resp.content)
+        print(f" 已下载海报：{movie['title']}")
+    except (requests.RequestException, OSError) as e:
+        print(f" 海报下载失败：{movie['title']}，错误：{e}")
+
+print("\n 所有任务全部完成！")
\ No newline at end of file