From 4398d1ee788109cd99a3bdb22cc1e1891399fdb0 Mon Sep 17 00:00:00 2001
From: 2509165025 <2509165025@student.edu.cn>
Date: Tue, 31 Mar 2026 11:27:41 +0800
Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90=E4=BD=9C=E4=B8=9Adouban250?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 250json.py            |  0
 douban.txt/douban.txt | 67 +++++++++++++++++++++++++++++++++++++++++++
 douban3.31 .py        |  0
 3 files changed, 67 insertions(+)
 create mode 100644 250json.py
 create mode 100644 douban.txt/douban.txt
 create mode 100644 douban3.31 .py

diff --git a/250json.py b/250json.py
new file mode 100644
index 0000000..e69de29
diff --git a/douban.txt/douban.txt b/douban.txt/douban.txt
new file mode 100644
index 0000000..db0a605
--- /dev/null
+++ b/douban.txt/douban.txt
@@ -0,0 +1,80 @@
+# Scrape the Douban Top 250 movie list and save it as a plain-text report.
+import requests
+from bs4 import BeautifulSoup
+import time
+
+# Browser-like User-Agent sent with every request.
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+}
+
+# One dict per movie, in the order the entries are scraped.
+all_movies = []
+
+# The list is requested as 10 pages, advancing the "start" offset by 25 each time.
+for page in range(10):
+    start = page * 25
+    url = f"https://movie.douban.com/top250?start={start}&filter="
+
+    print(f"正在爬第 {page+1} 页 …")
+    # A timeout keeps a stalled connection from hanging the script forever.
+    resp = requests.get(url, headers=headers, timeout=10)
+    # Fail loudly on an HTTP error instead of silently parsing an error page.
+    resp.raise_for_status()
+    soup = BeautifulSoup(resp.text, "html.parser")
+
+    items = soup.find_all("div", class_="item")
+
+    for item in items:
+        rank = item.find("em").text
+        # Must be .text: on a bs4 Tag, ".t" looks up a child <t> tag and yields None.
+        title = item.find("span", class_="title").text
+        score = item.find("span", class_="rating_num").text
+
+        # First non-empty line of the info paragraph is parsed for the director,
+        # the second for "year / area / genre".
+        info = item.find("div", class_="bd").p.text.strip()
+        lines = [line.strip() for line in info.split("\n") if line.strip()]
+
+        # Guard against an empty info paragraph, where lines[0] would raise IndexError.
+        director_line = lines[0] if lines else ""
+        if "导演:" in director_line:
+            director = director_line.split("导演:")[1].split("主演:")[0].strip()
+        else:
+            director = "未知"
+
+        if len(lines) >= 2:
+            year_area_genre = lines[1].split("/")
+            year = year_area_genre[0].strip() if len(year_area_genre) > 0 else "未知"
+            area = year_area_genre[1].strip() if len(year_area_genre) > 1 else "未知"
+            genre = year_area_genre[2].strip() if len(year_area_genre) > 2 else "未知"
+        else:
+            year = area = genre = "未知"
+
+        movie = {
+            "排名": rank,
+            "片名": title,
+            "导演": director,
+            "年份": year,
+            "地区": area,
+            "类型": genre,
+            "评分": score
+        }
+        all_movies.append(movie)
+
+    # Pause between pages.
+    time.sleep(1)
+
+# Write one labelled field per line, with a dashed separator after each movie.
+with open("douban_top250.txt", "w", encoding="utf-8") as f:
+    for m in all_movies:
+        f.write(f"排名：{m['排名']}\n")
+        f.write(f"片名：{m['片名']}\n")
+        f.write(f"导演：{m['导演']}\n")
+        f.write(f"年份：{m['年份']}\n")
+        f.write(f"地区：{m['地区']}\n")
+        f.write(f"类型：{m['类型']}\n")
+        f.write(f"评分：{m['评分']}\n")
+        f.write("-" * 50 + "\n")
+
+print("全部爬完！已保存到 douban_top250.txt")
\ No newline at end of file
diff --git a/douban3.31 .py b/douban3.31 .py
new file mode 100644
index 0000000..e69de29