From 6e96743a5947c630d7f351b0a5719e946ca41723 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=88=E5=B0=A4=E4=BC=98?=
 <2509165039@student.example.com>
Date: Wed, 1 Apr 2026 01:04:50 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20/?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 260331-2509165039.py | 62 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 260331-2509165039.py

diff --git a/260331-2509165039.py b/260331-2509165039.py
new file mode 100644
index 0000000..bd832a0
--- /dev/null
+++ b/260331-2509165039.py
@@ -0,0 +1,62 @@
+"""Scrape the Douban Top 250 movie list and save it as TSV, CSV and JSON."""
+import requests
+from bs4 import BeautifulSoup
+import json
+import csv
+
+headers = {'User-Agent':'Mozilla/5.0(Windows NT 10.0;Win64; x64) AppleWebKit/537.36(KHTML,like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
+FIELDS = ['中文名', '英文名', '年份', '导演', '主演']  # record keys, in output column order
+
+
+def parse_item(item):
+    """Build one movie record (a dict keyed by ``FIELDS``) from a ``div.item`` tag."""
+    title_tag = item.find('span', class_='title')
+    if not title_tag:
+        print(f"在这个item里没找到标题:{item}")
+    other_tag = item.find('span', class_='other')
+    info_tag = item.find('p', class_='')
+    info = info_tag.text.strip() if info_tag else ""
+    # When a "playable" marker exists, take the year span that precedes it.
+    playable_tag = item.find('span', class_='playable')
+    if playable_tag:
+        year_tag = playable_tag.find_previous_sibling('span', class_='year')
+    else:
+        year_tag = item.find('span', class_='year')
+    return {
+        '中文名': title_tag.text if title_tag else "未找到标题",
+        '英文名': (other_tag.text if other_tag else "").replace('/', '').strip(),
+        '年份': year_tag.text.strip('()') if year_tag else "未知年份",
+        '导演': info.split('导演:')[1].split('主演:')[0].strip() if '导演:' in info else '',
+        '主演': info.split('主演:')[1].strip() if '主演:' in info else '',
+    }
+
+
+def scrape():
+    """Fetch the ten listing pages; requests.RequestException propagates on failure."""
+    movies = []
+    for start in range(0, 250, 25):
+        url = f'https://movie.douban.com/top250?start={start}'
+        # timeout: never hang on a stalled link; raise_for_status: never parse an error page.
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        movies.extend(parse_item(item) for item in soup.find_all('div', class_='item'))
+    return movies
+
+
+def save(movies):
+    """Write *movies* to movies.tst (tab-separated), movies.csv and movies.json."""
+    # csv quotes fields that hold tabs/newlines; hand-joined rows would be corrupted.
+    tables = (('movies.tst', {'delimiter': '\t', 'lineterminator': '\n'}), ('movies.csv', {}))
+    for path, dialect in tables:
+        with open(path, 'w', encoding='utf-8', newline='') as f:
+            writer = csv.DictWriter(f, fieldnames=FIELDS, **dialect)
+            writer.writeheader()
+            writer.writerows(movies)
+    with open('movies.json', 'w', encoding='utf-8') as f:
+        json.dump(movies, f, ensure_ascii=False, indent=4)
+
+
+if __name__ == '__main__':
+    save(scrape())
+    print("爬取完成!已生成 movies.tst, movies.csv, movies.json")
\ No newline at end of file