"""Scrape movie titles, ratings and quotes from a Douban Top 250 page.

The script downloads the single page at ``url``, extracts one record per
``.item`` element and writes the records to ``movies.csv``.  Only that one
page is requested; no pagination is performed.
"""

import csv
import time  # not used by the script yet; kept so the import set is unchanged

import requests
from bs4 import BeautifulSoup


# Maps a numeric category id to its genre label.
genre_dict = {
    0: "剧情",
    1: "喜剧",
    2: "科幻",
    3: "悬疑",
    4: "动作",
    5: "爱情",
    6: "动画",
    7: "犯罪",
    8: "奇幻",
    9: "记录",  # NOTE(review): possibly meant "纪录" (documentary) - confirm
}

url = 'https://movie.douban.com/top250'
headers = {'User-Agent': 'Mozilla/5.0...'}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Column order of the generated CSV file.
CSV_FIELDS = ['title', 'rating', 'quote']


def _text_or_empty(node):
    """Return the text of a BeautifulSoup node, or '' when the node is None."""
    return node.get_text() if node is not None else ''


def fetch_html(page_url, request_headers, timeout=REQUEST_TIMEOUT):
    """Download ``page_url`` and return the response body as text.

    Args:
        page_url: Address of the page to download.
        request_headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(page_url, headers=request_headers, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page and
    # silently producing an empty CSV.
    response.raise_for_status()
    return response.text


def parse_movies(html):
    """Extract movie records from the HTML of a listing page.

    Args:
        html: Markup of the page as a string.

    Returns:
        A list of dicts with the keys ``title``, ``rating`` and ``quote``.
        ``rating`` and ``quote`` are empty strings when the corresponding
        element is absent; items without a title are skipped.
    """
    soup = BeautifulSoup(html, 'lxml')
    movies = []
    for item in soup.select('.item'):
        title = _text_or_empty(item.select_one('.title')).strip()
        if not title:
            # An entry without a title is unusable; skip it rather than
            # crash on a missing node.
            continue
        movies.append({
            'title': title,
            'rating': _text_or_empty(item.select_one('.rating_num')),
            'quote': _text_or_empty(item.select_one('.inq')),
        })
    return movies


def save_movies(movies, path='movies.csv'):
    """Write ``movies`` to ``path`` as UTF-8 CSV with a header row.

    Args:
        movies: Iterable of dicts holding the keys listed in ``CSV_FIELDS``.
        path: Destination file; overwritten if it already exists.
    """
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=CSV_FIELDS)
        writer.writeheader()
        writer.writerows(movies)


def main():
    """Download the page, save the parsed movies and print a short summary."""
    movies = parse_movies(fetch_html(url, headers))
    save_movies(movies)

    print(f'已保存 {len(movies)} 部电影到 movies.csv')
    print("类别编号1对应的类别:", genre_dict[1])


if __name__ == '__main__':
    main()