"""Scrape movie title, rating and quote from a Douban Top 250 page into movies.csv."""

import csv
import time  # not used by the code in this script; kept as a file-level import

import requests
from bs4 import BeautifulSoup


# Lookup table from a numeric genre id to its display name.
# NOTE(review): "记录" may be intended as "纪录" (documentary) — confirm before
# changing, since the value is runtime data.
genre_dict = {
    0: "剧情",
    1: "喜剧",
    2: "科幻",
    3: "悬疑",
    4: "动作",
    5: "爱情",
    6: "动画",
    7: "犯罪",
    8: "奇幻",
    9: "记录",
}

url = 'https://movie.douban.com/top250'
headers = {'User-Agent': 'Mozilla/5.0...'}

# Upper bound (seconds) for the HTTP request; without one, requests waits
# indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10


def _text_or_empty(parent, selector):
    """Return the text of the first node under *parent* matching *selector*.

    Returns an empty string when no node matches, so an optional field never
    raises AttributeError.
    """
    node = parent.select_one(selector)
    return node.get_text() if node is not None else ''


# NOTE(review): only this single URL is requested; if the list is paginated,
# later pages are not fetched — confirm whether that is intended.
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on an HTTP error instead of parsing an error page and silently
# writing an empty CSV.
response.raise_for_status()

soup = BeautifulSoup(response.text, 'lxml')
movies = []

for item in soup.select('.item'):
    title_node = item.select_one('.title')
    if title_node is None:
        # An entry without a title cannot form a meaningful row; skip it
        # rather than aborting the whole run.
        continue

    movies.append({
        'title': title_node.get_text().strip(),
        'rating': _text_or_empty(item, '.rating_num'),
        'quote': _text_or_empty(item, '.inq'),
    })


# newline='' lets the csv module control line endings itself.
with open('movies.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['title', 'rating', 'quote'])
    writer.writeheader()
    writer.writerows(movies)

print(f'已保存 {len(movies)} 部电影到 movies.csv')
print("类别编号1对应的类别:",genre_dict[1])