import requests
from bs4 import BeautifulSoup
import csv
import time


def scrape_douban_top250():
    # Spoof a regular browser User-Agent; Douban rejects requests without one.
    headers = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/120.0.0.0 Safari/537.36')
    }

    with open('douban_movies.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['rank', 'title', 'rating', 'num_ratings', 'quote']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # The Top 250 list is paginated 25 movies per page via the `start` query parameter.
        for start in range(0, 250, 25):
            url = f'https://movie.douban.com/top250?start={start}'
            try:
                response = requests.get(url, headers=headers, timeout=10)
                response.raise_for_status()

                soup = BeautifulSoup(response.text, 'html.parser')
                movies = soup.find_all('div', class_='item')

                for movie in movies:
                    rank = movie.find('em').text
                    title = movie.find('span', class_='title').text
                    rating = movie.find('span', class_='rating_num').text
                    # The last <span> in the star block holds text like "3056732人评价";
                    # strip the Chinese "people rated" suffix to keep only the number.
                    people_span = movie.find('div', class_='star').find_all('span')[-1]
                    people = people_span.text.replace('人评价', '')
                    # Not every movie has a one-line quote, so fall back to "N/A".
                    quote_tag = movie.find('span', class_='inq')
                    quote = quote_tag.text if quote_tag else 'N/A'

                    writer.writerow({
                        'rank': rank,
                        'title': title,
                        'rating': rating,
                        'num_ratings': people,
                        'quote': quote
                    })

                print(f"Page {start // 25 + 1} scraped, URL: {url}")
                time.sleep(1)  # pause between pages to avoid hammering the site
            except requests.exceptions.RequestException as e:
                print(f"Request failed: {e}")
                continue

    print("✅ All data scraped and saved to douban_movies.csv")


if __name__ == '__main__':
    scrape_douban_top250()
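
# Optional sanity check, not part of the original scraper: a minimal sketch that
# reads douban_movies.csv back with the stdlib csv module and prints the first few
# rows, so you can confirm the columns written above look right. The file name and
# column names are taken from the scraper; the helper name and `limit` parameter
# are illustrative assumptions.
def preview_csv(path='douban_movies.csv', limit=5):
    with open(path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if i >= limit:
                break
            print(row['rank'], row['title'], row['rating'], row['num_ratings'])

# Example usage: uncomment to preview the output after scraping.
# preview_csv()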