import time

import requests
from bs4 import BeautifulSoup

# Placeholder printed when an expected element is absent from a movie entry.
_MISSING_TEXT = "未知"


def _first_text(item, tag_name, css_class):
    """Return the text of the first ``tag_name.css_class`` descendant of *item*.

    Falls back to ``_MISSING_TEXT`` when no such element exists, so one
    malformed entry cannot abort the whole crawl.
    """
    node = item.find(tag_name, class_=css_class)
    return node.get_text() if node is not None else _MISSING_TEXT


def _extract_year(item):
    """Return the first four characters of the last line of the entry's info text.

    The info text is read from the first ``<p>`` inside ``div.bd``. Returns
    ``_MISSING_TEXT`` when that structure is missing or the line is empty.
    """
    body = item.find("div", class_="bd")
    paragraph = body.find("p") if body is not None else None
    if paragraph is None:
        return _MISSING_TEXT
    info_text = paragraph.get_text().strip()
    # Only the leading four characters of the final line are kept as the year.
    year = info_text.split("\n")[-1].strip()[:4]
    return year or _MISSING_TEXT


def crawl_movie_info():
    """Fetch the Douban Top250 landing page and print basic info per movie.

    Only the single page at ``https://movie.douban.com/top250`` is requested.
    For every ``div.item`` element found, the title, rating and year are
    printed to stdout.

    Returns:
        The collection of ``div.item`` elements returned by
        ``BeautifulSoup.find_all`` on success, or ``None`` when the HTTP
        request fails (connection error, timeout, or non-2xx status).
    """
    url = "https://movie.douban.com/top250"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        )
    }

    # Keep the try body limited to the network call: that is the only step
    # expected to fail for reasons outside this code's control.
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # raise on a non-success HTTP status
    except requests.RequestException as e:
        print(f"❌ 爬取失败:{e}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    movie_items = soup.find_all("div", class_="item")

    print(f"✅ 成功获取到 {len(movie_items)} 部电影信息!")
    print("-" * 50)

    for index, item in enumerate(movie_items, 1):
        title = _first_text(item, "span", "title")
        rating = _first_text(item, "span", "rating_num")
        year = _extract_year(item)

        print(f"🎬 第 {index} 部:")
        print(f" 片名:{title}")
        print(f" 评分:{rating}")
        print(f" 年份:{year}")
        print("-" * 30)

    return movie_items


if __name__ == "__main__":
    print("🚀 开始爬取豆瓣电影 Top250 信息...")
    crawl_movie_info()