import time

import requests
from bs4 import BeautifulSoup


def _node_text(node):
    """Return the text of a parsed tag, or "" when the tag was not found."""
    return node.get_text() if node is not None else ""


def _extract_year(info_text):
    """Return the first four characters of the last line of *info_text*."""
    return info_text.split("\n")[-1].strip()[:4]


def crawl_movie_info():
    """Fetch the Douban Movie Top 250 page and print basic info per movie.

    A single GET request is sent to the Top 250 URL (no pagination is
    performed). For every ``<div class="item">`` found in the response the
    title, rating and year are printed to stdout.

    Returns:
        The collection returned by ``soup.find_all("div", class_="item")``
        on success, or ``None`` if any exception was raised (the error is
        printed rather than propagated).
    """
    url = "https://movie.douban.com/top250"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # raise if the request failed (4xx/5xx)
        soup = BeautifulSoup(response.text, "html.parser")
        movie_items = soup.find_all("div", class_="item")

        print(f"✅ 成功获取到 {len(movie_items)} 部电影信息!")
        print("-" * 50)
        for index, item in enumerate(movie_items, 1):
            # find() returns None for a missing tag; fall back to "" so one
            # incomplete entry does not abort the whole crawl.
            title = _node_text(item.find("span", class_="title"))
            rating = _node_text(item.find("span", class_="rating_num"))

            body = item.find("div", class_="bd")
            paragraph = body.find("p") if body is not None else None
            info_line = _node_text(paragraph).strip()
            year = _extract_year(info_line)

            print(f"🎬 第 {index} 部:")
            print(f" 片名:{title}")
            print(f" 评分:{rating}")
            print(f" 年份:{year}")
            print("-" * 30)

        return movie_items

    except Exception as e:
        # Top-level boundary of the script: report the failure and signal it
        # to the caller with None instead of crashing.
        print(f"❌ 爬取失败:{e}")
        return None


if __name__ == "__main__":
    print("🚀 开始爬取豆瓣电影 Top250 信息...")
    crawl_movie_info()