import requests
from bs4 import BeautifulSoup
import time


def crawl_movie_info():
    """Crawl basic movie info from the first page of Douban Movie Top250.

    Prints the title, rating and release year of every movie item found
    on the page.

    Returns:
        The list of parsed ``div.item`` tags on success, or ``None`` when
        the HTTP request or parsing fails.
    """
    # NOTE(review): this fetches only the first page (25 movies); the
    # remaining Top250 pages are reached via a ?start=N query parameter.
    # FIX: the original line ended with a stray ')' — a SyntaxError that
    # prevented the script from running at all.
    url = "https://movie.douban.com/top250"
    # A browser-like User-Agent is needed: Douban rejects the default
    # python-requests UA.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # raise on non-2xx responses
        soup = BeautifulSoup(response.text, "html.parser")
        movie_items = soup.find_all("div", class_="item")

        print(f"✅ 成功获取到 {len(movie_items)} 部电影信息!")
        print("-" * 50)
        for index, item in enumerate(movie_items, 1):
            # FIX: the original dereferenced find() results directly;
            # a missing tag would raise AttributeError on None and abort
            # the whole loop. Skip malformed items instead.
            title_tag = item.find("span", class_="title")
            rating_tag = item.find("span", class_="rating_num")
            bd_tag = item.find("div", class_="bd")
            info_p = bd_tag.find("p") if bd_tag is not None else None
            if title_tag is None or rating_tag is None or info_p is None:
                continue

            title = title_tag.get_text()
            rating = rating_tag.get_text()
            info_line = info_p.get_text().strip()
            # The last line of the info paragraph starts with the year,
            # e.g. "1994 / 美国 / 犯罪 剧情" — take its first 4 characters.
            year = info_line.split("\n")[-1].strip()[:4]
            print(f"🎬 第 {index} 部:")
            print(f"   片名:{title}")
            print(f"   评分:{rating}")
            print(f"   年份:{year}")
            print("-" * 30)

        return movie_items

    except Exception as e:
        # Broad catch is deliberate for this best-effort script: report
        # the failure and return None rather than crashing.
        print(f"❌ 爬取失败:{e}")
        return None


if __name__ == "__main__":
    print("🚀 开始爬取豆瓣电影 Top250 信息...")
    crawl_movie_info()