网络数据采集(爬虫基础)

This commit is contained in:
2509165020
2026-03-13 17:49:38 +08:00
parent 8b8266a311
commit 06e4a6216d

42
爬虫1/pachong1.py.txt Normal file
View File

@@ -0,0 +1,42 @@
import requests
from bs4 import BeautifulSoup
import time
def crawl_movie_info():
    """Fetch basic info for page 1 of Douban Movie Top 250.

    Prints the title, rating and year of each movie found, then returns
    the list of parsed item tags (``bs4.Tag``).  Returns ``None`` when
    the HTTP request itself fails.
    """
    url = "https://movie.douban.com/top250"
    headers = {
        # Douban rejects requests that do not carry a browser-like User-Agent.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    # Keep the try-block limited to the network call: only transport /
    # HTTP errors should abort the whole scrape.
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # 如果请求失败,抛出异常
    except requests.RequestException as e:
        print(f"❌ 爬取失败:{e}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    movie_items = soup.find_all("div", class_="item")
    print(f"✅ 成功获取到 {len(movie_items)} 部电影信息!")
    print("-" * 50)

    for index, item in enumerate(movie_items, 1):
        # Be robust to page-layout changes: skip an item whose expected
        # tags are missing instead of aborting the entire run (the old
        # broad except turned one bad item into a total failure).
        title_tag = item.find("span", class_="title")
        rating_tag = item.find("span", class_="rating_num")
        bd_tag = item.find("div", class_="bd")
        info_tag = bd_tag.find("p") if bd_tag else None
        if not (title_tag and rating_tag and info_tag):
            continue

        title = title_tag.get_text()
        rating = rating_tag.get_text()
        info_line = info_tag.get_text().strip()
        # The <p> holds two lines (crew / "year / country / genre");
        # the second line starts with the 4-digit year.
        year = info_line.split("\n")[-1].strip()[:4]

        print(f"🎬 第 {index} 部:")
        print(f"   片名:{title}")
        print(f"   评分:{rating}")
        print(f"   年份:{year}")
        print("-" * 30)

    return movie_items
# Script entry point: announce the run, then perform the crawl once.
if __name__ == "__main__":
    print("🚀 开始爬取豆瓣电影 Top250 信息...")
    crawl_movie_info()