42 lines
1.5 KiB
Plaintext
42 lines
1.5 KiB
Plaintext
import requests
|
||
from bs4 import BeautifulSoup
|
||
import time
|
||
|
||
def crawl_movie_info():
    """Fetch the Douban movie Top 250 page and print basic info per movie.

    Sends a single GET request to ``https://movie.douban.com/top250``,
    parses the response with BeautifulSoup and prints the title, rating
    and year of every ``div.item`` element found in it. Only that one URL
    is requested; no further pages are followed.

    Returns:
        The collection returned by ``soup.find_all("div", class_="item")``,
        or ``None`` if the HTTP request fails (connection error, timeout,
        or a non-success status code).
    """
    url = "https://movie.douban.com/top250"
    headers = {
        # Browser-like User-Agent; presumably the site rejects the default
        # one sent by requests -- TODO confirm.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    # Keep the try body limited to the network call so that only request
    # failures are reported as a failed crawl.
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # raise on a 4xx/5xx status code
    except requests.RequestException as e:
        print(f"❌ 爬取失败:{e}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    movie_items = soup.find_all("div", class_="item")

    print(f"✅ 成功获取到 {len(movie_items)} 部电影信息!")
    print("-" * 50)

    for index, item in enumerate(movie_items, 1):
        title = _tag_text(item.find("span", class_="title"))
        rating = _tag_text(item.find("span", class_="rating_num"))

        # The info paragraph lives inside div.bd; either element may be
        # missing, so look them up step by step instead of chaining.
        body = item.find("div", class_="bd")
        paragraph = body.find("p") if body is not None else None
        if paragraph is None:
            year = "未知"
        else:
            info_line = paragraph.get_text().strip()
            # Takes the first four characters of the last line of the
            # paragraph; assumes that line starts with the release year
            # -- TODO confirm against the page markup.
            year = info_line.split("\n")[-1].strip()[:4]

        print(f"🎬 第 {index} 部:")
        print(f" 片名:{title}")
        print(f" 评分:{rating}")
        print(f" 年份:{year}")
        print("-" * 30)

    return movie_items


def _tag_text(tag, default="未知"):
    """Return the text of a parsed tag, or *default* when the tag is None."""
    return tag.get_text() if tag is not None else default
|
||
|
||
if __name__ == "__main__":
    # Script entry point: announce the run, then perform the crawl.
    print("🚀 开始爬取豆瓣电影 Top250 信息...")
    crawl_movie_info()