76 lines
2.5 KiB
Python
76 lines
2.5 KiB
Python
import requests
|
||
from bs4 import BeautifulSoup
|
||
import time
|
||
|
||
# Masquerade as a regular desktop browser.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Placeholder stored for any field that cannot be extracted from the page.
_UNKNOWN = "未知"
# Seconds to wait for the server; without a timeout requests may block forever.
_REQUEST_TIMEOUT = 10

all_movies = []


def _tag_text(parent, name, css_class=None):
    """Return the text of the first matching descendant tag, or the placeholder.

    ``find`` returns ``None`` when nothing matches, so the lookup is guarded
    instead of letting ``.text`` raise AttributeError on a changed layout.
    """
    if css_class is None:
        tag = parent.find(name)
    else:
        tag = parent.find(name, class_=css_class)
    return tag.text if tag is not None else _UNKNOWN


def _info_lines(item):
    """Return the non-empty, stripped lines of the item's ``div.bd > p`` text."""
    body = item.find("div", class_="bd")
    paragraph = body.p if body is not None else None
    if paragraph is None:
        return []
    return [line.strip() for line in paragraph.text.split("\n") if line.strip()]


def _parse_item(item):
    """Build the record dict (rank, title, director, year, area, genre, score)
    for one ``div.item`` element; missing pieces become the placeholder."""
    director = year = area = genre = _UNKNOWN
    lines = _info_lines(item)

    # First line: director (everything between "导演:" and "主演:").
    if lines and "导演:" in lines[0]:
        director = lines[0].split("导演:")[1].split("主演:")[0].strip()

    # Second line: year / area / genre, separated by slashes.
    if len(lines) >= 2:
        parts = [part.strip() for part in lines[1].split("/")]
        year = parts[0]  # split() always yields at least one element
        if len(parts) > 1:
            area = parts[1]
        if len(parts) > 2:
            genre = parts[2]

    return {
        "排名": _tag_text(item, "em"),
        "片名": _tag_text(item, "span", "title"),
        "导演": director,
        "年份": year,
        "地区": area,
        "类型": genre,
        "评分": _tag_text(item, "span", "rating_num"),
    }


# Crawl 10 pages, 25 movies per page = 250 movies.
for page in range(10):
    start = page * 25
    url = f"https://movie.douban.com/top250?start={start}&filter="

    print(f"正在爬第 {page+1} 页 …")
    try:
        resp = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
        # Surface HTTP errors instead of silently parsing an error page.
        resp.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: report the failed page and keep going with the rest.
        print(f"第 {page+1} 页请求失败,已跳过:{exc}")
        time.sleep(1)
        continue

    soup = BeautifulSoup(resp.text, "html.parser")
    all_movies.extend(
        _parse_item(item) for item in soup.find_all("div", class_="item")
    )

    time.sleep(1)  # throttle requests to avoid getting blocked
|
||
|
||
# ------------------- Write the TXT report -------------------
# Fields are emitted in this fixed order, one "key:value" line each,
# followed by a 50-dash separator line after every movie.
field_order = ("排名", "片名", "导演", "年份", "地区", "类型", "评分")
separator = "-" * 50 + "\n"

with open("douban_top250.txt", "w", encoding="utf-8") as out:
    for record in all_movies:
        out.writelines(f"{key}:{record[key]}\n" for key in field_order)
        out.write(separator)

print("全部爬完!已保存到 douban_top250.txt")