Files
final-practice/20260609.3.py
2026-06-09 11:23:33 +08:00

35 lines
1.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import json
from bs4 import BeautifulSoup
# Browser-like User-Agent header sent with every page request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
# Accumulates one dict per scraped movie; filled by main().
movies = []

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the script indefinitely.
REQUEST_TIMEOUT = 10


def extract_lead_actor(info: str) -> str:
    """Return the first name listed after "主演:" in *info*.

    Returns "" when the marker is absent. The name list is split on a bare
    "/" and then stripped, so both the regular " / " separator and a
    separator without a trailing space (such as a clipped " /..." tail)
    are removed from the result.
    """
    if "主演:" not in info:
        return ""
    return info.split("主演:")[-1].split("/")[0].strip()


def parse_item(item) -> dict:
    """Build the rank/title/actors/quote record for one ``.item`` element."""
    rank = item.select_one(".pic em").text
    title = item.select_one(".title").text

    # Cast info: only the first line of the first paragraph under ``.bd``
    # is inspected. The paragraph is treated as optional, like the quote.
    info_tag = item.select_one(".bd p")
    info = info_tag.text.strip().split("\n")[0] if info_tag else ""

    # One-line quote: not every entry has one, so fall back to "".
    quote_tag = item.select_one(".quote .inq")
    quote = quote_tag.text if quote_tag else ""

    return {
        "rank": int(rank),
        "title": title,
        "actors": extract_lead_actor(info),
        "quote": quote,
    }


def fetch_page(start: int) -> list:
    """Download the listing page beginning at offset *start* and parse it.

    Raises ``requests.HTTPError`` for a 4xx/5xx response instead of parsing
    an error page and silently producing no records.
    """
    url = f"https://movie.douban.com/top250?start={start}"
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")
    return [parse_item(item) for item in soup.select(".item")]


def main():
    """Scrape the first 50 entries and save them to ``movies.json``."""
    # Douban Top250 lists 25 entries per page, so the first 50 need 2 pages.
    for start in [0, 25]:
        movies.extend(fetch_page(start))

    # Save as a JSON file; ensure_ascii=False keeps non-ASCII text readable.
    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(movies, f, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    main()