Files
final-practice/期末/爬豆瓣
2509165045 3b39c7148a 期末
2026-06-23 12:08:02 +08:00

40 lines
1.2 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
from bs4 import BeautifulSoup as bs
import json
# Scrape the first two pages of the Douban Top 250 list and save each movie's
# title, leading actors and quote to movie.json.

# Browser-style User-Agent header sent with every request.
headers = {'User-Agent':'Mozilla/5.0(Windows NT 10.0;Win64; x64) AppleWebKit/537.36(KHTML,like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
urls = [
    "https://movie.douban.com/top250?start=0",   # page 1: ranks 1-25
    "https://movie.douban.com/top250?start=25",  # page 2: ranks 26-50
]
_MISSING = "无"  # placeholder stored when a field is absent from the page
_TIMEOUT_SECONDS = 10  # without a timeout requests.get() can block indefinitely


def _parse_item(item):
    """Build the record for one ``div.item`` element of the list page.

    Args:
        item: BeautifulSoup tag for a single movie entry.

    Returns:
        A dict with the keys "title", "actor" and "quote". Any field that
        cannot be found in the entry is set to the "无" placeholder.
    """
    title_tag = item.find("span", class_="title")
    title = title_tag.get_text() if title_tag is not None else _MISSING

    bd = item.find("div", class_="bd")
    bd_text = bd.get_text().strip() if bd is not None else ""

    # The cast is whatever follows the "主演:" label, up to the end of that
    # line; entries without the label get the placeholder.
    after_label = bd_text.split("主演:")
    actors = after_label[1].split("\n")[0] if len(after_label) > 1 else _MISSING

    # find() returns None when there is no quote paragraph, so test for that
    # explicitly instead of catching every exception.
    quote_tag = bd.find("p", class_="quote") if bd is not None else None
    quote = quote_tag.get_text().strip() if quote_tag is not None else _MISSING

    return {"title": title, "actor": actors, "quote": quote}


data = []
for url in urls:
    resp = requests.get(url, headers=headers, timeout=_TIMEOUT_SECONDS)
    # Fail loudly on an HTTP error status rather than parsing an error page
    # into zero entries and writing an incomplete file.
    resp.raise_for_status()
    resp.encoding = 'utf-8'  # force UTF-8 decoding of the response body
    soup = bs(resp.text, "html.parser")
    for i, item in enumerate(soup.find_all("div", class_="item")):
        print(i)  # progress: index of the entry within the current page
        data.append(_parse_item(item))
print(data)
# ensure_ascii=False keeps the Chinese text readable in the output file.
with open("movie.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=4)