"""Scrape the first pages of the Douban Top 250 list into ``movies.json``.

For every film the script records its rank, primary title, the cast text
from the info paragraph, and the one-line quote (when one is present).
"""

import json

import requests
from bs4 import BeautifulSoup

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# ``start`` offsets of the listing pages to fetch.
PAGE_OFFSETS = (0, 25)
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10
OUTPUT_PATH = "movies.json"
# Marker that precedes the cast list inside the info paragraph.
ACTORS_MARKER = "主演:"
# Placeholder stored when no cast list can be found.
NO_ACTORS = "无"


def _fetch_page(start_offset):
    """Download one listing page and return its HTML text.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response.
    """
    url = f"https://movie.douban.com/top250?start={start_offset}"
    resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an error page as an empty result.
    resp.raise_for_status()
    return resp.text


def _extract_actors(info_text):
    """Return the cast text that follows the marker, up to the end of its line.

    Returns the ``NO_ACTORS`` placeholder when the marker is absent.
    """
    _, marker, cast = info_text.rpartition(ACTORS_MARKER)
    if not marker:
        return NO_ACTORS
    return cast.split("\n")[0].strip()


def _parse_item(item, rank):
    """Build the record dict for a single ``div.item`` element.

    Missing tags fall back to defaults rather than raising, the same way a
    missing quote is treated.
    """
    title_tag = item.find("span", class_="title")
    title = title_tag.get_text(strip=True) if title_tag else ""

    body_tag = item.find("div", class_="bd")
    info_tag = body_tag.p if body_tag else None
    # Join text nodes with "\n": with the default empty separator the text
    # on either side of a <br> is glued together, so the line split in
    # _extract_actors would never cut anything off.
    info_text = info_tag.get_text("\n", strip=True) if info_tag else ""

    quote_tag = item.find("span", class_="inq")
    quote = quote_tag.get_text(strip=True) if quote_tag else ""

    return {
        "rank": rank,
        "title": title,
        "actors": _extract_actors(info_text),
        "quote": quote,
    }


def _scrape_page(start_offset):
    """Fetch one listing page and return the records for its films."""
    soup = BeautifulSoup(_fetch_page(start_offset), "html.parser")
    items = soup.find_all("div", class_="item")
    # Rank is the page offset plus the 1-based position on the page.
    return [
        _parse_item(item, start_offset + index + 1)
        for index, item in enumerate(items)
    ]


def main():
    """Scrape every configured page and write the records to ``OUTPUT_PATH``."""
    movie_data = []
    for start_offset in PAGE_OFFSETS:
        movie_data.extend(_scrape_page(start_offset))

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(movie_data, f, ensure_ascii=False, indent=2)

    print(f"爬取完成,共{len(movie_data)}部电影,已生成 movies.json")


if __name__ == "__main__":
    main()