"""Scrape one Douban Top 250 listing page and save the movies to ``movies.json``.

For every ``<div class="item">`` on the page at ``url`` the script records the
title, the cast line and the one-line quote, prints the collected list and
writes it to ``movies.json`` as UTF-8 JSON.
"""

import json

import requests
from bs4 import BeautifulSoup as bs

# Adjacent string literals are concatenated verbatim, so every fragment but
# the last must end with a space to form a well-formed User-Agent value.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/129.0.0.0 Safari/537.36"
    )
}
url = "https://movie.douban.com/top250?start=0&filter="

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10
# Label that precedes the cast list inside an item's "bd" block.
ACTORS_LABEL = "主演:"
# Placeholder stored when a field is absent from an item.
MISSING = "无"
OUTPUT_PATH = "movies.json"


def parse_actors(info_text: str) -> str:
    """Return the cast line that follows ``ACTORS_LABEL`` in *info_text*.

    The result is the remainder of the line on which the label appears, with
    surrounding whitespace removed. ``MISSING`` is returned when the label
    does not occur at all.
    """
    parts = info_text.split(ACTORS_LABEL)
    if len(parts) < 2:
        return MISSING
    return parts[1].split("\n")[0].strip()


def parse_item(item) -> dict:
    """Extract ``title``, ``actors`` and ``quote`` from one item tag.

    *item* is a BeautifulSoup tag for a ``<div class="item">`` element. Any
    field whose element is not present is reported as ``MISSING`` rather than
    raising, so a single incomplete entry cannot abort the whole run.
    """
    # ``find`` returns None when nothing matches; test for that explicitly
    # instead of catching whatever exception a None dereference would raise.
    title_tag = item.find("span", class_="title")
    title = title_tag.get_text().strip() if title_tag is not None else MISSING

    body = item.find("div", class_="bd")
    if body is None:
        return {"title": title, "actors": MISSING, "quote": MISSING}

    quote_tag = body.find("p", class_="quote")
    quote = quote_tag.get_text().strip() if quote_tag is not None else MISSING

    return {
        "title": title,
        "actors": parse_actors(body.get_text()),
        "quote": quote,
    }


def fetch_movies(page_url: str = url) -> list:
    """Download *page_url* and return one dict per movie item found on it.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            4xx/5xx response (so an error page is never parsed as if it
            were a valid listing).
    """
    resp = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = "utf-8"
    soup = bs(resp.text, "html.parser")
    # Each item is parsed exactly once, so the output contains no duplicates.
    return [parse_item(item) for item in soup.find_all("div", class_="item")]


def main() -> None:
    """Fetch the listing, print it, and write it to ``OUTPUT_PATH``."""
    data = fetch_movies()
    print(data)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps the Chinese text readable in the file.
        json.dump(data, f, ensure_ascii=False)


if __name__ == "__main__":
    main()