"""Scrape the Douban Top 250 movie list and save it to a text file.

For each of the ten list pages the script downloads the HTML, extracts the
rank, title, director, year, area, genre and score of every movie entry,
and finally writes all collected records to ``douban_top250.txt``.
"""

import time

import requests
from bs4 import BeautifulSoup

# Send a desktop-browser User-Agent with every request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
}

PAGE_COUNT = 10  # 10 pages x 25 movies per page = 250 movies
PAGE_SIZE = 25
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request hangs forever
REQUEST_DELAY = 1  # seconds to wait between page requests
OUTPUT_PATH = "douban_top250.txt"
UNKNOWN = "未知"  # placeholder written for any field that cannot be parsed

# Record keys, in output order. Each key doubles as its label in the file.
FIELDS = ("排名", "片名", "导演", "年份", "地区", "类型", "评分")


def _tag_text(tag):
    """Return ``tag.text``, or the placeholder when *tag* is ``None``."""
    return tag.text if tag is not None else UNKNOWN


def parse_info(info):
    """Split a movie's info text into ``(director, year, area, genre)``.

    The first non-blank line is searched for the director, the second one
    for the "/"-separated year, area and genre. Any piece that cannot be
    found is returned as the placeholder.
    """
    lines = [line.strip() for line in info.split("\n") if line.strip()]

    director = UNKNOWN
    # Guard against an empty info block before touching lines[0].
    if lines and "导演:" in lines[0]:
        director = lines[0].split("导演:")[1].split("主演:")[0].strip()

    year = area = genre = UNKNOWN
    if len(lines) >= 2:
        parts = [part.strip() for part in lines[1].split("/")]
        if len(parts) >= 3:
            # Index from the end so that a year field which itself contains
            # "/" does not shift the area and genre columns.
            *year_parts, area, genre = parts
            year = " / ".join(year_parts)
        else:
            year = parts[0]
            if len(parts) == 2:
                area = parts[1]

    return director, year, area, genre


def parse_item(item):
    """Build one movie record (a dict keyed by ``FIELDS``) from an item tag."""
    body = item.find("div", class_="bd")
    paragraph = body.p if body is not None else None
    info = paragraph.text.strip() if paragraph is not None else ""
    director, year, area, genre = parse_info(info)

    return {
        "排名": _tag_text(item.find("em")),
        "片名": _tag_text(item.find("span", class_="title")),
        "导演": director,
        "年份": year,
        "地区": area,
        "类型": genre,
        "评分": _tag_text(item.find("span", class_="rating_num")),
    }


def fetch_page(start):
    """Download the list page beginning at offset *start* and parse its movies.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    url = f"https://movie.douban.com/top250?start={start}&filter="
    resp = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an error status instead of silently parsing an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    return [parse_item(item) for item in soup.find_all("div", class_="item")]


def scrape_all():
    """Fetch every page in turn and return the combined list of records."""
    movies = []
    for page in range(PAGE_COUNT):
        print(f"正在爬第 {page+1} 页 …")
        try:
            movies.extend(fetch_page(page * PAGE_SIZE))
        except requests.RequestException as exc:
            # Keep whatever was collected so far; report and skip this page.
            print(f"第 {page+1} 页请求失败,已跳过:{exc}")
        time.sleep(REQUEST_DELAY)  # throttle requests to avoid being blocked
    return movies


def write_movies(movies, path):
    """Write *movies* to *path* as UTF-8 text, one labelled field per line."""
    with open(path, "w", encoding="utf-8") as f:
        for movie in movies:
            for field in FIELDS:
                f.write(f"{field}:{movie[field]}\n")
            f.write("-" * 50 + "\n")


def main():
    """Scrape all pages and save the result to ``OUTPUT_PATH``."""
    movies = scrape_all()
    write_movies(movies, OUTPUT_PATH)
    print("全部爬完!已保存到 douban_top250.txt")


if __name__ == "__main__":
    main()