"""Scrape the first 10 entries of the Douban Top 250 movie chart, export
them to txt/csv/json, print simple statistics, and download the posters.

Network I/O script: requires access to movie.douban.com.
"""
import csv
import json
import os
import re

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent so Douban does not reject the request outright.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64; x64) AppleWebKit/537.36(KHTML,like Gecko) Chrome/91.8.4472.124 Safari/537.36'
}

MAX_MOVIES = 10       # only the first 10 chart entries are processed
REQUEST_TIMEOUT = 10  # seconds; the original had no timeout and could hang


def _parse_item(item):
    """Extract one movie record from a '<div class="item">' soup node.

    Returns a dict with keys rank/title/en_title/rating/quote/year/poster_url.
    Missing fields fall back to placeholder strings rather than raising.
    """
    rank_tag = item.find('em')
    rank = rank_tag.text if rank_tag else '未知排名'
    # BUGFIX: the original called int(rank) unconditionally, which raises
    # ValueError when the <em> tag is missing and rank is the placeholder.
    try:
        rank = int(rank)
    except ValueError:
        pass

    title_tag = item.find('span', class_='title')
    if title_tag:
        title = title_tag.text
    else:
        title = "未找到标题"
        print(f"在这个item里没找到标题:{item}")

    other_tag = item.find('span', class_='other')
    other_title = other_tag.text.strip() if other_tag else ""
    if other_title.startswith('/'):
        other_title = other_title[1:].strip()

    rating_tag = item.find('span', class_='rating_num')
    rating = rating_tag.text if rating_tag else "未知评分"

    inq_tag = item.find('span', class_='inq')
    inq = inq_tag.text.strip() if inq_tag else ""

    # The year <span> sits next to an optional "playable" badge; search
    # relative to the badge first, then fall back to a direct lookup.
    playable_tag = item.find('span', class_='playable')
    if playable_tag:
        year_tag = playable_tag.find_previous_sibling('span', class_='year')
    else:
        year_tag = item.find('span', class_='year')
    year = year_tag.text.strip('()') if year_tag else "未知年份"

    img_tag = item.find('img')
    poster_url = img_tag['src'] if img_tag else ""

    return {
        "rank": rank,
        "title": title,
        "en_title": other_title,
        "rating": rating,
        "quote": inq,
        "year": year,
        "poster_url": poster_url,
    }


def _fetch_movies(limit=MAX_MOVIES):
    """Crawl Top 250 pages (25 per page) until *limit* movies are collected."""
    movies = []
    for start in range(0, 250, 25):
        url = f'https://movie.douban.com/top250?start={start}'
        # BUGFIX: added timeout and status check; the original could hang
        # forever or silently parse an error/captcha page as an empty chart.
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        for item in soup.find_all('div', class_='item'):
            movies.append(_parse_item(item))
            if len(movies) >= limit:
                return movies
    return movies


def _save_txt(movies):
    """Write one title per line to movies.txt."""
    with open("movies.txt", "w", encoding="utf-8") as f:
        for movie in movies:
            f.write(movie["title"] + "\n")
    print(" 已保存:movies.txt")


def _save_csv(movies):
    """Write the movie table to movies.csv (BOM so Excel decodes correctly)."""
    with open("movies.csv", "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["排名", "中文名", "英文名", "评分", "简介", "年份"])
        for movie in movies:
            writer.writerow([movie["rank"], movie["title"], movie["en_title"],
                             movie["rating"], movie["quote"], movie["year"]])
    print(" 已保存:movies.csv")


def _save_json(movies):
    """Dump the full records to movies.json, keeping non-ASCII readable."""
    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(movies, f, ensure_ascii=False, indent=4)
    print(" 已保存:movies.json")


def _export_high_rating(threshold=9.5):
    """Re-read movies.csv and export rows rated above *threshold*."""
    high_rating_movies = []
    with open("movies.csv", "r", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            # BUGFIX: skip placeholder ratings ("未知评分") instead of
            # crashing in float().
            try:
                value = float(row["评分"])
            except ValueError:
                continue
            if value > threshold:
                high_rating_movies.append(row)

    print("\n=== 评分高于9.5的电影 ===")
    for m in high_rating_movies:
        print(f"{m['排名']} {m['中文名']} {m['评分']}")

    # BUGFIX: the original indexed high_rating_movies[0] unconditionally
    # and raised IndexError whenever no movie passed the threshold.
    if high_rating_movies:
        with open("high_rating.csv", "w", encoding="utf-8-sig", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=high_rating_movies[0].keys())
            writer.writeheader()
            writer.writerows(high_rating_movies)
        print(" 已保存:high_rating.csv")


def _print_stats():
    """Load movies.json and print average / maximum rating statistics."""
    with open("movies.json", "r", encoding="utf-8") as f:
        movie_data = json.load(f)

    # Pair each record with its numeric rating, dropping placeholders
    # (BUGFIX: the original float() call crashed on "未知评分").
    rated = []
    for m in movie_data:
        try:
            rated.append((m, float(m["rating"])))
        except ValueError:
            pass
    if not rated:
        # BUGFIX: guard against ZeroDivisionError when nothing was scraped.
        return

    ratings = [r for _, r in rated]
    avg_rating = sum(ratings) / len(ratings)
    max_rating = max(ratings)
    top_movies = [m for m, r in rated if r == max_rating]

    print(f"\n=== 统计信息 ===")
    print(f"前10部电影平均分:{avg_rating:.2f}")
    print(f"最高评分:{max_rating}")
    for m in top_movies:
        print(f"评分最高的电影:{m['title']}")


def _download_posters(movies, folder="movie_posters"):
    """Download each movie's poster into *folder*; failures are logged only."""
    os.makedirs(folder, exist_ok=True)
    for movie in movies:
        try:
            if movie["poster_url"]:
                img_resp = requests.get(movie["poster_url"], headers=HEADERS,
                                        timeout=REQUEST_TIMEOUT)
                img_resp.raise_for_status()
                # BUGFIX: titles may contain characters that are illegal in
                # file names (e.g. '/', ':'); sanitize before building the path.
                safe_title = re.sub(r'[\\/:*?"<>|]', '_', movie['title'])
                filename = os.path.join(folder, f"{movie['rank']}_{safe_title}.jpg")
                with open(filename, "wb") as f:
                    f.write(img_resp.content)
                print(f" 已下载海报:{movie['title']}")
        except Exception as e:
            # Best-effort: one bad poster must not abort the remaining ones.
            print(f" 海报下载失败:{movie['title']},错误:{e}")


def main():
    """Run the full pipeline: scrape, export, analyze, download posters."""
    movies = _fetch_movies()
    _save_txt(movies)
    _save_csv(movies)
    _save_json(movies)
    _export_high_rating()
    _print_stats()
    _download_posters(movies)
    print("\n 所有任务全部完成!")


if __name__ == "__main__":
    main()