"""Scrape the top of the Douban Top 250 movie list and export the results.

Run as a script, this module:

1. downloads list pages from movie.douban.com and parses up to ``limit``
   movies (10 by default),
2. writes ``movies.txt``, ``movies.csv`` and ``movies.json``,
3. re-reads ``movies.csv`` and writes the movies rated above 9.5 to
   ``high_rating.csv``,
4. re-reads ``movies.json`` and prints rating statistics,
5. downloads every poster image into ``movie_posters/``.
"""
# Recovered from a git format-patch (commit 6a5448e) that added this script
# as 2604022509165013.py.

import csv
import json
import os
import re

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64; x64) AppleWebKit/537.36(KHTML,like Gecko) Chrome/91.8.4472.124 Safari/537.36'
}

LIST_URL = 'https://movie.douban.com/top250'

# Column titles of movies.csv and the movie-dict keys that feed them,
# in matching order.
CSV_HEADER = ["排名", "中文名", "英文名", "评分", "简介", "年份"]
MOVIE_KEYS = ("rank", "title", "en_title", "rating", "quote", "year")


def to_float(value):
    """Return *value* as a float, or ``None`` when it is not numeric.

    Missing ratings are stored as the placeholder text "未知评分", so callers
    need a conversion that does not raise.
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced.

    Path separators, the characters Windows forbids and control characters
    become ``_``. An empty result falls back to ``"untitled"``.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name).strip()
    return cleaned or "untitled"


def select_high_rating(rows, threshold=9.5, field="评分"):
    """Return the rows whose *field* is a number strictly above *threshold*.

    Rows with a missing or non-numeric rating are skipped instead of raising.
    """
    selected = []
    for row in rows:
        score = to_float(row.get(field))
        if score is not None and score > threshold:
            selected.append(row)
    return selected


def summarize_ratings(movie_data):
    """Compute rating statistics over *movie_data*.

    Args:
        movie_data: iterable of dicts with a ``"rating"`` entry.

    Returns:
        ``None`` when no movie has a numeric rating; otherwise a dict with
        ``count`` (number of rated movies), ``average``, ``max`` and ``top``
        (the movies whose rating equals ``max``, in input order).
    """
    scored = []
    for movie in movie_data:
        score = to_float(movie.get("rating"))
        if score is not None:
            scored.append((movie, score))
    if not scored:
        return None

    ratings = [score for _, score in scored]
    highest = max(ratings)
    return {
        "count": len(ratings),
        "average": sum(ratings) / len(ratings),
        "max": highest,
        "top": [movie for movie, score in scored if score == highest],
    }


def parse_movie(item):
    """Extract one movie dict from a ``div.item`` element of the list page.

    Every field falls back to a placeholder when its tag is absent, so a
    partially matching item never raises.
    """
    rank = '未知排名'
    rank_tag = item.find('em')
    if rank_tag is not None:
        try:
            rank = int(rank_tag.text)
        except ValueError:
            pass  # keep the placeholder instead of aborting the whole scrape

    title_tag = item.find('span', class_='title')
    if title_tag is not None:
        title = title_tag.text
    else:
        title = "未找到标题"
        print(f"在这个item里没找到标题:{item}")

    other_tag = item.find('span', class_='other')
    other_title = other_tag.text.strip() if other_tag is not None else ""
    # The alternate-title text may start with a "/" separator; drop it.
    if other_title.startswith('/'):
        other_title = other_title[1:].strip()

    rating_tag = item.find('span', class_='rating_num')
    rating = rating_tag.text if rating_tag is not None else "未知评分"

    inq_tag = item.find('span', class_='inq')
    quote = inq_tag.text.strip() if inq_tag is not None else ""

    # NOTE(review): the year is read from a span with class "year"; confirm
    # the list page really contains one, otherwise the year is always the
    # placeholder below.
    playable_tag = item.find('span', class_='playable')
    if playable_tag is not None:
        year_tag = playable_tag.find_previous_sibling('span', class_='year')
    else:
        year_tag = item.find('span', class_='year')
    year = year_tag.text.strip('()') if year_tag is not None else "未知年份"

    img_tag = item.find('img')
    # .get() tolerates an <img> without a src attribute.
    poster_url = img_tag.get('src', '') if img_tag is not None else ""

    return {
        "rank": rank,
        "title": title,
        "en_title": other_title,
        "rating": rating,
        "quote": quote,
        "year": year,
        "poster_url": poster_url,
    }


def fetch_movies(limit=10, page_size=25, total=250, timeout=10):
    """Download list pages and return up to *limit* parsed movies.

    Args:
        limit: maximum number of movies to collect.
        page_size: step of the ``start`` query parameter between pages.
        total: upper bound (exclusive) for the ``start`` parameter.
        timeout: per-request timeout in seconds.

    Raises:
        requests.RequestException: on network failure or an HTTP error status.
    """
    # Third-party packages are imported here rather than at module level so
    # the pure helpers above stay importable where they are not installed.
    import requests
    from bs4 import BeautifulSoup

    movies = []
    for start in range(0, total, page_size):
        if len(movies) >= limit:
            break
        response = requests.get(
            f'{LIST_URL}?start={start}', headers=headers, timeout=timeout
        )
        # Fail loudly on an error status instead of parsing an error page
        # into an empty result.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        for item in soup.find_all('div', class_='item'):
            movies.append(parse_movie(item))
            if len(movies) >= limit:
                break
    return movies


def save_titles(movies, path="movies.txt"):
    """Write one movie title per line to *path*."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(movie["title"] + "\n" for movie in movies)


def save_csv(movies, path="movies.csv"):
    """Write *movies* to *path* as CSV with the ``CSV_HEADER`` columns."""
    # utf-8-sig writes a BOM at the start of the file.
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows([movie[key] for key in MOVIE_KEYS] for movie in movies)


def save_json(movies, path="movies.json"):
    """Dump *movies* (including poster URLs) to *path* as indented JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(movies, f, ensure_ascii=False, indent=4)


def load_csv_rows(path="movies.csv"):
    """Return the rows of the CSV at *path* as a list of dicts."""
    with open(path, "r", encoding="utf-8-sig", newline="") as f:
        return list(csv.DictReader(f))


def save_high_rating_csv(rows, path="high_rating.csv"):
    """Write *rows* to *path*; the header is written even when *rows* is empty."""
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=CSV_HEADER)
        writer.writeheader()
        writer.writerows(rows)


def download_posters(movies, folder="movie_posters", timeout=10):
    """Download each movie's poster into *folder*, best effort.

    A failure for one poster is reported and does not stop the others.
    """
    import requests

    os.makedirs(folder, exist_ok=True)
    for movie in movies:
        if not movie["poster_url"]:
            continue
        try:
            img_resp = requests.get(
                movie["poster_url"], headers=headers, timeout=timeout
            )
            # Do not save an HTTP error page under a .jpg name.
            img_resp.raise_for_status()
            filename = os.path.join(
                folder, f"{movie['rank']}_{safe_filename(movie['title'])}.jpg"
            )
            with open(filename, "wb") as f:
                f.write(img_resp.content)
            print(f" 已下载海报:{movie['title']}")
        except (requests.RequestException, OSError) as e:
            print(f" 海报下载失败:{movie['title']},错误:{e}")


def main(limit=10):
    """Run the scrape, export, filter, statistics and poster stages in order."""
    movies = fetch_movies(limit=limit)

    save_titles(movies)
    print(" 已保存:movies.txt")
    save_csv(movies)
    print(" 已保存:movies.csv")
    save_json(movies)
    print(" 已保存:movies.json")

    # The filter deliberately works from the CSV that was just written.
    high_rating_movies = select_high_rating(load_csv_rows("movies.csv"))
    print("\n=== 评分高于9.5的电影 ===")
    for m in high_rating_movies:
        print(f"{m['排名']} {m['中文名']} {m['评分']}")
    save_high_rating_csv(high_rating_movies)
    print(" 已保存:high_rating.csv")

    # The statistics deliberately work from the JSON that was just written.
    with open("movies.json", "r", encoding="utf-8") as f:
        stats = summarize_ratings(json.load(f))
    print("\n=== 统计信息 ===")
    if stats is None:
        print("没有可统计的评分数据")
    else:
        print(f"前{stats['count']}部电影平均分:{stats['average']:.2f}")
        print(f"最高评分:{stats['max']}")
        for m in stats["top"]:
            print(f"评分最高的电影:{m['title']}")

    download_posters(movies)

    print("\n 所有任务全部完成!")


if __name__ == "__main__":
    main()