"""Scrape the first entries of Douban's Top 250 movie list and export them.

Run as a script, this fetches the list page, extracts rank, Chinese title,
foreign title, rating, vote count and tagline for each movie, writes
``movies.txt``, ``movies.csv`` and ``movies.json`` to the current directory,
and finally prints the highest-rated entry read back from the JSON file.
"""
import csv
import json
import re
from html import unescape
from itertools import islice

URL = "https://movie.douban.com/top250"
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    ),
}

# NOTE(review): the HTML anchors below are reconstructed from the markup
# Douban's Top 250 page is known to serve (the anchors were missing from the
# previous pattern, which let every lazy group match the empty string).
# Confirm them against the live page before relying on the output.
_ITEM_START = '<div class="item">'
_RANK_RE = re.compile(r"<em[^>]*>(\d+)</em>")
_TITLE_RE = re.compile(r'<span class="title">(.*?)</span>', re.S)
_RATING_RE = re.compile(r'<span class="rating_num"[^>]*>(.*?)</span>', re.S)
_PEOPLE_RE = re.compile(r"<span>(\d+人评价)</span>")
_QUOTE_RE = re.compile(r'<p class="quote">\s*<span[^>]*>(.*?)</span>', re.S)


def fetch_html(url=URL, headers=None, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Page to fetch.
        headers: Request headers; defaults to the browser-like ``HEADERS``.
        timeout: Seconds to wait before giving up on the server.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-2xx HTTP status.
    """
    # Imported here so the parsing/export helpers stay importable on machines
    # that do not have the third-party ``requests`` package installed.
    import requests

    response = requests.get(
        url,
        headers=HEADERS if headers is None else headers,
        timeout=timeout,
    )
    # Fail loudly instead of parsing an error page.
    response.raise_for_status()
    return response.text


def _clean_foreign_title(raw):
    """Strip the leading ``&nbsp;/&nbsp;`` separator from a secondary title."""
    # Only the separator at the start is removed, so slashes that belong to
    # the title itself are preserved.
    return unescape(raw).strip().lstrip("/").strip()


def _iter_movies(page):
    """Yield one movie dict per list item found in *page*."""
    # Parsing item by item keeps a missing optional field (foreign title,
    # tagline) from making a lazy match spill into the next movie.
    for chunk in page.split(_ITEM_START)[1:]:
        rank = _RANK_RE.search(chunk)
        titles = _TITLE_RE.findall(chunk)
        rating = _RATING_RE.search(chunk)
        people = _PEOPLE_RE.search(chunk)
        if not (rank and titles and rating and people):
            # Not a movie entry (or the markup changed); skip it.
            continue
        quote = _QUOTE_RE.search(chunk)
        yield {
            "rank": int(rank.group(1)),
            "title": unescape(titles[0]).strip(),
            "en_title": _clean_foreign_title(titles[1]) if len(titles) > 1 else "",
            "rating": rating.group(1).strip(),
            "people": people.group(1),
            "quote": unescape(quote.group(1)).strip() if quote else "",
        }


def parse_movies(page, limit=10):
    """Return up to *limit* movie dicts extracted from the list page HTML.

    Each dict has the keys ``rank`` (int), ``title``, ``en_title``,
    ``rating``, ``people`` and ``quote`` (all str). ``en_title`` and
    ``quote`` are empty strings when the page does not provide them.
    """
    return list(islice(_iter_movies(page), limit))


def top_rated(movies):
    """Return the movie with the highest numeric rating, or ``None`` if empty."""
    return max(movies, key=lambda movie: float(movie["rating"]), default=None)


def save_txt(movies, path="movies.txt"):
    """Write one ``title | quote`` line per movie to *path*."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(f"{m['title']} | {m['quote']}\n" for m in movies)


def save_csv(movies, path="movies.csv"):
    """Write all movie fields to *path* as CSV with a header row."""
    # utf-8-sig adds a BOM so spreadsheet tools detect the encoding.
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["排名", "中文名", "英文名", "评分", "评价人数", "经典评语"])
        writer.writerows(
            [m["rank"], m["title"], m["en_title"], m["rating"], m["people"], m["quote"]]
            for m in movies
        )


def save_json(movies, path="movies.json"):
    """Dump the movie list to *path* as human-readable JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(movies, f, ensure_ascii=False, indent=4)


def report_top_rated(path="movies.json"):
    """Read the JSON export back from *path* and print its best-rated movie."""
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    best = top_rated(data)
    if best is None:
        print("没有可统计的电影数据")
        return
    print(f"评分最高:{best['title']}({best['rating']}分 | {best['people']})")


def main():
    """Fetch the list page, export the first ten movies and print the stats."""
    movie_list = parse_movies(fetch_html())
    if not movie_list:
        # Typically an anti-bot page or a markup change; writing empty files
        # and then crashing in the statistics step would hide the real cause.
        raise SystemExit("未能从页面解析出电影信息,请检查网络或页面结构")

    save_txt(movie_list)
    print("✅ 练习1完成:已保存中文名+评语到 movies.txt")

    save_csv(movie_list)
    print("✅ 练习2完成:已保存到 movies.csv")

    save_json(movie_list)
    print("✅ 练习3完成:已保存到 movies.json")

    print("\n--- 练习5:JSON统计 ---")
    report_top_rated()


if __name__ == "__main__":
    main()