import requests
import re
import csv
# Fetch the Douban Top 250 landing page and export the first ten regex
# matches to movies.csv as rows of (rank, Chinese title, English title,
# rating).
url = "https://movie.douban.com/top250"
# A browser-style User-Agent is sent instead of the requests default.
headers = {"User-Agent": "Mozilla/5.0"}
# requests has no default timeout; without one a stalled connection would
# block this script indefinitely.
resp = requests.get(url, headers=headers, timeout=10)
# Stop on an HTTP error status (4xx/5xx) rather than parsing an error page
# and silently exporting garbage.
resp.raise_for_status()
html = resp.text
# NOTE(review): this pattern looks like it has lost the literal HTML text
# that anchored each capture group (presumably tag markup stripped from the
# file at some point -- confirm against the original script). As written,
# every lazy `.*?` / `(.*?)` is satisfied by the empty string, so each
# findall() tuple is (<run of digits>, "", "", ""). Restore the anchors
# against the live page markup before relying on the exported columns.
pattern = re.compile(
    r'(\d+).*?'
    r'(.*?).*?'
    r'(.*?).*?'
    r'(.*?)',
    re.S,  # let "." match newlines so one match can span several lines
)
# Keep only the first ten matches.
movies = pattern.findall(html)[:10]
# utf-8-sig prefixes the file with a BOM (presumably so spreadsheet tools
# detect UTF-8 -- confirm); newline="" leaves line endings to the csv module.
with open("movies.csv", "w", encoding="utf-8-sig", newline="") as f:
    w = csv.writer(f)
    w.writerow(["排名", "中文名", "英文名", "评分"])
    for rank, title, en, rating in movies:
        # Drop the "/" separator characters and surrounding whitespace
        # from the English-title capture.
        en = en.replace("/", "").strip()
        w.writerow([rank, title, en, rating])
print("已保存到 movies.csv")