# Listing header from the original code viewer: 27 lines, 745 B, Python.
import csv
import re
from html import unescape

import requests

url = "https://movie.douban.com/top250"
headers = {"User-Agent": "Mozilla/5.0"}

# One match per list entry: rank, first "title" span, "other" span, rating.
# re.S lets ".*?" run across the newlines that separate those tags.
pattern = re.compile(
    r'<em class="">(\d+)</em>.*?'
    r'<span class="title">(.*?)</span>.*?'
    r'<span class="other">(.*?)</span>.*?'
    # [^>]* tolerates extra attributes on the rating tag.
    r'<span class="rating_num"[^>]*>(.*?)</span>',
    re.S,
)


def _tidy(text, *, drop_slashes=False):
    """Decode HTML entities in *text* and collapse runs of whitespace.

    With ``drop_slashes=True`` every "/" is turned into a space first, so
    names that were separated only by a slash do not get fused together.
    """
    text = unescape(text)
    if drop_slashes:
        text = text.replace("/", " ")
    # str.split() with no argument also splits on the non-breaking spaces
    # that "&nbsp;" decodes to.
    return " ".join(text.split())


def parse_movies(page, limit=10):
    """Extract ``(rank, title, other_names, rating)`` tuples from *page*.

    Args:
        page: HTML text to search with the module-level ``pattern``.
        limit: Maximum number of entries to return; ``None`` returns all.

    Returns:
        A list of 4-tuples of strings, in the order they occur in *page*.
        The list is empty when nothing matches.
    """
    return [
        (rank, _tidy(title), _tidy(other, drop_slashes=True), rating.strip())
        for rank, title, other, rating in pattern.findall(page)[:limit]
    ]


def fetch_page(timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response (via ``raise_for_status``).
    """
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were the list.
    resp.raise_for_status()
    return resp.text


def save_movies(movies, path="movies.csv"):
    """Write a header row followed by *movies* to a CSV file at *path*.

    The file is encoded as "utf-8-sig", i.e. UTF-8 with a leading BOM.
    """
    with open(path, "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.writer(f)
        # NOTE(review): the third column is labelled as the English name but
        # is filled from the `other` span captured by `pattern` - confirm
        # that this is the intended source.
        writer.writerow(["排名", "中文名", "英文名", "评分"])
        writer.writerows(movies)


def main():
    """Fetch the page, parse the first ten entries and save them as CSV."""
    movies = parse_movies(fetch_page())
    if not movies:
        # An empty result means the regex no longer matches the markup;
        # do not write a header-only file and report success.
        raise SystemExit("no movies matched; the page layout may have changed")
    save_movies(movies)
    print("已保存到 movies.csv")


if __name__ == "__main__":
    main()