"""Scrape the first ten entries of the Douban Top 250 movie chart into movies.json."""

import json
import re
from html import unescape

import requests

# Target page and request headers; an explicit browser-like User-Agent is
# sent instead of the default one supplied by `requests`.
url = "https://movie.douban.com/top250"
headers = {"User-Agent": "Mozilla/5.0"}

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection (requests applies no timeout unless one is given), and
# raise_for_status() aborts on an HTTP error response instead of letting an
# error page be parsed into an empty result.
resp = requests.get(url, headers=headers, timeout=10)
resp.raise_for_status()
html = resp.text

# One match per movie entry. Capture groups, in order:
#   1. rank   - digits inside <em class="">
#   2. title  - text of the first <span class="title"> after the rank
#   3. other  - text of <span class="other">
#   4. rating - text of <span class="rating_num">
#   5. quote  - text of <span class="inq">; empty when the entry has none
# The span tags accept extra attributes after `class`. The quote part is
# optional, and its search is not allowed to cross the next <em class="">,
# so an entry without a quote cannot consume the following entry or borrow
# its quote. re.S lets `.` span the newlines between the tags.
pattern = re.compile(
    r'<em class="">(\d+)</em>.*?'
    r'<span class="title"[^>]*>(.*?)</span>.*?'
    r'<span class="other"[^>]*>(.*?)</span>.*?'
    r'<span class="rating_num"[^>]*>(.*?)</span>'
    r'(?:(?:(?!<em class="">).)*?<span class="inq"[^>]*>(.*?)</span>)?',
    re.S,
)

# Keep only the first ten matches, in the order they appear on the page.
movies = pattern.findall(html)[:10]

# Turn each 5-tuple (rank, title, other, rating, quote) into a JSON-ready
# dict. HTML character references in the captured text are decoded so the
# stored values are plain text; decoding before strip() also lets
# non-breaking-space padding be trimmed. "en_title" is the text of the
# "other" span with every "/" removed.
result = [
    {
        "rank": int(rank),
        "title": unescape(title),
        "en_title": unescape(other).replace("/", "").strip(),
        "rating": rating,
        "quote": unescape(quote),
    }
    for rank, title, other, rating, quote in movies
]

# Write the records as indented JSON. ensure_ascii=False stores non-ASCII
# text as-is (UTF-8) rather than as \uXXXX escapes.
with open("movies.json", "w", encoding="utf-8") as f:
    json.dump(result, f, ensure_ascii=False, indent=4)

print("已保存到 movies.json")