完成作业douban250
This commit is contained in:
0
250json.py
Normal file
0
250json.py
Normal file
67
douban.txt/douban.txt
Normal file
67
douban.txt/douban.txt
Normal file
@@ -0,0 +1,67 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import time
|
||||
|
||||
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}

# Movie records collected by main(); one dict per movie, keyed by FIELDS.
all_movies = []

# Placeholder stored when a field cannot be extracted from the page text.
UNKNOWN = "未知"

# Record keys, in the order they are written to the output file:
# rank, title, director, year, area, genre, score.
FIELDS = ("排名", "片名", "导演", "年份", "地区", "类型", "评分")

PAGE_COUNT = 10
PAGE_SIZE = 25
OUTPUT_PATH = "douban_top250.txt"


def parse_movie_info(info):
    """Split the info paragraph of one list entry into its fields.

    The first non-blank line is searched for the "导演:" (director) marker;
    the director is the text between that marker and the "主演:" (cast)
    marker.  The second non-blank line is split on "/" and its first three
    parts are taken as year, area and genre.

    Args:
        info: Text of the entry's info paragraph.

    Returns:
        A ``(director, year, area, genre)`` tuple of strings.  Any field
        that cannot be found is ``UNKNOWN``.
    """
    lines = [line.strip() for line in info.split("\n") if line.strip()]

    director = UNKNOWN
    # Guard on `lines` so an empty paragraph yields UNKNOWN, not IndexError.
    if lines and "导演:" in lines[0]:
        director = lines[0].split("导演:")[1].split("主演:")[0].strip()

    year = area = genre = UNKNOWN
    if len(lines) >= 2:
        # NOTE(review): fields are taken left to right; an entry listing
        # several years or areas separated by "/" would shift them - confirm
        # against real page data.
        parts = [part.strip() for part in lines[1].split("/")]
        year = parts[0]
        if len(parts) > 1:
            area = parts[1]
        if len(parts) > 2:
            genre = parts[2]

    return director, year, area, genre


def parse_item(item):
    """Build one movie record from a ``<div class="item">`` element.

    Args:
        item: BeautifulSoup tag for a single list entry.

    Returns:
        A dict with one string value for each key in ``FIELDS``.
    """
    info = item.find("div", class_="bd").p.text.strip()
    director, year, area, genre = parse_movie_info(info)
    return {
        "排名": item.find("em").text,
        # `.text` is required here: `.t` would look up a child <t> tag
        # and yield None.
        "片名": item.find("span", class_="title").text,
        "导演": director,
        "年份": year,
        "地区": area,
        "类型": genre,
        "评分": item.find("span", class_="rating_num").text,
    }


def fetch_page(start):
    """Download one list page and return its movie records.

    Args:
        start: Value of the ``start`` query parameter (offset of the first
            entry on the page).

    Returns:
        A list of record dicts, one per ``<div class="item">`` on the page.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-success HTTP status.
    """
    url = f"https://movie.douban.com/top250?start={start}&filter="
    # A timeout keeps a stalled connection from hanging the script forever;
    # raise_for_status() stops an error page from being parsed as "no items".
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    return [parse_item(item) for item in soup.find_all("div", class_="item")]


def save_movies(movies, path=OUTPUT_PATH):
    """Write the records to *path* as UTF-8 text.

    Each record is written as one "key：value" line per field in ``FIELDS``
    order, followed by a line of 50 dashes.
    """
    with open(path, "w", encoding="utf-8") as f:
        for m in movies:
            f.writelines(f"{key}：{m[key]}\n" for key in FIELDS)
            f.write("-" * 50 + "\n")


def main():
    """Scrape every list page, then save all records to OUTPUT_PATH."""
    for page in range(PAGE_COUNT):
        # NOTE(review): progress text reconstructed from a mis-decoded GBK
        # literal - confirm the exact wording.
        print(f"正在爬第 {page + 1} 页 …")
        all_movies.extend(fetch_page(page * PAGE_SIZE))
        # Pause between pages so requests are not sent back to back.
        time.sleep(1)

    save_movies(all_movies)
    print(f"全部爬完！已保存到 {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
|
||||
0
douban3.31 .py
Normal file
0
douban3.31 .py
Normal file
Reference in New Issue
Block a user