上传文件至 /
This commit is contained in:
44
q1.py
Normal file
44
q1.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import requests
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Browser-like request headers sent with the page request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Referer": "https://exam.detr.top/",
}

url = "https://exam.detr.top/exam-b/movies"

# Seconds to wait for the server; without a timeout requests.get() may
# block forever on an unresponsive host.
REQUEST_TIMEOUT = 10

# Only the first MAX_MOVIES "movie-item" entries of the page are exported.
MAX_MOVIES = 10


def clean_field(text, *labels):
    """Return *text* with every string in *labels* removed and whitespace trimmed.

    The text is stripped again after the labels are removed, so a space
    between a label and its value (e.g. "label: value") does not leak into
    the result.
    """
    cleaned = text.strip()
    for label in labels:
        cleaned = cleaned.replace(label, "")
    return cleaned.strip()


def field_text(item, tag, css_class, *labels):
    """Return the cleaned text of the first ``<tag class=css_class>`` inside *item*.

    Raises:
        ValueError: if *item* contains no such element.
    """
    node = item.find(tag, {"class": css_class})
    if node is None:
        # Fail with a message that names the missing element instead of an
        # opaque AttributeError on None.
        raise ValueError(f'movie item is missing <{tag} class="{css_class}">')
    return clean_field(node.text, *labels)


def parse_movie(item):
    """Convert one "movie-item" element into a dict of typed fields.

    Raises:
        ValueError: if a field is missing or a numeric field cannot be parsed.
    """
    return {
        "id": field_text(item, "span", "movie-id"),
        "title": field_text(item, "h3", "movie-title"),
        "director": field_text(item, "p", "movie-director", "导演:"),
        "year": int(field_text(item, "p", "movie-year", "上映年份:")),
        "rating": float(field_text(item, "span", "movie-rating")),
        "duration": int(field_text(item, "p", "movie-duration", "时长:", "分钟")),
        "genre": field_text(item, "p", "movie-genre", "类型:"),
        "actors_count": int(field_text(item, "p", "movie-actors", "演员数量:")),
    }


def fetch_page(page_url):
    """Download *page_url* with the module headers and return its text as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Stop here on an error status so an error page is never saved or parsed
    # as if it were the movie list.
    response.raise_for_status()
    response.encoding = "utf-8"
    return response.text


def main():
    """Fetch the movie page, save it as movies.html and export movies.json."""
    html = fetch_page(url)

    with open("movies.html", "w", encoding="utf-8") as f:
        f.write(html)

    soup = BeautifulSoup(html, "html.parser")

    # Look the element up once; fall back to "unknown" when it is absent.
    data_node = soup.find("div", {"class": "data-id"})
    data_id = data_node.text.strip() if data_node is not None else "unknown"

    movie_items = soup.find_all("div", {"class": "movie-item"})
    movies = [parse_movie(item) for item in movie_items[:MAX_MOVIES]]

    final_data = {
        "data_id": data_id,
        "movies": movies,
    }
    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(final_data, f, ensure_ascii=False, indent=4)

    print("爬取完成,已生成movies.json和movies.html")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user