完成
This commit is contained in:
65
q2_1_crawler/q2_1.py
Normal file
65
q2_1_crawler/q2_1.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import json
|
||||
|
||||
# Scrape the movie table served by the exam site and save every row as JSON.
#
# Flow: download the page, parse the HTML table, turn each complete row into a
# dict keyed by column name, print the result and write it to disk.

url = 'https://exam.detr.top/exam-b/movies'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36 Edg/149.0.0.0',
    'Referer': 'https://exam.detr.top/exam-b/movies',
}

# Column names in table order: the n-th <td> of a row is stored under the
# n-th key. All values are kept as the stripped cell text (strings).
fields = (
    "id",
    "title",
    "director",
    "year",
    "rating",
    "duration",
    "genre",
    "actors_count",
)

# Without a timeout requests waits forever on an unresponsive server.
req = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page and silently
# writing an empty result.
req.raise_for_status()
# Force UTF-8 decoding of the body regardless of what the server declares.
req.encoding = "utf-8"

soup = bs(req.text, "html.parser")
item = soup.select("table tbody tr")

movie_list = []
for tr in item:
    tds = tr.find_all("td")
    # Skip rows that do not provide every expected column.
    if len(tds) < len(fields):
        continue
    # zip() stops after the last field, so extra trailing cells are ignored.
    movie_list.append(
        {key: td.get_text(strip=True) for key, td in zip(fields, tds)}
    )

print(movie_list)

# ensure_ascii=False keeps non-ASCII text readable in the output file.
with open('movie.json', 'w', encoding='utf-8') as f:
    json.dump(movie_list, f, ensure_ascii=False, indent=2)

# NOTE(review): this writes the same JSON a second time into a file named
# "move.html" -- the name and extension look unintentional; confirm whether
# this copy is needed before renaming or removing it.
with open("move.html", "w", encoding='utf-8') as f:
    json.dump(movie_list, f, ensure_ascii=False, indent=2)
|
||||
Reference in New Issue
Block a user