"""Scrape the movie table from the exam-b page and save the rows as JSON.

The script downloads the page at ``URL``, reads every ``table tbody tr`` row
that has at least eight ``<td>`` cells, maps the first eight cells onto
``FIELDS`` and writes the resulting list of dicts to ``movie.json`` and
``move.html``.
"""
import json

import requests
from bs4 import BeautifulSoup as bs

URL = 'https://exam.detr.top/exam-b/movies'
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/149.0.0.0 Safari/537.36 Edg/149.0.0.0'
    ),
    'Referer': 'https://exam.detr.top/exam-b/movies',
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() may block forever on an unresponsive host.
REQUEST_TIMEOUT = 10

# Column order of the table; one output key per <td>, left to right.
FIELDS = (
    "id",
    "title",
    "director",
    "year",
    "rating",
    "duration",
    "genre",
    "actors_count",
)


def fetch_html(url=URL, headers=None, timeout=REQUEST_TIMEOUT):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Page to download.
        headers: Request headers; defaults to ``HEADERS``.
        timeout: Seconds to wait for the server.

    Returns:
        The page text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(
        url,
        headers=HEADERS if headers is None else headers,
        timeout=timeout,
    )
    # Fail loudly instead of parsing an error page into an empty result.
    response.raise_for_status()
    response.encoding = "utf-8"
    return response.text


def parse_movies(html):
    """Extract the movie rows from *html*.

    Rows with fewer cells than ``FIELDS`` are skipped; extra cells beyond
    the first ``len(FIELDS)`` are ignored.

    Returns:
        A list of dicts keyed by ``FIELDS``; every value is the stripped
        text of the corresponding cell (kept as a string).
    """
    soup = bs(html, "html.parser")
    movies = []
    for row in soup.select("table tbody tr"):
        cells = row.find_all("td")
        if len(cells) < len(FIELDS):
            continue
        # zip() stops at the shorter input, so only the first
        # len(FIELDS) cells are consumed.
        movies.append(
            {
                field: cell.get_text(strip=True)
                for field, cell in zip(FIELDS, cells)
            }
        )
    return movies


def save_json(records, path):
    """Write *records* to *path* as pretty-printed UTF-8 JSON."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(records, f, ensure_ascii=False, indent=2)


def main():
    """Fetch the page, print the parsed rows and write the output files."""
    movie_list = parse_movies(fetch_html())
    print(movie_list)
    save_json(movie_list, 'movie.json')
    # NOTE(review): this file receives the same JSON as movie.json even
    # though it is named *.html (and "move" looks like a typo for "movie").
    # Behaviour is kept as-is; confirm whether the raw page HTML was the
    # intended content.
    save_json(movie_list, "move.html")


if __name__ == "__main__":
    main()