"""Scrape the movie table from the exam page and save it locally.

Running this script fetches the page at ``url``, stores the raw markup in
``movies.html``, and writes the page's ``exam_fingerprint`` value together
with one record per ``tr.item-row`` table row to ``movies.json``.
"""
import json
import re

import requests
from bs4 import BeautifulSoup as bs

header = {'User-Agent': 'Mozilla/5.0'}
url = 'https://exam.detr.top/exam-b/movies'

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

# Matches the "exam_fingerprint: <token>" marker embedded in the page text.
FINGERPRINT_RE = re.compile(r'exam_fingerprint:\s*(\S+)')

# Number of leading <td> cells each movie row must provide.
EXPECTED_CELLS = 8


def extract_fingerprint(html: str) -> str:
    """Return the token that follows ``exam_fingerprint:`` in *html*.

    Raises:
        ValueError: if the marker does not occur in *html*.
    """
    match = FINGERPRINT_RE.search(html)
    if match is None:
        raise ValueError('exam_fingerprint marker not found in page')
    return match.group(1)


def parse_movie_row(row) -> dict:
    """Convert one ``<tr class="item-row">`` element into a movie record.

    The first eight ``<td>`` cells are read in order as id, title, director,
    year, rating, duration, genre and actors_count; any further cells are
    ignored.

    Raises:
        ValueError: if the row has fewer than eight cells, or a numeric cell
            cannot be converted by ``int`` / ``float``.
    """
    cells = [td.text.strip() for td in row.find_all('td')]
    if len(cells) < EXPECTED_CELLS:
        raise ValueError(
            f'expected at least {EXPECTED_CELLS} cells in movie row, '
            f'got {len(cells)}: {cells!r}'
        )
    (movie_id, title, director, year,
     rating, duration, genre, actors_count) = cells[:EXPECTED_CELLS]
    return {
        'id': int(movie_id),
        'title': title,
        'director': director,
        'year': int(year),
        'rating': float(rating),
        'duration': int(duration),
        'genre': genre,
        'actors_count': int(actors_count),
    }


response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
# Fail fast on HTTP errors instead of saving and parsing an error page.
response.raise_for_status()
html = response.text

# Keep a copy of the raw page next to the JSON export.
with open('movies.html', 'w', encoding='utf-8') as html_file:
    html_file.write(html)

fingerprint = extract_fingerprint(html)

rows = bs(html, 'html.parser').find_all('tr', class_='item-row')
movies = [parse_movie_row(row) for row in rows]

with open('movies.json', 'w', encoding='utf-8') as json_file:
    json.dump({'ID': fingerprint, 'movies': movies}, json_file,
              ensure_ascii=False, indent=2)