forked from 2509165022/simulated-examination
1
This commit is contained in:
30
q2_1_crawler/1.py
Normal file
30
q2_1_crawler/1.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup as bs
|
||||
import json
|
||||
# Scrape the movie table from the exam page and store it as JSON.
#
# Side effects: writes the raw page to movies.html and the parsed result
# (the page's exam fingerprint plus one record per table row) to movies.json.
header = {'User-Agent':'Mozilla/5.0'}
url = 'https://exam.detr.top/exam-b/movies'

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(url, headers=header, timeout=10)
response.raise_for_status()
html = response.text

# Keep a copy of the raw page; `with` closes (and flushes) the file
# deterministically instead of relying on garbage collection.
with open('movies.html', 'w', encoding='utf-8') as page_file:
    page_file.write(html)

# The fingerprint is the first whitespace-free token that follows
# "exam_fingerprint:" in the page; it is stored under the "ID" key below.
fingerprint_match = re.search(r'exam_fingerprint:\s*(\S+)', html)
if fingerprint_match is None:
    # Fail with a clear message rather than AttributeError on None.
    raise RuntimeError('exam_fingerprint not found in the downloaded page')
fd = fingerprint_match.group(1)

# Each movie is one <tr class="item-row"> element.
resp = bs(html, 'html.parser').find_all('tr', class_='item-row')

movies = []
for row in resp:
    # Cell texts in column order; the indices below follow that order.
    c = [cell.text.strip() for cell in row.find_all('td')]
    movies.append({
        'id': int(c[0]),
        'title': c[1],
        'director': c[2],
        'year': int(c[3]),
        'rating': float(c[4]),
        'duration': int(c[5]),
        'genre': c[6],
        'actors_count': int(c[7]),
    })

# ensure_ascii=False keeps non-ASCII text readable in the output file.
with open('movies.json', 'w', encoding='utf-8') as json_file:
    json.dump({'ID':fd, 'movies':movies}, json_file, ensure_ascii=False, indent=2)
|
||||
Reference in New Issue
Block a user