"""Scrape the movie table from the exam site and save it as HTML and JSON.

The script downloads one page, stores the raw HTML in ``movies.html``,
extracts every table row that has at least eight ``<td>`` cells, and writes
the parsed records to ``movies.json``.
"""
import json

import requests
from bs4 import BeautifulSoup

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
URL = "https://exam.detr.top/exam-b/movies"
HTML_PATH = 'movies.html'
JSON_PATH = 'movies.json'

# Without a timeout, requests waits forever on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 10

# Rows with fewer <td> cells than this (e.g. header rows) are skipped.
MIN_COLUMNS = 8


def _fetch_html(url: str) -> str:
    """Download *url* and return the response body decoded as UTF-8.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx status code.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail early so an HTTP error page is never saved or parsed as data.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def _parse_row(tds: list) -> dict:
    """Convert the first eight ``<td>`` cells of one row into a movie record.

    Raises:
        ValueError: if a numeric column cannot be converted.
    """
    return {
        "id": tds[0].text.strip(),
        "title": tds[1].text.strip(),
        "director": tds[2].text.strip(),
        "year": int(tds[3].text.strip()),
        "rating": float(tds[4].text.strip()),
        "duration": int(tds[5].text.strip()),
        "genre": tds[6].text.strip(),
        "actors_count": int(tds[7].text.strip()),
    }


def _parse_movies(html_content: str) -> list:
    """Extract all movie records from the first ``<table>`` in *html_content*.

    Raises:
        ValueError: if the page has no ``<table>``, or a row contains a
            numeric cell that cannot be converted.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    if table is None:
        raise ValueError("页面中未找到 <table> 元素")

    # <tbody> is optional in HTML; fall back to the rows of the table itself.
    tbody = table.find('tbody')
    container = table if tbody is None else tbody

    movies_data = []
    for index, row in enumerate(container.find_all('tr'), start=1):
        tds = row.find_all('td')
        if len(tds) < MIN_COLUMNS:
            continue
        try:
            movies_data.append(_parse_row(tds))
        except ValueError as exc:
            # Stay fail-fast, but tell the user which row is malformed.
            raise ValueError(f"第 {index} 行数据格式错误:{exc}") from exc
    return movies_data


def main() -> None:
    """Run the scrape: fetch the page, save the HTML, parse it, save the JSON."""
    try:
        html_content = _fetch_html(URL)
        with open(HTML_PATH, 'w', encoding='utf-8') as f_html:
            f_html.write(html_content)

        movies_data = _parse_movies(html_content)
        with open(JSON_PATH, 'w', encoding='utf-8') as f_json:
            json.dump(movies_data, f_json, ensure_ascii=False, indent=4)
    except (requests.RequestException, OSError, ValueError) as e:
        # Only network, file-system and data errors are expected here;
        # anything else is a programming error and should surface.
        print(f"爬取或解析失败,错误信息:{e}")
        return

    print(f"爬取成功!共获取 {len(movies_data)} 条电影数据。")
    print("文件 movies.html 和 movies.json 已保存。")


if __name__ == "__main__":
    main()