import json
import os

import requests
from bs4 import BeautifulSoup

# Column order of the movies table: (JSON key, converter applied to the cell text).
_MOVIE_COLUMNS = (
    ("id", int),
    ("title", str),
    ("director", str),
    ("year", int),
    ("rating", float),
    ("duration", int),
    ("genre", str),
    ("actors_count", int),
)


def _parse_movie_row(cells):
    """Convert the cell texts of one table row into a movie dict.

    Args:
        cells: List of stripped cell strings, in table column order.

    Returns:
        A dict keyed by the names in ``_MOVIE_COLUMNS``.

    Raises:
        ValueError: If the row has fewer cells than expected columns or a
            numeric cell cannot be converted.
    """
    if len(cells) < len(_MOVIE_COLUMNS):
        raise ValueError(
            f"expected {len(_MOVIE_COLUMNS)} cells, got {len(cells)}"
        )
    # zip() stops at the shorter sequence, so extra trailing cells are ignored.
    return {
        key: convert(text)
        for (key, convert), text in zip(_MOVIE_COLUMNS, cells)
    }


def _parse_movies(html_content):
    """Extract movie dicts from the first ``<table>`` in *html_content*.

    Returns:
        A list of movie dicts, or ``None`` when the page has no table.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    table = soup.find("table")
    if table is None:
        return None

    movies_data = []
    # The first row is skipped as the header row.
    for row in table.find_all("tr")[1:]:
        cells = [col.get_text(strip=True) for col in row.find_all("td")]
        if not cells:
            continue
        try:
            movies_data.append(_parse_movie_row(cells))
        except ValueError as e:
            # One malformed row must not discard every other movie.
            print(f"[警告]跳过无法解析的行:{e}")
    return movies_data


def task_1_scrape():
    """Download the movies page, save it, and export its table as JSON.

    Writes the raw page to ``movies.html`` and the parsed rows to
    ``movies.json`` in the current working directory. Progress and errors
    are reported with ``print``; the function returns ``None`` and does not
    propagate exceptions.
    """
    url = "https://exam.detr.top/exam-b/movies"
    headers = {
        # Each fragment ends with a space: adjacent string literals are
        # concatenated verbatim, so without it the product tokens run together.
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/129.0.0.0 Safari/537.36"
        )
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=10)
        response.encoding = "utf-8"
        if response.status_code != 200:
            print(f"[错误]请求失败,状态码:{response.status_code}")
            return

        html_content = response.text
        with open("movies.html", "w", encoding="utf-8") as f:
            f.write(html_content)
        print("[成功]已保存movies.html")

        movies_data = _parse_movies(html_content)
        if movies_data is None:
            print("[错误]页面中未找到表格")
            return
        # Reported after parsing so the count reflects the rows actually read.
        print(f"DEBUG:我抓到了{len(movies_data)}个电影数据")

        with open("movies.json", "w", encoding="utf-8") as f:
            json.dump(movies_data, f, ensure_ascii=False, indent=4)
        print(f"[成功]已抓取{len(movies_data)}部电影并保存至movies.json")
    except Exception as e:
        # Script-level boundary: report network/file errors instead of crashing.
        print(f"[异常]发生错误:{e}")