From 18cd09b6d235e632981dffa5458cc935649da3cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9E=97=E8=B4=B5=E7=8F=8D?= <2509165009@student.example.com> Date: Tue, 23 Jun 2026 11:06:07 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=2026062309.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 26062309.py | 50 -------------------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 26062309.py diff --git a/26062309.py b/26062309.py deleted file mode 100644 index 3ff21ab..0000000 --- a/26062309.py +++ /dev/null @@ -1,50 +0,0 @@ -import requests -import json -from bs4 import BeautifulSoup - -# 请求检测头(题目硬性要求) -headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" -} -url = "https://exam.detr.top/exam-b/movies" - -response = requests.get(url, headers=headers) -response.raise_for_status() -response.encoding = "utf-8" - -# 保存网页源码 -with open("movies.html", "w", encoding="utf-8") as f: - f.write(response.text) - -# 解析表格数据 -soup = BeautifulSoup(response.text, "html.parser") -table = soup.find("table") -tr_rows = table.find_all("tr")[1:] - -movie_list = [] -for row in tr_rows: - cell = row.find_all("td") - info = { - "id": int(cell[0].text.strip()), - "title": cell[1].text.strip(), - "director": cell[2].text.strip(), - "year": int(cell[3].text.strip()), - "rating": float(cell[4].text.strip()), - "duration": int(cell[5].text.strip()), - "genre": cell[6].text.strip(), - "actors_count": int(cell[7].text.strip()) - } - movie_list.append(info) - -# 提取页面数据编号 -data_code = soup.find("code").get_text(strip=True) -result = { - "data_id": data_code, - "movies": movie_list -} - -# 写入JSON文件 -with open("movies.json", "w", encoding="utf-8") as f: - json.dump(result, f, ensure_ascii=False, indent=4) - -print("✅ 爬取完成,两个文件已正常生成") \ No newline at end of file