From a44027c78b45b2dbb3f157659aaa751154f712f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=83=A1=E7=BA=A2=E8=BE=89?=
 <2509165036@student.example.com>
Date: Tue, 23 Jun 2026 11:00:03 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=20https:/gitea.detr.top/2509?=
 =?UTF-8?q?165036/simulated-examination/src/branch/main/q2=5F1=5Fcrawler/q?=
 =?UTF-8?q?2=5F1.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/branch/main/q2_1_crawler/q2_1.py      | 50 -------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 https:/gitea.detr.top/2509165036/simulated-examination/src/branch/main/q2_1_crawler/q2_1.py

diff --git a/https:/gitea.detr.top/2509165036/simulated-examination/src/branch/main/q2_1_crawler/q2_1.py b/https:/gitea.detr.top/2509165036/simulated-examination/src/branch/main/q2_1_crawler/q2_1.py
deleted file mode 100644
index 4e32424..0000000
--- a/https:/gitea.detr.top/2509165036/simulated-examination/src/branch/main/q2_1_crawler/q2_1.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-import json
-
-# 1. 配置请求头（题目强制要求必须带检测头）
-url = "https://exam.detr.top/exam-b/movies"
-headers = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36"
-}
-
-# 仅一次请求，一次性拿到页面全部数据（符合“一次爬取获取所有数据”得分要求）
-resp = requests.get(url, headers=headers, timeout=10)
-resp.encoding = "utf-8"
-html_text = resp.text
-
-# 保存原始网页源码 movies.html
-with open("movies.html", "w", encoding="utf-8") as f:
-    f.write(html_text)
-
-# 解析页面
-soup = BeautifulSoup(html_text, "html.parser")
-# 获取数据编号 actors_count
-count_tag = soup.find("span", id="actors_count")
-actors_count = count_tag.get_text(strip=True) if count_tag else "0"
-
-# 提取全部10条电影
-movie_items = soup.select(".movie-item")
-movie_list = []
-for item in movie_items:
-    movie = {
-        "id": item.get("data-id", ""),
-        "title": item.select_one(".title").get_text(strip=True) if item.select_one(".title") else "",
-        "director": item.select_one(".director").get_text(strip=True) if item.select_one(".director") else "",
-        "year": item.select_one(".year").get_text(strip=True) if item.select_one(".year") else "",
-        "rating": float(item.select_one(".rating").get_text(strip=True)) if item.select_one(".rating") else 0.0,
-        "duration": item.select_one(".duration").get_text(strip=True) if item.select_one(".duration") else "",
-        "genre": item.select_one(".genre").get_text(strip=True) if item.select_one(".genre") else ""
-    }
-    movie_list.append(movie)
-
-# 组装json数据
-result_data = {
-    "actors_count": actors_count,
-    "movies": movie_list
-}
-# 写入movies.json
-with open("movies.json", "w", encoding="utf-8") as f:
-    json.dump(result_data, f, ensure_ascii=False, indent=2)
-
-print(f"爬取完成，共抓取{len(movie_list)}部电影，已生成 movies.html、movies.json")
\ No newline at end of file