From e279efba3ee48b3c07f32f95b7911c11274769d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=83=A1=E7=BA=A2=E8=BE=89?=
 <2509165036@student.example.com>
Date: Tue, 23 Jun 2026 10:58:03 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20q2=5F1=5Fcrawler?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 q2_1_crawler/q2_1.py | 70 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 q2_1_crawler/q2_1.py

diff --git a/q2_1_crawler/q2_1.py b/q2_1_crawler/q2_1.py
new file mode 100644
index 0000000..4e32424
--- /dev/null
+++ b/q2_1_crawler/q2_1.py
@@ -0,0 +1,70 @@
+import requests
+from bs4 import BeautifulSoup
+import json
+
+# 1. Request configuration. The assignment requires the request to carry the
+#    header the site checks for; a browser-like User-Agent is sent here.
+url = "https://exam.detr.top/exam-b/movies"
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/125.0.0.0 Safari/537.36"
+}
+
+
+def _text(node, selector):
+    """Return the stripped text of the first match for selector, or an empty string."""
+    tag = node.select_one(selector)
+    return tag.get_text(strip=True) if tag else ""
+
+
+def _rating(node):
+    """Return the .rating text as a float, or 0.0 if missing or non-numeric."""
+    try:
+        return float(_text(node, ".rating"))
+    except ValueError:
+        # Covers both the empty string (tag absent or blank) and stray text.
+        return 0.0
+
+
+# A single request fetches the whole page (the assignment scores "get all data
+# in one crawl"). Fail fast on HTTP errors so an error page is never saved and
+# parsed as if it were the movie list.
+resp = requests.get(url, headers=headers, timeout=10)
+resp.raise_for_status()
+resp.encoding = "utf-8"
+html_text = resp.text
+
+# Save the raw page source as movies.html.
+with open("movies.html", "w", encoding="utf-8") as f:
+    f.write(html_text)
+
+# Parse the page.
+soup = BeautifulSoup(html_text, "html.parser")
+# Read the actors_count value; "0" when the tag is absent.
+count_tag = soup.find("span", id="actors_count")
+actors_count = count_tag.get_text(strip=True) if count_tag else "0"
+
+# Extract every movie entry; key order here is the order written to the JSON.
+movie_items = soup.select(".movie-item")
+movie_list = [
+    {
+        "id": item.get("data-id", ""),
+        "title": _text(item, ".title"),
+        "director": _text(item, ".director"),
+        "year": _text(item, ".year"),
+        "rating": _rating(item),
+        "duration": _text(item, ".duration"),
+        "genre": _text(item, ".genre"),
+    }
+    for item in movie_items
+]
+
+# Assemble the JSON payload.
+result_data = {
+    "actors_count": actors_count,
+    "movies": movie_list,
+}
+# Write movies.json (ensure_ascii=False keeps non-ASCII text readable).
+with open("movies.json", "w", encoding="utf-8") as f:
+    json.dump(result_data, f, ensure_ascii=False, indent=2)
+
+print(f"爬取完成，共抓取{len(movie_list)}部电影，已生成 movies.html、movies.json")