删除 q2_1_crawler/q2_1.py

2026-06-23 11:20:15 +08:00
parent 1d1a1b8979
commit e98c2ad05b
1 changed files with 0 additions and 57 deletions
--- a/q2_1_crawler/q2_1.py
+++ b/q2_1_crawler/q2_1.py
@@ -1,57 +0,0 @@
-import requests
-from bs4 import BeautifulSoup
-import json
-import os
-
-def task_1_scrape():
-    url = "https://exam.detr.top/exam-b/movies"
-
-    headers = {
-        "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
-         "AppleWebKit/537.36 (KHTML, like Gecko)" 
-         "Chrome/129.0.0.0 Safari/537.36"
-    }
-
-    try:
-        response = requests.get(url,headers=headers)
-        response.encoding = 'utf-8'
-
-        if response.status_code == 200:
-            html_content = response.text
-
-
-            with open("movies.html","w",encoding="utf-8") as f:
-                f.write(html_content)
-            print("[成功]已保存movies.html")
-
-            soup = BeautifulSoup(html_content,'html.parser')
-            table = soup.find('table')
-            rows = table.find_all('tr')
-
-            movies_data = []
-
-            print(f"DEBUG:我抓到了{len(movies_data)}个电影数据")
-
-            for row in rows[1:]:
-                cols = row.find_all('td')
-                if len(cols) > 0:
-                    movie = {
-                        "id": int(cols[0].get_text(strip = True)),
-                        "title": cols[1].get_text(strip = True),
-                        "director": cols[2].get_text(strip = True),
-                        "year": int(cols[3].get_text(strip = True)),
-                        "rating": float(cols[4].get_text(strip = True)),
-                        "duration": int(cols[5].get_text(strip = True)),
-                        "genre": cols[6].get_text(strip = True),
-                        "actors_count": int(cols[7].get_text(strip = True))
-                    }
-                    movies_data.append(movie)
-           
-            with open("movies.json","w",encoding="utf-8") as f:
-                json.dump(movies_data, f, ensure_ascii=False, indent = 4)
-            print(f"[成功]已抓取{len(movies_data)}部电影并保存至movies.json")
-        else:
-            print(f"[错误]请求失败,状态码：{response.status_code}")
-
-    except Exception as e:
-        print(f"[异常]发生错误：{e}")