From cb5748e7eb2ecd29b91dc077ab149d7b83dcad48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=91=E9=9D=99=E8=B6=85?= <2509165019@student.example.com> Date: Tue, 23 Jun 2026 11:17:04 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?= =?UTF-8?q?=20/?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- movies.html | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++ movies.json | 4 ++ q1.py | 44 +++++++++++++++ reviews.json | 7 +++ 4 files changed, 207 insertions(+) create mode 100644 movies.html create mode 100644 movies.json create mode 100644 q1.py create mode 100644 reviews.json diff --git a/movies.html b/movies.html new file mode 100644 index 0000000..f1d9707 --- /dev/null +++ b/movies.html @@ -0,0 +1,152 @@ + + + + + + + + 电影列表 + + + +

电影列表

+

数据编号:B-20260623-3317

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
编号电影名导演上映年份评分时长(分钟)类型主演数
1三傻大闹宝莱坞Frank Darabont20128.6176爱情4
2泰坦尼克号陈凯歌20159.0164冒险4
3阿甘正传Robert Zemeckis20098.2148科幻2
4千与千寻James Cameron20106.6107动画3
5放牛班的春天宫崎骏19957.4119科幻4
6忠犬八公的故事Christopher Nolan20047.9119动画2
7肖申克的救赎Lasse Hallström20168.6138爱情2
8盗梦空间Rajkumar Hirani20206.8120冒险3
9霸王别姬Christophe Barratier20047.4140冒险5
10星际穿越Christopher Nolan19908.5116悬疑4
# Patch context that shared these source lines with the q1.py script. It is
# not Python, so it is kept verbatim as comments to avoid dropping content:
#
#   + + \ No newline at end of file
#   diff --git a/movies.json b/movies.json
#   new file mode 100644
#   index 0000000..e35f50a
#   --- /dev/null
#   +++ b/movies.json
#   @@ -0,0 +1,4 @@
#   +{
#   +    "data_id": "unknown",
#   +    "movies": []
#   +}
#   \ No newline at end of file
#   diff --git a/q1.py b/q1.py
#   new file mode 100644
#   index 0000000..d512dd2
#   --- /dev/null
#   +++ b/q1.py
#   @@ -0,0 +1,44 @@
"""Download the exam movie list and save it as movies.html / movies.json.

The previous version looked for ``div.data-id`` and ``div.movie-item``
elements.  The page text committed next to this script shows a
"数据编号:..." label followed by an eight-column table instead, and the
movies.json produced by that version was empty (``data_id`` "unknown",
no movies).  This version therefore reads the table rows.

NOTE(review): the patch only shows the page's text, not its tags; the
``<tr>``/``<td>`` layout is presumed from the tabular text -- confirm
against the real movies.html.
"""

import json
import re

URL = "https://exam.detr.top/exam-b/movies"

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,*/*;q=0.8"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Referer": "https://exam.detr.top/",
}

# Matches the "数据编号:B-20260623-3317" label; accepts both the full-width
# and the ASCII colon, with optional whitespace around it.
DATA_ID_RE = re.compile(r"数据编号\s*[::]\s*([A-Za-z0-9][A-Za-z0-9_-]*)")

# Columns of a movie row: id, title, director, year, rating, duration,
# genre, actor count.
COLUMN_COUNT = 8


def extract_data_id(page_text, default="unknown"):
    """Return the data id that follows the "数据编号" label in *page_text*.

    *default* is returned when the label is not present, which keeps the
    "unknown" fallback of the original script.
    """
    match = DATA_ID_RE.search(page_text)
    return match.group(1) if match else default


def row_to_movie(cells):
    """Convert the cell texts of one table row into a movie dict.

    Returns ``None`` for rows that are not movie rows: a wrong number of
    cells, or a first cell that is not a number (e.g. the header row).
    Raises ``ValueError`` if a numeric column of a movie row is malformed.
    """
    values = [cell.strip() for cell in cells]
    if len(values) != COLUMN_COUNT or not values[0].isdecimal():
        return None
    movie_id, title, director, year, rating, duration, genre, actors = values
    return {
        "id": movie_id,  # kept as a string, as in the original output
        "title": title,
        "director": director,
        "year": int(year),
        "rating": float(rating),
        "duration": int(duration),
        "genre": genre,
        "actors_count": int(actors),
    }


def parse_movies(html, limit=10):
    """Parse *html* into ``{"data_id": ..., "movies": [...]}``.

    At most *limit* movies are returned (the original kept the first 10).
    """
    # Imported here so the pure helpers above stay importable without bs4.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, "html.parser")
    data_id = extract_data_id(soup.get_text(" ", strip=True))

    movies = []
    for row in soup.find_all("tr"):
        cells = [cell.get_text() for cell in row.find_all(["td", "th"])]
        movie = row_to_movie(cells)
        if movie is None:
            continue
        movies.append(movie)
        if len(movies) >= limit:
            break
    return {"data_id": data_id, "movies": movies}


def fetch_html(url=URL, timeout=10):
    """Download *url* and return its body decoded as UTF-8.

    Raises ``requests.HTTPError`` on a non-2xx response instead of
    silently saving an error page.
    """
    # Imported here so the pure helpers above stay importable without requests.
    import requests

    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    response.encoding = "utf-8"
    return response.text


def main():
    """Fetch the page, save the raw HTML, then save the parsed movies."""
    html = fetch_html()
    with open("movies.html", "w", encoding="utf-8") as f:
        f.write(html)

    final_data = parse_movies(html)
    if not final_data["movies"]:
        # movies.html is already on disk, so the markup can be inspected.
        raise RuntimeError(
            "no movie rows found in the downloaded page; inspect movies.html"
        )

    with open("movies.json", "w", encoding="utf-8") as f:
        json.dump(final_data, f, ensure_ascii=False, indent=4)

    print("爬取完成,已生成movies.json和movies.html")


if __name__ == "__main__":
    main()

# Remaining patch context from the same source lines (not Python; kept
# verbatim as comments):
#
#   \ No newline at end of file
#   diff --git a/reviews.json b/reviews.json
#   new file mode 100644
#   index 0000000..161ff31
#   --- /dev/null
#   +++ b/reviews.json
#   @@ -0,0 +1,7 @@
#   +[
#   +    {"id": 1, "text": "外卖小哥送得超快,餐盒还是热的,炸鸡酥脆多汁,酸辣粉也很正宗,分量足,五星好评!"},
#   +    {"id": 2, "text": "等了一个半小时才送到,汤全洒了,面坨成一坨,联系客服也不回,太让人失望了。"},
#   +    {"id": 3, "text": "奶茶是用料很扎实的现煮茶,珍珠Q弹有嚼劲,配送员态度也好,下次还会再点。"},
#   +    {"id": 4, "text": "配送速度一般,但披萨味道不错,芝士拉丝效果好,性价比高,值得推荐。"},
#   +    {"id": 5, "text": "点的麻辣烫食材不新鲜,有股怪味,吃完拉肚子,商家推卸责任,再也不点了。"}
#   +]