From ab1f97be645e3b16cd6e93d19a716b4771a3a48b Mon Sep 17 00:00:00 2001
From: 2509165038 <2509165038@student.edu.cn>
Date: Tue, 23 Jun 2026 11:35:49 +0800
Subject: [PATCH] 1

---
 数据爬取/import requests.py |  39 +++++++++
 数据爬取/movie.html         | 152 ++++++++++++++++++++++++++++++++++++
 2 files changed, 191 insertions(+)
 create mode 100644 数据爬取/import requests.py
 create mode 100644 数据爬取/movie.html

diff --git a/数据爬取/import requests.py b/数据爬取/import requests.py
new file mode 100644
index 0000000..000da1f
--- /dev/null
+++ b/数据爬取/import requests.py
@@ -0,0 +1,39 @@
+"""Fetch the exam movie listing page and collect each entry's id and title."""
+import json
+
+import requests
+from bs4 import BeautifulSoup as bs
+
+# Browser-like User-Agent. The value is built by implicit string
+# concatenation, so every continuation piece carries its own leading space.
+headers = {
+    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
+    " AppleWebKit/537.36 (KHTML, like Gecko)"
+    " Chrome/120.0.0.0 Safari/537.36",
+}
+
+url = "https://exam.detr.top/exam-b/movies"
+
+# One {"id": ..., "title": ...} dict per scraped entry.
+data = []
+
+# Without a timeout, requests can block indefinitely on a stalled server.
+response = requests.get(url, headers=headers, timeout=10)
+# Stop on a 4xx/5xx answer instead of parsing an error page.
+response.raise_for_status()
+response.encoding = "utf-8"
+html = response.text
+soup = bs(html, "html.parser")
+
+# NOTE(review): "th" is a table header cell; confirm against the live page
+# that the entries really are <th class="item"> and not e.g. <tr>/<div>.
+items = soup.find_all("th", class_="item")
+for i, item in enumerate(items):
+    print(i)
+    meta_span = item.find("span", class_="meta")
+    title_span = item.find("span", class_="title")
+    if meta_span is None or title_span is None:
+        # find() returns None when a span is absent; skip the entry rather
+        # than fail with AttributeError on None.get_text().
+        continue
+    data.append({"id": meta_span.get_text(), "title": title_span.get_text()})
\ No newline at end of file
diff --git a/数据爬取/movie.html b/数据爬取/movie.html
new file mode 100644
index 0000000..b0f3f49
--- /dev/null
+++ b/数据爬取/movie.html
@@ -0,0 +1,152 @@
+
+
+
+
+
+
+
+ 电影列表
+
+
+
+

电影列表

+

数据编号:B-20260623-6181

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
编号电影名导演上映年份评分时长(分钟)类型主演数
1泰坦尼克号Frank Darabont20186.3168悬疑3
2千与千寻陈凯歌20207.7161悬疑5
3肖申克的救赎Robert Zemeckis20128.0155剧情5
4三傻大闹宝莱坞James Cameron19959.5120剧情5
5盗梦空间宫崎骏19927.8169剧情3
6霸王别姬Christopher Nolan20039.2175喜剧5
7阿甘正传Lasse Hallström20147.4142悬疑5
8星际穿越Rajkumar Hirani19996.4171爱情2
9放牛班的春天Christophe Barratier19938.5163科幻2
10忠犬八公的故事Christopher Nolan19976.6112动画4
+ + \ No newline at end of file