# diff --git a/数据爬取/import requests.py b/数据爬取/import requests.py
# new file mode 100644
# index 0000000..000da1f
# --- /dev/null
# +++ b/数据爬取/import requests.py
# @@ -0,0 +1,24 @@
"""Fetch the exam movie listing page and pull the id and title out of each item.

The script downloads ``url`` with a browser-like User-Agent, parses the
response with BeautifulSoup's ``html.parser`` and walks every
``<th class="item">`` element, reading the text of its ``span.meta`` and
``span.title`` children.
"""
import json

import requests
from bs4 import BeautifulSoup as bs

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

headers = {
    # Product tokens must be separated by spaces; the adjacent string
    # literals are concatenated as-is, so each continuation starts with one.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
                  " AppleWebKit/537.36 (KHTML, like Gecko)"
                  " Chrome/120.0.0.0 Safari/537.36",
}

url = "https://exam.detr.top/exam-b/movies"

# NOTE(review): nothing is appended to this list yet; presumably it is meant
# to collect the scraped records (and `json` to serialize them) - confirm.
data = []

response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
response.raise_for_status()
response.encoding = "utf-8"
html = response.text

soup = bs(html, "html.parser")
items = soup.find_all("th", class_="item")

for i, item in enumerate(items):
    print(i)
    # NOTE: find() returns None when no matching tag exists, so an item
    # without these spans raises AttributeError on get_text().
    movie_id = item.find("span", class_="meta").get_text()

    title = item.find("span", class_="title").get_text()

# \ No newline at end of file
# The rest of this file is the movie.html hunk of the same diff:
# diff --git a/数据爬取/movie.html b/数据爬取/movie.html
# new file mode 100644
# index 0000000..b0f3f49
# --- /dev/null
# +++ b/数据爬取/movie.html
# @@ -0,0 +1,152 @@
+ +| 编号 | +电影名 | +导演 | +上映年份 | +评分 | +时长(分钟) | +类型 | +主演数 | +
|---|---|---|---|---|---|---|---|
| 1 | +泰坦尼克号 | +Frank Darabont | +2018 | +6.3 | +168 | +悬疑 | +3 | +
| 2 | +千与千寻 | +陈凯歌 | +2020 | +7.7 | +161 | +悬疑 | +5 | +
| 3 | +肖申克的救赎 | +Robert Zemeckis | +2012 | +8.0 | +155 | +剧情 | +5 | +
| 4 | +三傻大闹宝莱坞 | +James Cameron | +1995 | +9.5 | +120 | +剧情 | +5 | +
| 5 | +盗梦空间 | +宫崎骏 | +1992 | +7.8 | +169 | +剧情 | +3 | +
| 6 | +霸王别姬 | +Christopher Nolan | +2003 | +9.2 | +175 | +喜剧 | +5 | +
| 7 | +阿甘正传 | +Lasse Hallström | +2014 | +7.4 | +142 | +悬疑 | +5 | +
| 8 | +星际穿越 | +Rajkumar Hirani | +1999 | +6.4 | +171 | +爱情 | +2 | +
| 9 | +放牛班的春天 | +Christophe Barratier | +1993 | +8.5 | +163 | +科幻 | +2 | +
| 10 | +忠犬八公的故事 | +Christopher Nolan | +1997 | +6.6 | +112 | +动画 | +4 | +