From 3b39c7148a01e2afaeb3db8e44ec1e8638fc3d8b Mon Sep 17 00:00:00 2001 From: 2509165045 <2509165045@student.edu.cn> Date: Tue, 23 Jun 2026 12:08:02 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9C=9F=E6=9C=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- movie.json | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 期末/可视化 | 1 + 期末/爬豆瓣 | 57 +++++++++++++----------- 3 files changed, 158 insertions(+), 25 deletions(-) create mode 100644 期末/可视化 diff --git a/movie.json b/movie.json index 9dd91e4..09e0f24 100644 --- a/movie.json +++ b/movie.json @@ -1,4 +1,129 @@ [ + { + "title": "肖申克的救赎", + "actor": " 蒂姆·罗宾斯 Tim Robbins /...", + "quote": "希望让人自由。" + }, + { + "title": "霸王别姬", + "actor": " 张国荣 Leslie Cheung / 张丰毅 Fengyi Zha...", + "quote": "风华绝代。" + }, + { + "title": "泰坦尼克号", + "actor": " 莱昂纳多·迪卡普里奥 Leonardo...", + "quote": "失去的才是永恒的。" + }, + { + "title": "阿甘正传", + "actor": " 汤姆·汉克斯 Tom Hanks / ...", + "quote": "一部美国近现代史。" + }, + { + "title": "千与千寻", + "actor": " 柊瑠美 Rumi Hîragi / 入野自由 Miy...", + "quote": "最好的宫崎骏,最好的久石让。" + }, + { + "title": "美丽人生", + "actor": " 罗伯托·贝尼尼 Roberto Beni...", + "quote": "最美的谎言。" + }, + { + "title": "星际穿越", + "actor": " 马修·麦康纳 Matthew Mc...", + "quote": "爱是一种力量,让我们超越时空感知它的存在。" + }, + { + "title": "这个杀手不太冷", + "actor": " 让·雷诺 Jean Reno / 娜塔莉·波特曼 ...", + "quote": "怪蜀黍和小萝莉不得不说的故事。" + }, + { + "title": "盗梦空间", + "actor": " 莱昂纳多·迪卡普里奥 Le...", + "quote": "诺兰给了我们一场无法盗取的梦。" + }, + { + "title": "楚门的世界", + "actor": " 金·凯瑞 Jim Carrey / 劳拉·琳妮 Lau...", + "quote": "如果再也不能见到你,祝你早安,午安,晚安。" + }, + { + "title": "辛德勒的名单", + "actor": " 连姆·尼森 Liam Neeson...", + "quote": "拯救一个人,就是拯救整个世界。" + }, + { + "title": "忠犬八公的故事", + "actor": " 理查·基尔 Richard Ger...", + "quote": "永远都不能忘记你所爱的人。" + }, + { + "title": "海上钢琴师", + "actor": " 蒂姆·罗斯 Tim Roth / ...", + "quote": "每个人都要走一条自己坚定了的路,就算是粉身碎骨。" + }, + { + "title": "疯狂动物城", + "actor": " 金妮弗·...", + "quote": "迪士尼给我们营造的乌托邦就是这样,永远善良勇敢,永远出乎意料。" + }, + { + "title": "三傻大闹宝莱坞", + "actor": " 阿米尔·汗 Aamir Khan / 卡...", + "quote": "英俊版憨豆,高情商版谢耳朵。" + }, + { + "title": "机器人总动员", + "actor": " 本·贝尔特 Ben Burtt / 艾丽...", + "quote": "小瓦力,大人生。" + }, + { + "title": "放牛班的春天", + "actor": " 让-巴蒂斯特·莫尼...", + "quote": "天籁一般的童声,是最接近上帝的存在。" + }, + { + "title": "无间道", + "actor": " 刘德华 Andy Lau / 梁朝伟 Tony Leung Chiu W...", + "quote": "香港电影史上永不过时的杰作。" + }, + { + "title": "控方证人", + "actor": " 泰隆·鲍华 Tyrone Power / 玛琳·...", + "quote": "比利·怀德满分作品。" + }, + { + "title": "寻梦环游记", + "actor": " ...", + "quote": "死亡不是真的逝去,遗忘才是永恒的消亡。" + }, + { + "title": "大话西游之大圣娶亲", + "actor": " 周星驰 Stephen Chow / 吴孟达 Man Tat Ng...", + "quote": "一生所爱。" + }, + { + "title": "熔炉", + "actor": " 孔侑 Yoo Gong / 郑有美 Yu-mi Jung /...", + "quote": "我们一路奋战不是为了改变世界,而是为了不让世界改变我们。" + }, + { + "title": "触不可及", + "actor": "无", + "quote": "满满温情的高雅喜剧。" + }, + { + "title": "教父", + "actor": " 马龙·白兰度 M...", + "quote": "千万不要记恨你的对手,这样会让你失去理智。" + }, + { + "title": "末代皇帝", + "actor": " 尊龙 John Lone / 陈...", + "quote": "“不要跟我比惨,我比你更惨”再适合这部电影不过了。" + }, { "title": "哈利·波特与魔法石", "actor": " Daniel Radcliffe / Emma Watson / Rupert Grint", diff --git a/期末/可视化 b/期末/可视化 new file mode 100644 index 0000000..9aec204 --- /dev/null +++ b/期末/可视化 @@ -0,0 +1 @@ +import matplotlib.pyplot \ No newline at end of file diff --git a/期末/爬豆瓣 b/期末/爬豆瓣 index 8a33e64..d3bd9f6 100644 --- a/期末/爬豆瓣 +++ b/期末/爬豆瓣 @@ -2,32 +2,39 @@ import requests from bs4 import BeautifulSoup as bs import json headers = {'User-Agent':'Mozilla/5.0(Windows NT 10.0;Win64; x64) AppleWebKit/537.36(KHTML,like Gecko) Chrome/91.0.4472.124 Safari/537.36'} -url="https://movie.douban.com/top250?start=25" -data=[] -resp = requests.get(url, headers = headers) -resp.encoding='uft-8' -soup= bs(resp.text,"html.parser") -items= soup.find_all("div",class_="item") -#print(items[0]) -for i in range(len(items)): - print(i) - title=items[i].find("span",class_="title").get_text() +urls = [ + "https://movie.douban.com/top250?start=0", # 第1页(1-25) + "https://movie.douban.com/top250?start=25", # 第2页(26-50) +] +data = [] - actors=items[i].find("div",class_="bd").get_text().strip() - try: - actors=actors.split("主演:")[1].split("\n")[0] - except: - actors="无" - try: - quote=items[i].find("div", class_="bd").find("p",class_="quote").get_text().strip() - except: - quote="无" - data.append({ - "title":title, - "actor":actors, - "quote":quote - }) +for url in urls: + resp = requests.get(url, headers=headers) + resp.encoding = 'utf-8' # 修复了拼写:uft-8 → utf-8 + soup = bs(resp.text, "html.parser") + items = soup.find_all("div", class_="item") + + for i in range(len(items)): + print(i) + title = items[i].find("span", class_="title").get_text() + + actors = items[i].find("div", class_="bd").get_text().strip() + try: + actors = actors.split("主演:")[1].split("\n")[0] + except: + actors = "无" + + try: + quote = items[i].find("div", class_="bd").find("p", class_="quote").get_text().strip() + except: + quote = "无" + + data.append({ + "title": title, + "actor": actors, + "quote": quote + }) print(data) with open("movie.json","w",encoding="utf-8") as f: - json.dump(data,f,ensure_ascii=False,indent=4) + json.dump(data,f,ensure_ascii=False,indent=4) \ No newline at end of file