爬取代码

This commit is contained in:
2509165045
2026-06-11 16:04:26 +08:00
parent 351288a0a5
commit ea50503e0f
2 changed files with 160 additions and 0 deletions

127
movie.json Normal file
View File

@@ -0,0 +1,127 @@
[
{
"title": "肖申克的救赎",
"actor": "无",
"quote": "希望让人自由。"
},
{
"title": "霸王别姬",
"actor": "无",
"quote": "风华绝代。"
},
{
"title": "泰坦尼克号",
"actor": "无",
"quote": "失去的才是永恒的。"
},
{
"title": "阿甘正传",
"actor": "无",
"quote": "一部美国近现代史。"
},
{
"title": "千与千寻",
"actor": "无",
"quote": "最好的宫崎骏,最好的久石让。"
},
{
"title": "美丽人生",
"actor": "无",
"quote": "最美的谎言。"
},
{
"title": "星际穿越",
"actor": "无",
"quote": "爱是一种力量,让我们超越时空感知它的存在。"
},
{
"title": "这个杀手不太冷",
"actor": "无",
"quote": "怪蜀黍和小萝莉不得不说的故事。"
},
{
"title": "盗梦空间",
"actor": "无",
"quote": "诺兰给了我们一场无法盗取的梦。"
},
{
"title": "楚门的世界",
"actor": "无",
"quote": "如果再也不能见到你,祝你早安,午安,晚安。"
},
{
"title": "辛德勒的名单",
"actor": "无",
"quote": "拯救一个人,就是拯救整个世界。"
},
{
"title": "忠犬八公的故事",
"actor": "无",
"quote": "永远都不能忘记你所爱的人。"
},
{
"title": "海上钢琴师",
"actor": "无",
"quote": "每个人都要走一条自己坚定了的路,就算是粉身碎骨。"
},
{
"title": "疯狂动物城",
"actor": "无",
"quote": "迪士尼给我们营造的乌托邦就是这样,永远善良勇敢,永远出乎意料。"
},
{
"title": "三傻大闹宝莱坞",
"actor": "无",
"quote": "英俊版憨豆,高情商版谢耳朵。"
},
{
"title": "机器人总动员",
"actor": "无",
"quote": "小瓦力,大人生。"
},
{
"title": "放牛班的春天",
"actor": "无",
"quote": "天籁一般的童声,是最接近上帝的存在。"
},
{
"title": "无间道",
"actor": "无",
"quote": "香港电影史上永不过时的杰作。"
},
{
"title": "控方证人",
"actor": "无",
"quote": "比利·怀德满分作品。"
},
{
"title": "寻梦环游记",
"actor": "无",
"quote": "死亡不是真的逝去,遗忘才是永恒的消亡。"
},
{
"title": "大话西游之大圣娶亲",
"actor": "无",
"quote": "一生所爱。"
},
{
"title": "熔炉",
"actor": "无",
"quote": "我们一路奋战不是为了改变世界,而是为了不让世界改变我们。"
},
{
"title": "触不可及",
"actor": "无",
"quote": "满满温情的高雅喜剧。"
},
{
"title": "教父",
"actor": "无",
"quote": "千万不要记恨你的对手,这样会让你失去理智。"
},
{
"title": "末代皇帝",
"actor": "无",
"quote": "“不要跟我比惨,我比你更惨”再适合这部电影不过了。"
}
]

33
期末/爬豆瓣 Normal file
View File

@@ -0,0 +1,33 @@
import requests
from bs4 import BeautifulSoup as bs
import json
# Request headers: a browser-like User-Agent is sent with the page request.
headers = {'User-Agent':'Mozilla/5.0(Windows NT 10.0;Win64; x64) AppleWebKit/537.36(KHTML,like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
# First page of the Douban Top 250 list (start=0).
url = "https://movie.douban.com/top250?start=0"

# Placeholder stored in the output when a field cannot be found on the page.
MISSING = "无"


def extract_actors(info_text):
    """Return the cast portion of one movie's info text.

    The text after the "主演" label, up to the end of that line, is returned
    with surrounding whitespace removed. ``MISSING`` is returned when the
    label is absent or nothing follows it.

    Args:
        info_text: Plain text of the item's ``div.bd`` element.

    Returns:
        The cast string, or ``MISSING``.
    """
    # The previous code only looked for the label with a full-width colon and
    # never matched (every stored actor was the placeholder), so the page
    # presumably uses an ASCII colon. Both spellings are accepted here.
    for label in ("主演:", "主演:"):
        _, found, rest = info_text.partition(label)
        if found:
            actors = rest.split("\n")[0].strip()
            return actors or MISSING
    return MISSING


def parse_items(html):
    """Parse the list page HTML into a list of movie records.

    Args:
        html: Decoded HTML text of a Top 250 list page.

    Returns:
        A list of dicts with the keys ``title``, ``actor`` and ``quote``;
        fields that cannot be located are set to ``MISSING``.
    """
    soup = bs(html, "html.parser")
    movies = []
    for index, item in enumerate(soup.find_all("div", class_="item")):
        print(index)  # progress output, one line per parsed item

        title_tag = item.find("span", class_="title")
        title = title_tag.get_text() if title_tag is not None else MISSING

        # Explicit None checks replace the former bare ``except`` clauses so
        # unrelated errors are no longer silently swallowed.
        body = item.find("div", class_="bd")
        if body is None:
            actors = MISSING
            quote = MISSING
        else:
            actors = extract_actors(body.get_text().strip())
            quote_tag = body.find("p", class_="quote")
            quote = quote_tag.get_text().strip() if quote_tag is not None else MISSING

        movies.append({
            "title": title,
            "actor": actors,
            "quote": quote
        })
    return movies


def main():
    """Download the list page, parse it and write ``movie.json``."""
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of overwriting movie.json with an
    # empty list parsed from an error page.
    resp.raise_for_status()
    resp.encoding = 'utf-8'  # was misspelled as 'uft-8'
    data = parse_items(resp.text)
    print(data)
    with open("movie.json", "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()