作业7
This commit is contained in:
63
爬豆瓣电影top250.py
Normal file
63
爬豆瓣电影top250.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import json
|
||||||
|
import csv
|
||||||
|
|
||||||
|
# Scrape the Douban Top 250 movie list and export it as TSV, CSV and JSON.
#
# The script pages through the list 25 entries at a time, extracts the
# title, alternate title, year, director and cast from every
# <div class="item">, and writes the collected rows to movies.txt,
# movies.csv and movies.json in the current working directory.

TOP250_URL = 'https://movie.douban.com/top250'
PAGE_SIZE = 25
TOTAL_MOVIES = 250
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests can block forever
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}

FIELDNAMES = ['中文名', '英文名', '年份', '导演', '主演']
DIRECTOR_LABEL = '导演:'
ACTORS_LABEL = '主演:'
UNKNOWN_YEAR = "未知年份"


def _first_line(text):
    """Return the first non-empty line of *text*, stripped, or ''."""
    lines = text.strip().splitlines()
    return lines[0].strip() if lines else ''


def parse_credits(info):
    """Split an info paragraph into ``(director, actors)``.

    Only the line that carries each label is used, so any further lines of
    the paragraph do not leak into the result (a value containing a newline
    would also break the one-record-per-line TSV export).

    Args:
        info: Text of the info paragraph; may be empty.

    Returns:
        A ``(director, actors)`` tuple. A part whose label is missing from
        *info* is returned as ``''``.
    """
    director = ''
    actors = ''
    if DIRECTOR_LABEL in info:
        after_director = info.split(DIRECTOR_LABEL, 1)[1]
        director = _first_line(after_director.split(ACTORS_LABEL, 1)[0])
    if ACTORS_LABEL in info:
        actors = _first_line(info.split(ACTORS_LABEL, 1)[1])
    return director, actors


def extract_year(info):
    """Return the first standalone four-digit number in *info*, or ''."""
    import re  # local import: keeps this block independent of the file header

    match = re.search(r'(?<!\d)\d{4}(?!\d)', info)
    return match.group(0) if match else ''


def parse_item(item):
    """Build one movie record from a ``<div class="item">`` element.

    Args:
        item: A BeautifulSoup tag for a single list entry.

    Returns:
        A dict keyed by the names in ``FIELDNAMES``.
    """
    title_tag = item.find('span', class_='title')
    if title_tag:
        title = title_tag.text
    else:
        title = "未找到标题"
        print(f"在这个item里没找到标题:{item}")

    # NOTE(review): the '英文名' column is filled from span.other, as before;
    # confirm on the live page that this span really holds the English name.
    other_tag = item.find('span', class_='other')
    other_title = other_tag.text if other_tag else ""

    info_tag = item.find('p', class_='')
    info = info_tag.text.strip() if info_tag else ""

    # Prefer an explicit year span (next to the "playable" marker if there
    # is one, otherwise anywhere in the item).
    year_tag = None
    playable_tag = item.find('span', class_='playable')
    if playable_tag:
        year_tag = playable_tag.find_previous_sibling('span', class_='year')
    if year_tag is None:
        year_tag = item.find('span', class_='year')

    if year_tag is not None:
        year = year_tag.text.strip('()')
    else:
        # No year span: fall back to a four-digit number in the info text.
        # NOTE(review): presumably the list page prints the year there;
        # verify against the current markup.
        year = extract_year(info) or UNKNOWN_YEAR

    director, actors = parse_credits(info)

    return {
        '中文名': title,
        '英文名': other_title.replace('/', '').strip(),
        '年份': year,
        '导演': director,
        '主演': actors,
    }


def fetch_page(start):
    """Download and parse the list page beginning at offset *start*.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            a blocked or failed request cannot silently yield an empty page.
    """
    response = requests.get(
        f'{TOP250_URL}?start={start}',
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    # Decode as UTF-8 explicitly rather than trusting the guessed charset.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'html.parser')


def scrape_top250():
    """Return the movie records of every list page, in page order."""
    movies = []
    for start in range(0, TOTAL_MOVIES, PAGE_SIZE):
        soup = fetch_page(start)
        movies.extend(
            parse_item(item) for item in soup.find_all('div', class_='item')
        )
    return movies


def save_movies(movies):
    """Write *movies* to movies.txt (TSV), movies.csv and movies.json."""
    with open('movies.txt', 'w', encoding='utf-8') as f:
        f.write('中文名\t英文名\t年份\t导演\t主演\n')
        for movie in movies:
            f.write(f"{movie['中文名']}\t{movie['英文名']}\t{movie['年份']}\t{movie['导演']}\t{movie['主演']}\n")

    # newline='' lets the csv module control line endings itself.
    with open('movies.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(movies)

    with open('movies.json', 'w', encoding='utf-8') as f:
        json.dump(movies, f, ensure_ascii=False, indent=4)


def main():
    """Scrape the list, export the three files and report completion."""
    save_movies(scrape_top250())
    print("爬取完成!已生成 movies.txt, movies.csv, movies.json")


if __name__ == '__main__':
    main()
|
||||||
Reference in New Issue
Block a user