完成作业
This commit is contained in:
96
33ljh.py
Normal file
96
33ljh.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import csv
|
||||
# Desktop-browser User-Agent sent with every page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# One dict per movie, appended in page order by the loop at the bottom.
movies = []

# Seconds to wait for a page before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 10


def _text_of(tag, default):
    """Return ``tag.text``, or *default* when the lookup found no tag."""
    return tag.text if tag else default


def _split_credits(info):
    """Return ``(director, first_actor)`` parsed from the info paragraph text.

    Only the single line that contains ``导演:`` is inspected. Splitting the
    whole multi-line text let the following line (and its newline) leak into
    the director whenever ``主演:`` was missing from the credits.
    Both values are empty strings when no ``导演:`` marker is present; the
    actor is empty when there is no ``主演:`` marker.
    """
    credit_line = next(
        (line for line in info.splitlines() if '导演:' in line), ""
    )
    if not credit_line:
        return "", ""
    after_director = credit_line.split('导演:', 1)[1]
    director_text, _, cast_text = after_director.partition('主演:')
    # Only the first name of the '/'-separated cast list is kept.
    return director_text.strip(), cast_text.split('/')[0].strip()


def _year_from_info(info):
    """Return a leading four-digit year from the last info line, or ``""``.

    NOTE(review): assumes the last non-empty line of the info paragraph
    starts with the release year (e.g. "1994 / ...") -- confirm against the
    live page markup.
    """
    rows = [row.strip() for row in info.splitlines() if row.strip()]
    if not rows:
        return ""
    prefix = rows[-1][:4]
    if len(prefix) == 4 and all(ch in '0123456789' for ch in prefix):
        return prefix
    return ""


def _parse_item(item):
    """Build the movie dict for one ``div.item`` element.

    Every field falls back to a placeholder when its tag is missing, so a
    partially populated item never raises.
    """
    title_tag = item.find('span', class_='title')
    if title_tag:
        title = title_tag.text
    else:
        title = "未找到标题"
        print(f"在这个item里没找到标题: {item}")

    other_tag = item.find('span', class_='other')
    other_title = other_tag.text.replace('/', '').strip() if other_tag else ""

    # The info paragraph is looked up with an empty class filter.
    info_tag = item.find('p', class_='')
    info = info_tag.text.strip() if info_tag else ""

    # Prefer an explicit year span (next to the "playable" marker when there
    # is one); otherwise fall back to the year embedded in the info text.
    year_tag = None
    playable_tag = item.find('span', class_='playable')
    if playable_tag:
        year_tag = playable_tag.find_previous_sibling('span', class_='year')
    if year_tag is None:
        year_tag = item.find('span', class_='year')
    if year_tag:
        year = year_tag.text.strip('()')
    else:
        year = _year_from_info(info) or "未知年份"

    rating = _text_of(item.find('span', class_='rating_num'), "0.0")

    # The vote count is the last <span> inside the star block. Guard both the
    # missing block and an empty span list instead of chaining the calls.
    star_tag = item.find('div', class_='star')
    star_spans = star_tag.find_all('span') if star_tag else []
    if star_spans:
        people_count = star_spans[-1].text.replace('人评价', '').strip()
    else:
        people_count = "0"

    quote = _text_of(item.find('span', class_='inq'), "暂无短评")

    director, actors = _split_credits(info)

    return {
        '中文名': title,
        '英文名': other_title,
        '年份': year,
        '导演': director,
        '主演': actors,
        '评分': rating,
        '评价人数': people_count,
        '精选短评': quote,
    }


# Walk the paginated list: start offsets 0, 25, ..., 225.
for start in range(0, 250, 25):
    url = f'https://movie.douban.com/top250?start={start}'
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Treat HTTP error statuses as failures instead of parsing an error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Skip this page but keep going so the other pages are still collected.
        print(f"请求失败, 已跳过: {url} ({exc})")
        continue

    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    movies.extend(
        _parse_item(item) for item in soup.find_all('div', class_='item')
    )
|
||||
|
||||
|
||||
# Write the collected movies to movies.txt as tab-separated text:
# one header row, then one row per movie, columns in the order below.
_txt_columns = ('中文名', '英文名', '年份', '导演', '主演', '评分', '评价人数', '精选短评')

# A tab or line break inside a scraped field would split the row or shift its
# columns, so those characters are replaced by a single space.
_txt_unsafe = str.maketrans({'\t': ' ', '\r': ' ', '\n': ' '})

with open('movies.txt', 'w', encoding='utf-8') as f:
    f.write('\t'.join(_txt_columns) + '\n')
    for movie in movies:
        cells = (str(movie[column]).translate(_txt_unsafe) for column in _txt_columns)
        f.write('\t'.join(cells) + '\n')
|
||||
|
||||
|
||||
# Write the collected movies to movies.csv: a header row followed by one row
# per movie dict. newline='' leaves line-ending handling to the csv module.
csv_fieldnames = ['中文名', '英文名', '年份', '导演', '主演', '评分', '评价人数', '精选短评']

with open('movies.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=csv_fieldnames)
    writer.writeheader()
    for record in movies:
        writer.writerow(record)
|
||||
|
||||
|
||||
# Write the collected movies to movies.json as an indented array, keeping
# non-ASCII characters readable instead of \u-escaping them.
json_text = json.dumps(movies, ensure_ascii=False, indent=4)
with open('movies.json', 'w', encoding='utf-8') as f:
    f.write(json_text)

# Final status message listing the three output files.
print("爬取完成! 已生成 movies.txt, movies.csv, movies.json")
|
||||
Reference in New Issue
Block a user