Files
task-2-4-regular-expression/44.py
2026-04-02 16:03:40 +08:00

34 lines
1.0 KiB
Python

import requests
from bs4 import BeautifulSoup
# Fetch a Douban doulist page and print the title, rating and abstract of at
# most the first ten list entries that contain a "/subject/" title link.

# Request headers: a desktop Chrome User-Agent string is sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
url = 'https://www.douban.com/doulist/3936288/'

# requests applies no timeout by default; set one so a stalled connection
# cannot hang the script forever.
response = requests.get(url, headers=headers, timeout=10)
# Raise on a 4xx/5xx status instead of parsing an error page and silently
# printing nothing.
response.raise_for_status()
response.encoding = 'utf-8'  # decode the response body as UTF-8

soup = BeautifulSoup(response.text, 'lxml')
movies = soup.select('.doulist-item')  # extract the movie entries

count = 0  # number of entries printed so far
for movie in movies:
    title_link = movie.select_one('a[href^="/subject/"]')
    if not title_link:
        # Entries without a matching title link are skipped and not counted.
        continue

    rating = movie.select_one('.rating_nums')
    intro = movie.select_one('.abstract')

    title = title_link.get_text(strip=True)
    # Fall back to placeholder text when the rating or abstract is missing.
    rating_text = rating.text.strip() if rating else '无评分'
    intro_text = intro.get_text(strip=True) if intro else '暂无简介'

    print(f'{count + 1}. 电影:{title}')
    print(f' 评分:{rating_text}')
    print(f' 简介:{intro_text}')
    print('-' * 60)

    count += 1
    if count >= 10:
        # Stop after ten printed entries.
        break