Files
task-2-4-regular-expression/260402_2509165028.py
2509165028 6a2a5a5cb1 2-4
2026-04-02 16:05:41 +08:00

31 lines
1.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import re
url = 'https://www.douban.com/doulist/3936288/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}

# 1. Movie title
title_pattern = r'<div class="title">([^<]+)</div>'
# 2. Rating
rating_pattern = r'<span class="rating_nums">([^<]+)</span>'
# 3. Classic quote
quote_pattern = r'<span class="inq">([^<]+)</span>'


def fetch_html(page_url: str = url, timeout: float = 10) -> str:
    """Download *page_url* and return its body decoded as UTF-8.

    Args:
        page_url: Address to fetch; defaults to the module-level ``url``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status,
            so an error page is never parsed as if it were the list.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(page_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def extract_movies(html: str, limit: int = 10) -> list:
    """Return up to *limit* ``(title, rating, quote)`` tuples found in *html*.

    Titles whose captured text starts with ``'/'`` are skipped. Each kept
    title is paired with the first rating and first quote located between
    it and the next kept title, so a film that lacks a rating or a quote
    gets ``''`` for that field instead of shifting every later row.

    NOTE(review): this assumes a film's rating and quote appear *after*
    its title in the markup -- confirm against the live page.
    """
    rating_re = re.compile(rating_pattern)
    quote_re = re.compile(quote_pattern)
    kept = [
        match
        for match in re.finditer(title_pattern, html)
        if not match.group(1).startswith('/')
    ]

    movies = []
    for index, match in enumerate(kept[:limit]):
        # The film's own markup ends where the next kept title begins.
        if index + 1 < len(kept):
            segment_end = kept[index + 1].start()
        else:
            segment_end = len(html)
        rating = rating_re.search(html, match.end(), segment_end)
        quote = quote_re.search(html, match.end(), segment_end)
        movies.append((
            match.group(1),
            rating.group(1) if rating else '',
            quote.group(1) if quote else '',
        ))
    return movies


def format_table(movies) -> list:
    """Return the printable table lines (header, rule, one row per film).

    Args:
        movies: Iterable of ``(title, rating, quote)`` tuples; an empty
            quote is rendered as an empty cell rather than ``""``.
    """
    lines = [f'{"排名":<4} {"电影名":<20} {"评分":<6} {"引言"}', '-' * 70]
    for rank, (title, rating, quote) in enumerate(movies, start=1):
        quoted = f'"{quote}"' if quote else ''
        lines.append(f'{rank:<4} {title:<20} {rating:<6} {quoted}')
    return lines


def main() -> None:
    """Fetch the list page and print its first ten films as a table."""
    for line in format_table(extract_movies(fetch_html())):
        print(line)


if __name__ == '__main__':
    main()