Files
task-2-3-File-Operations/260331_2509165028吴海霖.py
2509165028 f8f662d251 2-3
2026-03-31 11:31:24 +08:00

40 lines
1.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import csv
import json
import re
import time
from html import unescape

import requests
from bs4 import BeautifulSoup
# Request headers that mimic a regular browser visit.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
url = 'https://movie.douban.com/top250'

# Seconds to wait on the server before giving up; without a timeout
# requests.get() may block forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10

# Matches <span class="title">movie name</span> and captures the inner text.
title_pattern = r'<span class="title">([^<]+)</span>'
# NOTE(review): confirm the live page really contains <span class="abstract">
# elements; if it does not, the second listing below is simply empty.
abstract_pattern = r'<span class="abstract">([^<]+)</span>'


def extract_primary_texts(pattern, page_html):
    """Return the cleaned captures of *pattern* that are not '/'-prefixed.

    Each capture is HTML-unescaped and stripped of surrounding whitespace
    (including non-breaking spaces) before the '/' check, so an alternate
    title written as ``&nbsp;/&nbsp;Name`` in the raw markup is filtered out
    just like one that literally starts with '/'. Captures that are empty
    after cleaning are dropped.

    Args:
        pattern: Regular expression with one capturing group.
        page_html: Raw HTML text to search.

    Returns:
        List of cleaned strings, in document order.
    """
    cleaned = (unescape(raw).strip() for raw in re.findall(pattern, page_html))
    return [text for text in cleaned if text and not text.startswith('/')]


def print_ranked(heading, items, limit=10):
    """Print *heading* followed by the first *limit* items, numbered from 1."""
    print(heading)
    for rank, item in enumerate(items[:limit], 1):
        print(f'{rank}. {item}')


def main():
    """Fetch the Douban Top250 page and print the first titles and abstracts.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response.
    """
    print('开始爬取豆瓣电影Top250...')
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    print(f'状态码: {response.status_code}')
    print(f'内容长度: {len(response.text)} 字符')
    # Fail loudly on an error response instead of silently parsing an error
    # page into empty result lists.
    response.raise_for_status()

    page_html = response.text

    # Keep only primary titles; alternate names are the '/'-prefixed spans.
    chinese_titles = extract_primary_texts(title_pattern, page_html)
    print_ranked('电影名称前10部', chinese_titles)

    chinese_abstract = extract_primary_texts(abstract_pattern, page_html)
    print_ranked('电影的导演前10部', chinese_abstract)


if __name__ == '__main__':
    main()