上传文件至 /

This commit is contained in:
2026-03-31 11:24:15 +08:00
parent cb0c5331eb
commit ecaef7e651
5 changed files with 168 additions and 0 deletions

43
26.03.31 43movies.py Normal file
View File

@@ -0,0 +1,43 @@
import requests
import re
import csv
from html import unescape

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}
url = 'https://movie.douban.com/top250'

# The title pattern accepts HTML entities inside the span so that alternate
# titles are captured too; entities are decoded afterwards in parse_movies().
# NOTE(review): the live markup appears to prefix the alternate title with
# "&nbsp;/&nbsp;" -- confirm against the page.
title_pattern = r'<span class="title">([^<]+)</span>'
rating_pattern = r'<span class="rating_num"[^>]*>(\d+\.?\d*)</span>'
fieldnames = ['rank', 'title', 'en_title', 'rating']


def fetch_page(timeout=10):
    """Download the ranking page and return its HTML text.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (so an error page is never parsed).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text


def parse_movies(page, limit=10):
    """Extract up to ``limit`` movie records from the page HTML.

    Title spans come in groups: a primary title, optionally followed by an
    alternate title whose text starts with '/'. Each alternate title is
    attached to the film that precedes it, and ratings are paired with films
    by film position (not by raw title-span position).

    Args:
        page: HTML text of the ranking page.
        limit: Maximum number of films to return.

    Returns:
        A list of dicts with keys 'rank', 'title', 'en_title', 'rating'.
        'en_title' and 'rating' are '' when not present in the page.
    """
    ratings = re.findall(rating_pattern, page)
    movies = []
    for raw in re.findall(title_pattern, page):
        # Decode entities and normalise non-breaking spaces before inspecting.
        text = unescape(raw).replace('\xa0', ' ').strip()
        if not text:
            continue
        if text.startswith('/'):
            # Alternate title: belongs to the most recent film, first one wins.
            if movies and not movies[-1]['en_title']:
                movies[-1]['en_title'] = text.lstrip('/').strip()
            continue
        if len(movies) >= limit:
            # Stop only when the next film starts, so the last accepted film
            # still receives its alternate title.
            break
        index = len(movies)
        movies.append({
            'rank': index + 1,
            'title': text,
            'en_title': '',
            'rating': ratings[index] if index < len(ratings) else '',
        })
    return movies


def save_csv(movies, path='movies.csv'):
    """Write the movie records to ``path`` as UTF-8 CSV with a header row."""
    with open(path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(movies)


def main():
    """Scrape the top films, save them to movies.csv and echo the file."""
    movies = parse_movies(fetch_page())
    save_csv(movies)
    print('已保存到 movies.csv')
    # Read the file back so the user can verify what was written.
    with open('movies.csv', 'r', encoding='utf-8') as f:
        for line in f:
            print(line.strip())


if __name__ == '__main__':
    main()

33
26.03.31 43test.py Normal file
View File

@@ -0,0 +1,33 @@
import requests
import re
# 1. Request settings for fetching the page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}
url = 'https://movie.douban.com/top250'

# 2. Movie names live in <span class="title"> elements. The character class
#    excludes '<' and '&', so spans whose text contains an HTML entity are
#    not matched by this pattern.
pattern = r'<span class="title">([^<&]+)</span>'


def fetch_page(timeout=10):
    """Download the ranking page and return its HTML text.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status, so an error page is never treated as
            a valid ranking page.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text


def extract_chinese_titles(page, limit=10):
    """Return the first ``limit`` primary titles found in ``page``.

    Args:
        page: HTML text of the ranking page.
        limit: Maximum number of titles to return.

    Returns:
        A list of title strings in page order. Entries starting with '/'
        (alternate-language names) are dropped.
    """
    titles = re.findall(pattern, page)
    # 3. Keep only primary names; alternate names start with '/'.
    chinese_titles = [t for t in titles if not t.startswith('/')]
    return chinese_titles[:limit]


def main():
    """Scrape the top titles, save them to movies.txt and echo the file."""
    top10 = extract_chinese_titles(fetch_page())
    # 4. Save as a numbered list, one title per line.
    with open('movies.txt', 'w', encoding='utf-8') as f:
        f.writelines(f'{i}. {title}\n' for i, title in enumerate(top10, 1))
    print('已保存前10部电影到 movies.txt')
    # Show the file contents for verification.
    with open('movies.txt', 'r', encoding='utf-8') as f:
        print(f.read())


if __name__ == '__main__':
    main()

52
26.03.31 43test1.py Normal file
View File

@@ -0,0 +1,52 @@
import requests
import re
import json
from html import unescape

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}
url = 'https://movie.douban.com/top250'

# Span contents may contain HTML entities; they are decoded in _clean().
# NOTE(review): the live markup appears to prefix the alternate title with
# "&nbsp;/&nbsp;" -- confirm against the page.
title_re = re.compile(r'<span class="title">([^<]+)</span>')
rating_re = re.compile(r'<span class="rating_num"[^>]*>(\d+\.?\d*)</span>')
quote_re = re.compile(r'<span class="inq">([^<]+)</span>')


def fetch_page(timeout=10):
    """Download the ranking page and return its HTML text.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text


def _clean(text):
    """Decode HTML entities, turn non-breaking spaces into spaces, and trim."""
    return unescape(text).replace('\xa0', ' ').strip()


def _first(pattern, page, start, end):
    """Return the cleaned first capture of ``pattern`` in page[start:end], or ''."""
    match = pattern.search(page, start, end)
    return _clean(match.group(1)) if match else ''


def parse_movies(page, limit=10):
    """Extract up to ``limit`` movie records from the page HTML.

    The page is cut into one segment per film, each running from a primary
    title span to the next primary title span. Rating and quote are looked
    up inside the film's own segment, so a film without a quote gets '' and
    does not shift the quotes of later films.

    NOTE(review): this assumes a film's rating and quote spans appear after
    its title span in the markup -- confirm against the page.

    Args:
        page: HTML text of the ranking page.
        limit: Maximum number of films to return.

    Returns:
        A list of dicts with keys 'rank', 'title', 'en_title', 'rating',
        'quote'. Only films actually found are returned; missing fields
        are ''.
    """
    entries = []
    for match in title_re.finditer(page):
        text = _clean(match.group(1))
        if not text:
            continue
        if text.startswith('/'):
            # Alternate title: attach to the preceding film, first one wins.
            if entries and not entries[-1]['en_title']:
                entries[-1]['en_title'] = text.lstrip('/').strip()
            continue
        entries.append({'start': match.start(), 'title': text, 'en_title': ''})

    # Segment i spans boundaries[i] .. boundaries[i + 1].
    boundaries = [entry['start'] for entry in entries] + [len(page)]
    movies = []
    for index, entry in enumerate(entries[:limit]):
        start, end = boundaries[index], boundaries[index + 1]
        movies.append({
            'rank': index + 1,
            'title': entry['title'],
            'en_title': entry['en_title'],
            'rating': _first(rating_re, page, start, end),
            'quote': _first(quote_re, page, start, end),
        })
    return movies


def main():
    """Scrape the top films, save them to movies.json and print a preview."""
    movies = parse_movies(fetch_page())
    with open('movies.json', 'w', encoding='utf-8') as f:
        json.dump(movies, f, ensure_ascii=False, indent=2)
    print('已保存到 movies.json')
    # Reload the file to verify that it round-trips.
    with open('movies.json', 'r', encoding='utf-8') as f:
        data = json.load(f)
    print(f'共保存 {len(data)} 部电影')
    for m in data[:3]:
        print(f" {m['rank']}. {m['title']} ({m['en_title']}) - {m['rating']}")


if __name__ == '__main__':
    main()

18
26.03.31 43test2.py Normal file
View File

@@ -0,0 +1,18 @@
import csv
def high_rated(rows, threshold=9.5):
    """Return the rows whose 'rating' is strictly greater than ``threshold``.

    Rows whose rating is missing, empty or not numeric are skipped instead
    of aborting the whole run.

    Args:
        rows: Iterable of mappings with a 'rating' entry (as read by
            csv.DictReader).
        threshold: Exclusive lower bound for the rating.

    Returns:
        A list of the matching rows, in input order.
    """
    selected = []
    for row in rows:
        try:
            rating = float(row['rating'])
        except (KeyError, TypeError, ValueError):
            # No usable rating for this row; it cannot match.
            continue
        if rating > threshold:
            selected.append(row)
    return selected


def main():
    """Print every film in movies.csv rated above 9.5, then the count."""
    # newline='' lets the csv module handle line endings itself.
    with open('movies.csv', 'r', encoding='utf-8', newline='') as f:
        rows = list(csv.DictReader(f))
    print('评分高于9.5的电影:')
    print('-' * 40)
    matches = high_rated(rows)
    for row in matches:
        print(f"{row['rank']}. {row['title']}")
        print(f" 英文名: {row['en_title']}")
        print(f" 评分: {row['rating']}")
        print()
    print(f'{len(matches)} 部评分超过9.5')


if __name__ == '__main__':
    main()

22
26.03.31 43test3.py Normal file
View File

@@ -0,0 +1,22 @@
import json
def _rating(movie):
    """Return the movie's rating as a float, or None if missing or non-numeric."""
    try:
        return float(movie.get('rating'))
    except (TypeError, ValueError):
        return None


def summarize(movies):
    """Compute rating statistics and collect the films that have a quote.

    Films without a usable rating are left out of the average and the
    highest-rated lookup, so blank ratings no longer abort the run.

    Args:
        movies: List of dicts as stored in movies.json.

    Returns:
        A tuple ``(average, highest, with_quote)``. ``average`` is the mean
        rating and ``highest`` the first film with the maximum rating; both
        are None when no film has a usable rating. ``with_quote`` lists the
        films whose 'quote' value is non-empty.
    """
    rated = []
    for movie in movies:
        score = _rating(movie)
        if score is not None:
            rated.append((score, movie))

    if rated:
        average = sum(score for score, _ in rated) / len(rated)
        # max() returns the first maximal item, so ties keep file order.
        highest = max(rated, key=lambda pair: pair[0])[1]
    else:
        average = None
        highest = None

    with_quote = [m for m in movies if m.get('quote')]
    return average, highest, with_quote


def main():
    """Load movies.json and print the average, top film and quotes."""
    # Load the JSON data.
    with open('movies.json', 'r', encoding='utf-8') as f:
        movies = json.load(f)

    average, highest, with_quote = summarize(movies)
    if highest is None:
        # Nothing to average: avoid dividing by zero or calling max() on [].
        print('没有可用的评分数据')
    else:
        # Average rating.
        print(f'Top10 电影平均分: {average:.2f}')
        # Highest-rated film.
        print('\n评分最高的电影:')
        print(f" {highest['rank']}. {highest['title']} ({highest['en_title']})")
        print(f" 评分: {highest['rating']}")

    # Films that come with a quote.
    print(f'\n有经典台词的电影: {len(with_quote)}')
    for m in with_quote:
        print(f" \"{m['quote']}\" —— {m['title']}")


if __name__ == '__main__':
    main()