上传文件至 /

2026-03-31 11:24:15 +08:00
parent cb0c5331eb
commit ecaef7e651
5 changed files with 168 additions and 0 deletions
--- a/43movies.py
+++ b/43movies.py
@@ -0,0 +1,43 @@
 import requests
 import re
 import csv
 # Fetch the Douban Top 250 landing page, extract up to ten movies
 # (rank, title, alternate title, rating) with regular expressions, write
 # them to movies.csv, and echo the file back for a quick visual check.
 headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
 }
 url = 'https://movie.douban.com/top250'
 # Without a timeout requests may wait indefinitely on a stalled connection.
 response = requests.get(url, headers=headers, timeout=10)
 # Fail loudly on an HTTP error instead of silently writing an empty CSV.
 response.raise_for_status()
 html = response.text
 # The title pattern rejects '<' and '&', so a span whose text contains an
 # HTML entity is not captured at all.
 title_pattern = r'<span class="title">([^<&]+)</span>'
 rating_pattern = r'<span class="rating_num"[^>]*>(\d+\.?\d*)</span>'
 titles = re.findall(title_pattern, html)
 ratings = re.findall(rating_pattern, html)
 movies = []
 # Walk the captured titles with a separate cursor: a capture starting with
 # '/' is the alternate title of the movie before it and must not take up a
 # rank slot, otherwise ranks and ratings drift out of step with the titles.
 title_index = 0
 while len(movies) < 10 and title_index < len(titles):
    title = titles[title_index]
    title_index += 1
    if title.startswith('/'):
        # Alternate title with no primary title to attach to; skip it.
        continue
    en_title = ''
    if title_index < len(titles) and titles[title_index].startswith('/'):
        en_title = titles[title_index].replace('/ ', '')
        title_index += 1
    rank = len(movies) + 1
    movies.append({
        'rank': rank,
        'title': title,
        'en_title': en_title,
        # Ratings are matched to movies by position; blank when missing.
        'rating': ratings[rank - 1] if rank <= len(ratings) else '',
    })
 with open('movies.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=['rank', 'title', 'en_title', 'rating'])
    writer.writeheader()
    writer.writerows(movies)
 print('已保存到 movies.csv')
 # Read the file back and print it so the result can be verified by eye.
 with open('movies.csv', 'r', encoding='utf-8') as f:
    for line in f:
        print(line.strip())
--- a/43test.py
+++ b/43test.py
@@ -0,0 +1,33 @@
 import requests
 import re
 # 1. Send the request and fetch the page
 headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
 }
 url = 'https://movie.douban.com/top250'
 # Without a timeout requests may wait indefinitely on a stalled connection.
 response = requests.get(url, headers=headers, timeout=10)
 # Stop on an HTTP error rather than saving an empty list and reporting success.
 response.raise_for_status()
 html = response.text
 # 2. Extract the movie names with a regular expression
 # The names live inside <span class="title"> elements
 pattern = r'<span class="title">([^<&]+)</span>'
 titles = re.findall(pattern, html)
 # 3. Filter out the English names (captures starting with '/'),
 #    keeping only the Chinese ones
 chinese_titles = [t for t in titles if not t.startswith('/')]
 # Keep the first 10
 top10 = chinese_titles[:10]
 # 4. Save to a text file, one numbered title per line
 with open('movies.txt', 'w', encoding='utf-8') as f:
    f.writelines(f'{i}. {title}\n' for i, title in enumerate(top10, 1))
 print('已保存前10部电影到 movies.txt')
 # Print the file contents to verify the result
 with open('movies.txt', 'r', encoding='utf-8') as f:
    print(f.read())
--- a/43test1.py
+++ b/43test1.py
@@ -0,0 +1,52 @@
 import requests
 import re
 import json
 # Fetch the Douban Top 250 landing page, extract up to ten movies (rank,
 # title, alternate title, rating, quote) with regular expressions, dump them
 # to movies.json, then reload the file and print a short summary.
 headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
 }
 url = 'https://movie.douban.com/top250'
 # Without a timeout requests may wait indefinitely on a stalled connection.
 response = requests.get(url, headers=headers, timeout=10)
 # Fail loudly on an HTTP error instead of saving records scraped from an
 # error page.
 response.raise_for_status()
 html = response.text
 title_pattern = r'<span class="title">([^<&]+)</span>'
 rating_pattern = r'<span class="rating_num"[^>]*>(\d+\.?\d*)</span>'
 quote_pattern = r'<span class="inq">([^<]+)</span>'
 titles = re.findall(title_pattern, html)
 ratings = re.findall(rating_pattern, html)
 quotes = re.findall(quote_pattern, html)
 movies = []
 # title_index walks the raw captures independently of the rank counter so
 # that '/'-prefixed alternate titles never consume a rank.
 title_index = 0
 for i in range(10):
    # Skip alternate titles until the next primary title.
    while title_index < len(titles) and titles[title_index].startswith('/'):
        title_index += 1
    if title_index >= len(titles):
        # Out of titles: stop here rather than padding the output with
        # empty-title records that would inflate the reported count.
        break
    # NOTE(review): ratings and quotes are matched to movies purely by
    # position; if some movie on the page has no "inq" span, later quotes
    # would shift onto the wrong movie -- confirm against the page markup.
    movie = {
        'rank': i + 1,
        'title': titles[title_index],
        'en_title': '',
        'rating': ratings[i] if i < len(ratings) else '',
        'quote': quotes[i] if i < len(quotes) else ''
    }
    # A '/'-prefixed capture right after the title is its alternate title.
    if title_index + 1 < len(titles) and titles[title_index + 1].startswith('/'):
        movie['en_title'] = titles[title_index + 1].replace('/ ', '')
    movies.append(movie)
    title_index += 1
 with open('movies.json', 'w', encoding='utf-8') as f:
    json.dump(movies, f, ensure_ascii=False, indent=2)
 print('已保存到 movies.json')
 # Reload the file to verify it parses, and preview the first three records.
 with open('movies.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
    print(f'共保存 {len(data)} 部电影')
    for m in data[:3]:
        print(f"  {m['rank']}. {m['title']} ({m['en_title']}) - {m['rating']}")
--- a/43test2.py
+++ b/43test2.py
@@ -0,0 +1,18 @@
 import csv
 # Print every movie in movies.csv whose rating is strictly above 9.5,
 # followed by the number of matches.
 # newline='' lets the csv module handle line endings itself.
 with open('movies.csv', 'r', encoding='utf-8', newline='') as f:
    reader = csv.DictReader(f)
    print('评分高于9.5的电影：')
    print('-' * 40)
    count = 0
    for row in reader:
        try:
            rating = float(row['rating'])
        except (TypeError, ValueError):
            # The rating column is blank (or absent on a short row) when no
            # rating was found for the movie; such rows cannot match.
            continue
        if rating > 9.5:
            count += 1
            print(f"{row['rank']}. {row['title']}")
            print(f"   英文名: {row['en_title']}")
            print(f"   评分: {row['rating']}")
            print()
    print(f'共 {count} 部评分超过9.5')
--- a/43test3.py
+++ b/43test3.py
@@ -0,0 +1,22 @@
 import json
 # Read the JSON file
 with open('movies.json', 'r', encoding='utf-8') as f:
    movies = json.load(f)
 # Pair each movie with its numeric rating, leaving out records whose rating
 # is blank or not a number so one bad record cannot abort the whole report.
 rated = []
 for m in movies:
    try:
        rated.append((float(m['rating']), m))
    except (TypeError, ValueError):
        continue
 if rated:
    # Compute the average rating
    average = sum(score for score, _ in rated) / len(rated)
    print(f'Top10 电影平均分: {average:.2f}')
    # Find the highest-rated movie (the first one wins a tie)
    highest = max(rated, key=lambda pair: pair[0])[1]
    print('\n评分最高的电影:')
    print(f"  {highest['rank']}. {highest['title']} ({highest['en_title']})")
    print(f"  评分: {highest['rating']}")
 else:
    # Nothing to average: avoid dividing by zero / max() of an empty list.
    print('没有可用的评分数据')
 # Collect the movies that have a non-empty quote
 with_quote = [m for m in movies if m.get('quote')]
 print(f'\n有经典台词的电影: {len(with_quote)} 部')
 for m in with_quote:
    print(f"  \"{m['quote']}\" —— {m['title']}")