From 490fd747ccf931cb647139693d70ed9d4f2a2fd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9E=97=E8=B4=B5=E7=8F=8D?= <2509165009@student.example.com>
Date: Tue, 31 Mar 2026 11:22:15 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20/?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text in this section is not part of the commit message):
- Every script now wraps its work in main(), passes a timeout to
  requests.get() and calls raise_for_status() before parsing.
- 09test1.py pairs ratings with films instead of raw title entries.
- 09test3.py / 09test4.py skip blank or malformed ratings instead of crashing.
- The regex literals are unchanged; they appear to have lost their HTML tag
  context and must be confirmed against the intended markup.
- The blob ids on the "index" lines are carried over from the original upload
  and do not match the revised file contents.

 26331,09test.py  | 37 +++++++++++++++++++++++
 26331,09test1.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++
 26331,09test2.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
 26331,09test3.py | 40 ++++++++++++++++++++++++
 26331,09test4.py | 51 +++++++++++++++++++++++++++++++
 5 files changed, 283 insertions(+)
 create mode 100644 26331,09test.py
 create mode 100644 26331,09test1.py
 create mode 100644 26331,09test2.py
 create mode 100644 26331,09test3.py
 create mode 100644 26331,09test4.py

diff --git a/26331,09test.py b/26331,09test.py
new file mode 100644
index 0000000..77bd010
--- /dev/null
+++ b/26331,09test.py
@@ -0,0 +1,37 @@
+"""Save the first ten Douban Top 250 titles to movies.txt."""
+import re
+
+import requests
+
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+}
+URL = 'https://movie.douban.com/top250'
+# NOTE(review): this pattern carries no HTML tag context, so it matches every
+# run of characters other than '<' and '&'; confirm it against the intended
+# markup before trusting the output.
+TITLE_PATTERN = r'([^<&]+)'
+
+
+def main():
+    """Fetch the page, keep the first ten primary titles, echo the file."""
+    # The timeout stops a stalled connection from hanging the script, and
+    # raise_for_status() keeps an HTTP error page from being parsed as data.
+    response = requests.get(URL, headers=HEADERS, timeout=10)
+    response.raise_for_status()
+
+    titles = re.findall(TITLE_PATTERN, response.text)
+    # Entries starting with '/' are dropped (presumably alternate titles).
+    top10 = [t for t in titles if not t.startswith('/')][:10]
+
+    with open('movies.txt', 'w', encoding='utf-8') as f:
+        f.writelines(f'{i}. {title}\n' for i, title in enumerate(top10, 1))
+
+    print('已保存前10部电影到 movies.txt')
+
+    with open('movies.txt', 'r', encoding='utf-8') as f:
+        print(f.read())
+
+
+if __name__ == '__main__':
+    main()
diff --git a/26331,09test1.py b/26331,09test1.py
new file mode 100644
index 0000000..0f91053
--- /dev/null
+++ b/26331,09test1.py
@@ -0,0 +1,76 @@
+"""Save ten Douban Top 250 films (titles, rating) to movies.csv."""
+import csv
+import re
+
+import requests
+
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+}
+URL = 'https://movie.douban.com/top250'
+# NOTE(review): neither pattern carries HTML tag context (the rating one even
+# starts with a bare ']'); confirm both against the intended markup.
+TITLE_PATTERN = r'([^<&]+)'
+RATING_PATTERN = r']*>(\d+\.?\d*)'
+FIELDNAMES = ['rank', 'title', 'en_title', 'rating']
+LIMIT = 10
+
+
+def build_movies(titles, ratings, limit=LIMIT):
+    """Pair each primary title with its alternate title and its rating.
+
+    An entry starting with '/' is taken as the alternate title of the primary
+    title right before it.  Ratings are matched by film position, not by raw
+    title index, so alternate-title entries cannot shift them out of step.
+    """
+    movies = []
+    index = 0
+    while len(movies) < limit:
+        # Step over alternate-title entries to reach the next primary title.
+        while index < len(titles) and titles[index].startswith('/'):
+            index += 1
+        if index >= len(titles):
+            break
+
+        following = titles[index + 1] if index + 1 < len(titles) else ''
+        en_title = ''
+        if following.startswith('/'):
+            en_title = following.replace('/ ', '')
+        position = len(movies)
+        movies.append({
+            'rank': position + 1,
+            'title': titles[index],
+            'en_title': en_title,
+            'rating': ratings[position] if position < len(ratings) else '',
+        })
+        index += 1
+    return movies
+
+
+def main():
+    """Fetch the page, write movies.csv, then echo it line by line."""
+    # The timeout stops a stalled connection from hanging the script, and
+    # raise_for_status() keeps an HTTP error page from being parsed as data.
+    response = requests.get(URL, headers=HEADERS, timeout=10)
+    response.raise_for_status()
+    html = response.text
+
+    movies = build_movies(
+        re.findall(TITLE_PATTERN, html),
+        re.findall(RATING_PATTERN, html),
+    )
+
+    with open('movies.csv', 'w', encoding='utf-8', newline='') as f:
+        writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
+        writer.writeheader()
+        writer.writerows(movies)
+
+    print('已保存到 movies.csv')
+
+    with open('movies.csv', 'r', encoding='utf-8') as f:
+        for line in f:
+            print(line.strip())
+
+
+if __name__ == '__main__':
+    main()
diff --git a/26331,09test2.py b/26331,09test2.py
new file mode 100644
index 0000000..a43670f
--- /dev/null
+++ b/26331,09test2.py
@@ -0,0 +1,79 @@
+"""Save ten Douban Top 250 films (titles, rating, quote) to movies.json."""
+import json
+import re
+
+import requests
+
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+}
+URL = 'https://movie.douban.com/top250'
+# NOTE(review): none of these patterns carries HTML tag context (the rating
+# one even starts with a bare ']'); confirm them against the intended markup.
+TITLE_PATTERN = r'([^<&]+)'
+RATING_PATTERN = r']*>(\d+\.?\d*)'
+QUOTE_PATTERN = r'([^<]+)'
+LIMIT = 10
+
+
+def build_movies(titles, ratings, quotes, limit=LIMIT):
+    """Pair each primary title with its alternate title, rating and quote.
+
+    An entry starting with '/' is taken as the alternate title of the primary
+    title right before it.  Ratings and quotes are matched purely by film
+    position, so a film without a quote would shift every later quote.
+    """
+    movies = []
+    index = 0
+    while len(movies) < limit:
+        # Step over alternate-title entries to reach the next primary title.
+        while index < len(titles) and titles[index].startswith('/'):
+            index += 1
+        if index >= len(titles):
+            # Out of titles: stop rather than emit rows with a blank title.
+            break
+
+        following = titles[index + 1] if index + 1 < len(titles) else ''
+        en_title = ''
+        if following.startswith('/'):
+            en_title = following.replace('/ ', '')
+        position = len(movies)
+        movies.append({
+            'rank': position + 1,
+            'title': titles[index],
+            'en_title': en_title,
+            'rating': ratings[position] if position < len(ratings) else '',
+            'quote': quotes[position] if position < len(quotes) else '',
+        })
+        index += 1
+    return movies
+
+
+def main():
+    """Fetch the page, write movies.json, then print a short summary."""
+    # The timeout stops a stalled connection from hanging the script, and
+    # raise_for_status() keeps an HTTP error page from being parsed as data.
+    response = requests.get(URL, headers=HEADERS, timeout=10)
+    response.raise_for_status()
+    html = response.text
+
+    movies = build_movies(
+        re.findall(TITLE_PATTERN, html),
+        re.findall(RATING_PATTERN, html),
+        re.findall(QUOTE_PATTERN, html),
+    )
+
+    with open('movies.json', 'w', encoding='utf-8') as f:
+        json.dump(movies, f, ensure_ascii=False, indent=2)
+
+    print('已保存到 movies.json')
+
+    with open('movies.json', 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    print(f'共保存 {len(data)} 部电影')
+    for m in data[:3]:
+        print(f" {m['rank']}. {m['title']} ({m['en_title']}) - {m['rating']}")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/26331,09test3.py b/26331,09test3.py
new file mode 100644
index 0000000..d646f81
--- /dev/null
+++ b/26331,09test3.py
@@ -0,0 +1,40 @@
+"""List the films in movies.csv whose rating is above 9.5."""
+import csv
+
+THRESHOLD = 9.5
+
+
+def parse_rating(text):
+    """Return *text* as a float, or None when it is blank or not numeric."""
+    try:
+        return float(text)
+    except (TypeError, ValueError):
+        return None
+
+
+def main():
+    """Print every film rated above THRESHOLD, then the number found."""
+    # newline='' is the csv module's documented way to open its input files.
+    with open('movies.csv', 'r', encoding='utf-8', newline='') as f:
+        rows = list(csv.DictReader(f))
+
+    print('评分高于9.5的电影:')
+    print('-' * 40)
+
+    count = 0
+    for row in rows:
+        # A row whose rating is blank or malformed is skipped instead of
+        # crashing float().
+        rating = parse_rating(row['rating'])
+        if rating is not None and rating > THRESHOLD:
+            count += 1
+            print(f"{row['rank']}. {row['title']}")
+            print(f" 英文名: {row['en_title']}")
+            print(f" 评分: {row['rating']}")
+            print()
+
+    print(f'共 {count} 部评分超过9.5')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/26331,09test4.py b/26331,09test4.py
new file mode 100644
index 0000000..70b639f
--- /dev/null
+++ b/26331,09test4.py
@@ -0,0 +1,51 @@
+"""Summarise movies.json: average rating, top-rated film and quotes."""
+import json
+
+
+def parse_rating(text):
+    """Return *text* as a float, or None when it is blank or not numeric."""
+    try:
+        return float(text)
+    except (TypeError, ValueError):
+        return None
+
+
+def report_ratings(movies):
+    """Print the average rating and the highest-rated film."""
+    rated = []
+    for m in movies:
+        rating = parse_rating(m['rating'])
+        if rating is not None:
+            rated.append((rating, m))
+    if not rated:
+        # Nothing usable: avoids dividing by zero and max() on no items.
+        print('没有可用的评分数据')
+        return
+
+    average = sum(rating for rating, _ in rated) / len(rated)
+    print(f'Top10 电影平均分: {average:.2f}')
+
+    _, highest = max(rated, key=lambda pair: pair[0])
+    print('\n评分最高的电影:')
+    print(f" {highest['rank']}. {highest['title']} ({highest['en_title']})")
+    print(f" 评分: {highest['rating']}")
+
+
+def report_quotes(movies):
+    """Print every film that has a non-empty quote."""
+    with_quote = [m for m in movies if m['quote']]
+    print(f'\n有经典台词的电影: {len(with_quote)} 部')
+    for m in with_quote:
+        print(f" \"{m['quote']}\" —— {m['title']}")
+
+
+def main():
+    """Load movies.json and print both reports."""
+    with open('movies.json', 'r', encoding='utf-8') as f:
+        movies = json.load(f)
+    report_ratings(movies)
+    report_quotes(movies)
+
+
+if __name__ == '__main__':
+    main()