Files
2026-04-02 16:06:47 +08:00

37 lines
1.2 KiB
Python

import re
# Regex exercise: extract (name, year, rating, director) from each
# <div class="movie"> element of a small inline HTML sample and print a
# numbered report.

# Local test data (mirrors the structure required by the exercise).
html = '''
<div class="movie">
<h2 class="name">《流浪地球》</h2>
<span class="year">(2024)</span>
<span class="rating">8.5</span>
<span class="director">导演: 郭帆</span>
</div>
<div class="movie">
<h2 class="name">《你好,李焕英》</h2>
<span class="year">(2024)</span>
<span class="rating">9.0</span>
<span class="director">导演: 贾玲</span>
</div>
'''

# ================= Core: regex tailored to the div structure =================
# The pattern is compiled with re.X (verbose), which DISCARDS unescaped
# whitespace inside the pattern.  A literal space such as the one in
# `<div class=...>` would therefore never match, so every place where the
# HTML contains whitespace is spelled out explicitly with \s+ / \s*.
# NOTE: `.*?` together with re.DOTALL may run past a </div>; if a record
# lacks one of the fields, the match can borrow it from the following record.
pattern = r'''
<div\s+class="movie">
.*?<h2\s+class="name">《([^》]+)》</h2>                      # group 1: movie name
.*?<span\s+class="year">\((\d{4})\)</span>                 # group 2: year
.*?<span\s+class="rating">([\d.]+)</span>                  # group 3: rating
.*?<span\s+class="director">导演:\s*([^<]+?)\s*</span>      # group 4: director
'''

# Run the match: one 4-tuple of strings per movie, in document order.
movies = re.findall(pattern, html, re.DOTALL | re.X)

# Print the results.
print("===== 练习2 结果展示 =====")
for index, (name, year, rating, director) in enumerate(movies, 1):
    print(f"{index}. 电影名:{name}")
    print(f" 年份:{year} | 评分:{rating} | {director}")
    print("-" * 30)