import re

# Exercise 2: extract (title, year, rating, director) tuples from a small
# HTML fragment in which every movie is wrapped in <div class="movie">.

# Local test data (mirrors the structure required by the exercise).
html = '''
<div class="movie">
    <h2 class="name">《流浪地球》</h2>
    <span class="year">(2024)</span>
    <span class="rating">8.5</span>
    <span class="director">导演: 郭帆</span>
</div>
<div class="movie">
    <h2 class="name">《你好,李焕英》</h2>
    <span class="year">(2024)</span>
    <span class="rating">9.0</span>
    <span class="director">导演: 贾玲</span>
</div>
'''

# ================= Core: regex targeting the div structure =================
# The pattern is compiled with re.X (verbose), where unescaped whitespace in
# the pattern is ignored.  Literal spaces therefore have to be matched
# explicitly (\s+ / \s*); a plain `<div class=...>` would be compiled as
# `<divclass=...>` and never match anything.
# re.DOTALL lets `.*?` span the newlines between the tags of one movie.
pattern = r'''
    <div\s+class="movie">
    .*?<h2\s+class="name">《([^》]+)》</h2>              # capture the title
    .*?<span\s+class="year">\((\d{4})\)</span>          # capture the year
    .*?<span\s+class="rating">([\d.]+)</span>           # capture the rating
    .*?<span\s+class="director">导演:\s*([^<]+)</span>   # capture the director
'''

# Run the match: one 4-tuple of strings per movie, in document order.
movies = re.findall(pattern, html, re.DOTALL | re.X)

# Print the results.
print("===== 练习2 结果展示 =====")
for index, (name, year, rating, director) in enumerate(movies, 1):
    print(f"{index}. 电影名:{name}")
    print(f" 年份:{year} | 评分:{rating} | {director}")
    print("-" * 30)