# File-viewer header from the original listing: 33 lines, 1.1 KiB, Python
"""Regex exercise: extract name, year, rating and director for each movie.

The sample page is a small HTML fragment in which every movie lives in its
own ``<div class="movie">`` element.  Each field is pulled out with its own
regular expression, but the fields are matched *per movie block* so that a
movie with a missing field cannot shift the values of the movies after it.
"""
import re

html = '''
<div class="movie">
<h2 class="name">《流浪地球》</h2>
<span class="year">(2024)</span>
<span class="rating">8.5</span>
<span class="director">导演:郭帆</span>
</div>
<div class="movie">
<h2 class="name">《你好,李焕英》</h2>
<span class="year">(2024)</span>
<span class="rating">7.9</span>
<span class="director">导演:贾玲</span>
</div>
'''

# Task: write regular expressions that extract all of the movie information.
# Hint: either use several regexes (one per field) or one complex regex that
# captures everything at once.  This solution uses one regex per field.
name_pattern = r'<h2 class="name">《([^》]+)》</h2>'
year_pattern = r'<span class="year">\((\d{4})\)</span>'
rating_pattern = r'<span class="rating">([^<]+)</span>'
director_pattern = r'导演:([^<]+)'

# One movie container.  Non-greedy so that each match stops at the first
# closing </div>; this is only correct while movie blocks contain no nested
# <div> elements, which holds for the markup above.
movie_block_pattern = r'<div class="movie">(.*?)</div>'


def _first_group(pattern: str, text: str, default: str = '') -> str:
    """Return capture group 1 of the first match of *pattern* in *text*.

    Returns *default* when the pattern does not match at all.
    """
    match = re.search(pattern, text)
    return match.group(1) if match else default


def extract_movies(page: str) -> list:
    """Return one ``(name, year, rating, director)`` tuple per movie block.

    Args:
        page: HTML text containing zero or more ``<div class="movie">``
            blocks.

    Returns:
        A list of 4-tuples of strings, in document order.  A field that is
        absent from a movie block is returned as an empty string, so the
        remaining fields of that movie (and of every later movie) stay
        correctly aligned.
    """
    # re.DOTALL lets ``.`` span the newlines between the tags of one block.
    blocks = re.findall(movie_block_pattern, page, flags=re.DOTALL)
    return [
        (
            _first_group(name_pattern, block),
            _first_group(year_pattern, block),
            _first_group(rating_pattern, block),
            _first_group(director_pattern, block),
        )
        for block in blocks
    ]


movies = extract_movies(html)

# Per-field lists, kept under their original names; they are always the same
# length because they are derived from the per-movie tuples.
names = [movie[0] for movie in movies]
years = [movie[1] for movie in movies]
ratings = [movie[2] for movie in movies]
directors = [movie[3] for movie in movies]

for name, year, rating, director in movies:
    print(f"{name} | {year} | 评分:{rating} | {director}")