# Python script (29 lines, 914 B)
"""Extract movie metadata from an HTML snippet using regular expressions.

Running the module prints one line per movie in the sample ``html`` string:
name, year, rating and director separated by single spaces.
"""
import re

# Sample markup: one <div class="movie"> per film.
html = """
<div class="movie">
    <h2 class="name">《流浪地球》</h2>
    <span class="year">(2024)</span>
    <span class="rating">8.5</span>
    <span class="director">导演: 郭帆</span>
</div>
<div class="movie">
    <h2 class="name">《你好,李焕英》</h2>
    <span class="year">(2021)</span>
    <span class="rating">7.9</span>
    <span class="director">导演: 贾玲</span>
</div>
"""

# Each pattern has exactly one capture group holding the field's value.
name_pattern = r'<h2 class="name">《([^》]+)》</h2>'
year_pattern = r'<span class="year">\((\d+)\)</span>'
rating_pattern = r'<span class="rating">([\d.]+)</span>'
director_pattern = r'导演: ([^<]+)'

# Captures the inner markup of one movie <div>. The non-greedy match stops at
# the first closing </div>, so nested <div> elements are not supported.
movie_block_pattern = r'<div class="movie">(.*?)</div>'


def extract_movies(source):
    """Return one ``(name, year, rating, director)`` tuple per movie block.

    Every ``<div class="movie">`` block in *source* is parsed on its own, so
    the fields of one movie can never be paired with those of another. A
    field that is absent from a block is reported as ``None``; values that
    are present are returned as whitespace-stripped strings.

    Args:
        source: HTML text to scan.

    Returns:
        A list of 4-tuples in document order; empty when *source* contains
        no movie blocks.
    """
    field_patterns = (name_pattern, year_pattern, rating_pattern, director_pattern)
    records = []
    # DOTALL lets ``.`` cross the newlines between a block's child elements.
    for block in re.findall(movie_block_pattern, source, flags=re.DOTALL):
        fields = []
        for pattern in field_patterns:
            match = re.search(pattern, block)
            fields.append(match.group(1).strip() if match else None)
        records.append(tuple(fields))
    return records


# Whole-document column lists, kept for code that reads them directly.
names = re.findall(name_pattern, html)
years = re.findall(year_pattern, html)
ratings = re.findall(rating_pattern, html)
directors = re.findall(director_pattern, html)

# Iterate per-movie records rather than zip()-ing the column lists: zip would
# silently misalign or drop rows whenever a movie is missing a field.
for name, year, rating, director in extract_movies(html):
    print(f"{name} {year} {rating} {director}")