# exercise.py -- script body recovered from the patch that creates this file
# (commit 9a288d8, "create mode 100644 exercise.py").
"""Exercise 2: pull (title, year, rating, director) records out of an HTML snippet.

Each movie is expected to live in its own ``<div>`` and to contain, in order,
a title wrapped in 《》, a four-digit year in parentheses, a numeric rating and
a ``导演:`` (director) label followed by the director's name.
"""

import re

# Local test data (mirrors the structure the exercise asks for).
# NOTE(review): the tag markup was not recoverable from the reviewed copy of
# this file; the tags below are a representative reconstruction -- confirm
# them against the exercise statement.
html = '''
<div class="movie">
    <h3>《流浪地球》</h3>
    <span class="year">(2024)</span>
    <span class="rating">8.5</span>
    <span class="director">导演: 郭帆</span>
</div>
<div class="movie">
    <h3>《你好,李焕英》</h3>
    <span class="year">(2024)</span>
    <span class="rating">9.0</span>
    <span class="director">导演: 贾玲</span>
</div>
'''

# ================= Core: regex for the div-based structure =================
# The pattern is compiled with re.VERBOSE, which discards unescaped whitespace
# in the pattern itself. Any whitespace that must be matched is therefore
# written as an explicit \s, never as a literal space.
pattern = r'''
<div\b[^>]*>                 # opening tag of one movie record
.*?《([^》]+)》               # capture the title between the 《》 marks
.*?\((\d{4})\)               # capture the four-digit year
.*?(\d+(?:\.\d+)?)           # capture the rating (integer or decimal)
.*?导演[::]\s*([^<\n]+)     # capture the director up to the next tag/newline
'''

_MOVIE_RE = re.compile(pattern, re.DOTALL | re.VERBOSE)


def parse_movies(html_text: str) -> list:
    """Return every movie record found in *html_text*.

    Args:
        html_text: HTML source to scan; an empty string yields an empty list.

    Returns:
        A list of ``(title, year, rating, director)`` tuples of strings, in
        document order. Surrounding whitespace is stripped from each field.
    """
    return [
        tuple(field.strip() for field in match.groups())
        for match in _MOVIE_RE.finditer(html_text)
    ]


def main() -> None:
    """Print the movies parsed from the bundled sample data."""
    print("===== 练习2 结果展示 =====")
    for index, (name, year, rating, director) in enumerate(movies, 1):
        print(f"{index}. 电影名:{name}")
        print(f"   年份:{year} | 评分:{rating} | 导演:{director}")
        print("-" * 30)


# Kept at module level so the parsed result stays importable as `movies`.
movies = parse_movies(html)

if __name__ == "__main__":
    main()