提交动手练习代码

2026-04-03 11:17:04 +08:00
parent 4b9fb044ef
commit 711ecaafc8
1 changed files with 112 additions and 0 deletions
--- a/260403-2509165039.py
+++ b/260403-2509165039.py
@@ -0,0 +1,112 @@
+# Exercise 1: extract weather-forecast records from free text.
+import re
+
+text = '''
+2024-03-15 天气：晴 温度：15-25°C
+2024-03-16 天气：多云 温度：12-20°C
+2024-03-17 天气：小雨 温度：10-18°C
+'''
+
+# Groups: YYYY-MM-DD date, weather word (a run of characters other than the
+# ASCII space), then the two numbers of the "low-high°C" range.
+pattern = r'(\d{4}-\d{2}-\d{2})\s*天气：([^ ]+)\s*温度：(\d+)-(\d+)°C'
+# With four groups, findall yields one 4-tuple of strings per match.
+matches = re.findall(pattern, text)
+
+for date, weather, low, high in matches:
+    print(f'{date}: {weather}, {low}°C-{high}°C')
+
+
+# Exercise 2: scrape Douban movie information.
+import re
+
+html = '''
+<div class="movie">
+    <h2 class="name">《流浪地球》</h2>
+    <span class="year">(2024)</span>
+    <span class="rating">8.5</span>
+    <span class="director">导演：郭帆</span>
+</div>
+<div class="movie">
+    <h2 class="name">《你好，李焕英》</h2>
+    <span class="year">(2024)</span>
+    <span class="rating">7.9</span>
+    <span class="director">导演：贾玲</span>
+</div>
+'''
+
+name_pattern = r'<h2 class="name">《([^》]+)》</h2>'
+year_pattern = r'<span class="year">\((\d{4})\)</span>'
+rating_pattern = r'<span class="rating">([^<]+)</span>'
+director_pattern = r'导演：([^<]+)'
+field_patterns = (name_pattern, year_pattern, rating_pattern, director_pattern)
+
+# Search each <div class="movie"> on its own so a missing field yields '未知'
+# ("unknown") for that movie instead of shifting values between movies.
+movies = []
+for block in re.findall(r'<div class="movie">(.*?)</div>', html, re.DOTALL):
+    found = [re.search(p, block) for p in field_patterns]
+    movies.append(tuple(m.group(1) if m else '未知' for m in found))
+
+for name, year, rating, director in movies:
+    print(f"{name} | {year} | 评分：{rating} | {director}")
+
+# Exercise 3: log analysis - pull IP, timestamp and status out of each line.
+import re
+
+log = '''
+192.168.1.100 - - [15/Mar/2024:10:15:30 +0800] "GET /index.html HTTP/1.1" 200 1234
+10.0.0.50 - - [15/Mar/2024:10:15:31 +0800] "POST /api/login HTTP/1.1" 200 256
+192.168.1.101 - - [15/Mar/2024:10:15:32 +0800] "GET /notfound.html HTTP/1.1" 404 512
+172.16.0.200 - - [15/Mar/2024:10:15:33 +0800] "GET /images/logo.png HTTP/1.1" 200 4096
+'''
+
+# Groups: four dot-separated numbers, the text inside [...], and the 3-digit
+# number after a quote-plus-space; the final \d+ must match one more number.
+pattern = r'(\d+\.\d+\.\d+\.\d+).*?\[([^\]]+)\].*?" (\d{3}) \d+'
+entry_re = re.compile(pattern)
+
+for ip, time, status in entry_re.findall(log):
+    print(f'IP: {ip:15} | 时间: {time:25} | 状态: {status}')
+
+
+# Exercise 4: mask the middle four digits of each phone number.
+import re
+
+phone_book = '''
+张三：138-1234-5678
+李四：139-5678-1234
+王五：138-0000-1111
+'''
+
+pattern = r'(\d{3})-(\d{4})-(\d{4})'
+
+
+def mask_phone(match):
+    """Return the matched number with its middle group replaced by ****."""
+    head, _middle, tail = match.groups()
+    return f'{head}-****-{tail}'
+
+
+masked = re.compile(pattern).sub(mask_phone, phone_book)
+print(masked)
+
+# Exercise 5: comprehensive challenge - parse the class-schedule table.
+import re
+
+html = '''
+<table class="schedule">
+    <tr><th>时间</th><th>课程</th><th>教室</th></tr>
+    <tr><td>周一 1-2节</td><td>Python程序设计</td><td>A101</td></tr>
+    <tr><td>周一 3-4节</td><td>数据结构</td><td>B205</td></tr>
+    <tr><td>周二 1-2节</td><td>高等数学</td><td>C301</td></tr>
+    <tr><td>周三 5-6节</td><td>Python程序设计</td><td>A102</td></tr>
+</table>
+'''
+
+# Only data rows match: three adjacent <td> cells (the header row uses <th>).
+time_pattern = r'<td>([^<]+)</td><td>([^<]+)</td><td>([^<]+)</td>'
+courses = [row.groups() for row in re.finditer(time_pattern, html)]
+print('课程表：')
+for time, course, room in courses:
+    print(f'{time} | {course} | {room}')