提交动手练习代码
This commit is contained in:
112
260403-2509165039.py
Normal file
112
260403-2509165039.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
# Exercise 1: extract the weather forecast.
#
# Each forecast line holds a date, a weather description and a temperature
# range; a single regular expression with four capture groups splits them.
import re

text = '''
2024-03-15 天气:晴 温度:15-25°C
2024-03-16 天气:多云 温度:12-20°C
2024-03-17 天气:小雨 温度:10-18°C
'''

# Groups: (1) date as YYYY-MM-DD, (2) weather word (everything up to the next
# space), (3) low temperature, (4) high temperature.
pattern = r'(\d{4}-\d{2}-\d{2})\s*天气:([^ ]+)\s*温度:(\d+)-(\d+)°C'

# With more than one group in the pattern, findall returns one tuple of
# strings per match.
matches = re.findall(pattern, text)

for match in matches:
    date, weather, low, high = match
    print(f'{date}: {weather}, {low}°C-{high}°C')

# Exercise 2: scrape Douban movie information.
#
# Each field of a movie card is pulled out with its own pattern, then the
# resulting lists are walked in lockstep to print one line per movie.
import re

html = '''
<div class="movie">
<h2 class="name">《流浪地球》</h2>
<span class="year">(2024)</span>
<span class="rating">8.5</span>
<span class="director">导演:郭帆</span>
</div>
<div class="movie">
<h2 class="name">《你好,李焕英》</h2>
<span class="year">(2024)</span>
<span class="rating">7.9</span>
<span class="director">导演:贾玲</span>
</div>
'''

# Title without the surrounding 《》 brackets.
name_pattern = r'<h2 class="name">《([^》]+)》</h2>'
# Four-digit year without the surrounding parentheses.
year_pattern = r'<span class="year">\((\d{4})\)</span>'
# Rating text up to the closing tag.
rating_pattern = r'<span class="rating">([^<]+)</span>'
# Director name: everything after the "导演:" label up to the next tag.
director_pattern = r'导演:([^<]+)'

names = re.findall(name_pattern, html)
years = re.findall(year_pattern, html)
ratings = re.findall(rating_pattern, html)
directors = re.findall(director_pattern, html)

# zip pairs the i-th entry of every list and stops at the shortest one, so a
# missing field cannot raise IndexError the way manual indexing would.
for name, year, rating, director in zip(names, years, ratings, directors):
    print(f"{name} | {year} | 评分:{rating} | {director}")

# Exercise 3: log analysis.
#
# Pull the client IP, the bracketed timestamp and the HTTP status code out of
# every access-log line and print them as a table row.
import re

log = '''
192.168.1.100 - - [15/Mar/2024:10:15:30 +0800] "GET /index.html HTTP/1.1" 200 1234
10.0.0.50 - - [15/Mar/2024:10:15:31 +0800] "POST /api/login HTTP/1.1" 200 256
192.168.1.101 - - [15/Mar/2024:10:15:32 +0800] "GET /notfound.html HTTP/1.1" 404 512
172.16.0.200 - - [15/Mar/2024:10:15:33 +0800] "GET /images/logo.png HTTP/1.1" 200 4096
'''

# Groups: (1) dotted IPv4 address, (2) text between the square brackets,
# (3) three-digit status code that follows the closing quote of the request.
# "." does not match a newline here, so every match stays on a single line.
pattern = r'(\d+\.\d+\.\d+\.\d+).*?\[([^\]]+)\].*?" (\d{3}) \d+'

for match in re.finditer(pattern, log):
    ip, time, status = match.groups()
    print(f'IP: {ip:15} | 时间: {time:25} | 状态: {status}')

# Exercise 4: phone number masking.
#
# Hide the middle four digits of every phone number in the phone book while
# leaving the names and the rest of the text untouched.
import re

phone_book = '''
张三:138-1234-5678
李四:139-5678-1234
王五:138-0000-1111
'''

# Groups: (1) three-digit prefix, (2) middle four digits, (3) last four digits.
pattern = r'(\d{3})-(\d{4})-(\d{4})'


def mask_phone(match: "re.Match") -> str:
    """Return the matched phone number with its middle group replaced by ****.

    Args:
        match: A match object produced by ``pattern``; groups 1 and 3 hold the
            digits that stay visible.

    Returns:
        The number in the form ``<prefix>-****-<suffix>``.
    """
    return f'{match.group(1)}-****-{match.group(3)}'


# re.sub calls mask_phone once per match and substitutes its return value.
masked = re.sub(pattern, mask_phone, phone_book)
print(masked)

# Exercise 5: comprehensive challenge.
#
# Read a class schedule out of an HTML table and print one line per lesson.
import re

html = '''
<table class="schedule">
<tr><th>时间</th><th>课程</th><th>教室</th></tr>
<tr><td>周一 1-2节</td><td>Python程序设计</td><td>A101</td></tr>
<tr><td>周一 3-4节</td><td>数据结构</td><td>B205</td></tr>
<tr><td>周二 1-2节</td><td>高等数学</td><td>C301</td></tr>
<tr><td>周三 5-6节</td><td>Python程序设计</td><td>A102</td></tr>
</table>
'''

# Three consecutive <td> cells: (1) time slot, (2) course name, (3) classroom.
# The header row uses <th> cells, so it never matches.
time_pattern = r'<td>([^<]+)</td><td>([^<]+)</td><td>([^<]+)</td>'
courses = re.findall(time_pattern, html)

print('课程表:')
for time, course, room in courses:
    print(f'{time} | {course} | {room}')
Reference in New Issue
Block a user