# Exercise 1: extract weather forecast records
import re

text = '''
2024-03-15 天气:晴 温度:15-25°C
2024-03-16 天气:多云 温度:12-20°C
2024-03-17 天气:小雨 温度:10-18°C
'''

# Capture groups, in order: date, weather description, low temperature,
# high temperature. \S+ is used for the weather field (instead of [^ ]+)
# so that the field can never run across a line break into the next record.
pattern = r'(\d{4}-\d{2}-\d{2})\s*天气:(\S+)\s*温度:(\d+)-(\d+)°C'

# findall returns one 4-tuple of strings per forecast line.
matches = re.findall(pattern, text)
for date, weather, low, high in matches:
    print(f'{date}: {weather}, {low}°C-{high}°C')
# Exercise 2: extract movie information (Douban-style listing)
import re

# NOTE(review): this sample looks like it once contained HTML markup that has
# since been stripped; the patterns below are therefore anchored on the
# plain-text layout (one field per line). If the real markup is restored,
# re-anchor them on the surrounding tags -- confirm against the intended input.
html = '''
《流浪地球》
(2024)
8.5
导演:郭帆
《你好,李焕英》
(2024)
7.9
导演:贾玲
'''

# Title: the text between the CJK book-title brackets.
name_pattern = r'《([^》]+)》'
# Year: four digits wrapped in parentheses.
year_pattern = r'\((\d{4})\)'
# Rating: a line consisting solely of a (possibly decimal) number.
# Used with re.MULTILINE so ^ and $ match at every line boundary.
rating_pattern = r'^[ \t]*(\d+(?:\.\d+)?)[ \t]*$'
# Director: everything after the label up to the end of the line
# (or up to a tag, should markup be present).
director_pattern = r'导演:([^<\n]+)'

names = re.findall(name_pattern, html)
years = re.findall(year_pattern, html)
ratings = re.findall(rating_pattern, html, re.MULTILINE)
directors = re.findall(director_pattern, html)

# zip walks the four lists in lockstep and stops at the shortest one, so a
# missing field can no longer raise IndexError.
for name, year, rating, director in zip(names, years, ratings, directors):
    print(f"{name} | {year} | 评分:{rating} | {director}")
# Exercise 3: access-log analysis
import re

log = '''
192.168.1.100 - - [15/Mar/2024:10:15:30 +0800] "GET /index.html HTTP/1.1" 200 1234
10.0.0.50 - - [15/Mar/2024:10:15:31 +0800] "POST /api/login HTTP/1.1" 200 256
192.168.1.101 - - [15/Mar/2024:10:15:32 +0800] "GET /notfound.html HTTP/1.1" 404 512
172.16.0.200 - - [15/Mar/2024:10:15:33 +0800] "GET /images/logo.png HTTP/1.1" 200 4096
'''

# Capture groups, in order: dotted-quad client IP, the bracketed timestamp,
# and the three-digit status code that follows the quoted request line.
# The trailing \d+ consumes the number after the status without capturing it.
pattern = r'(\d+\.\d+\.\d+\.\d+).*?\[([^\]]+)\].*?" (\d{3}) \d+'

# One (ip, time, status) tuple of strings per matching log line.
records = [match.groups() for match in re.finditer(pattern, log)]
for ip, time, status in records:
    print(f'IP: {ip:15} | 时间: {time:25} | 状态: {status}')
# Exercise 4: phone-number masking
import re

phone_book = '''
张三:138-1234-5678
李四:139-5678-1234
王五:138-0000-1111
'''

# Three dash-separated digit groups: 3 digits, 4 digits, 4 digits.
pattern = r'(\d{3})-(\d{4})-(\d{4})'


def mask_phone(match: "re.Match[str]") -> str:
    """Return the matched phone number with its middle group masked.

    Args:
        match: A match produced by ``pattern``; groups 1 and 3 are kept.

    Returns:
        The number in the form ``<group 1>-****-<group 3>``.
    """
    return f'{match.group(1)}-****-{match.group(3)}'


# re.sub calls mask_phone once per match and substitutes its return value.
masked = re.sub(pattern, mask_phone, phone_book)
print(masked)
# Exercise 5: combined challenge -- parse a course timetable
import re

# NOTE(review): the table is pipe-delimited here; it looks like it may have
# been an HTML table whose tags were stripped -- confirm the intended input.
html = '''
| 时间 | 课程 | 教室 |
| 周一 1-2节 | Python程序设计 | A101 |
| 周一 3-4节 | 数据结构 | B205 |
| 周二 1-2节 | 高等数学 | C301 |
| 周三 5-6节 | Python程序设计 | A102 |
'''

# One table row: three cells delimited by literal pipes. The pipes must be
# escaped -- a bare | is regex alternation, which made the previous pattern
# match the whole text as a single "cell". Cells are matched lazily and the
# surrounding blanks are left outside the capture groups. Used with
# re.MULTILINE so ^ and $ anchor on each line.
time_pattern = (
    r'^\|[ \t]*([^|\n]+?)[ \t]*'
    r'\|[ \t]*([^|\n]+?)[ \t]*'
    r'\|[ \t]*([^|\n]+?)[ \t]*\|[ \t]*$'
)

rows = re.findall(time_pattern, html, re.MULTILINE)
# The first row holds the column titles, not a course, so it is dropped.
# NOTE(review): assumes the table always starts with a header row -- confirm.
courses = rows[1:]

print('课程表:')
for time, course, room in courses:
    print(f'{time} | {course} | {room}')