"""Regular-expression practice exercises.

Five small, independent exercises. Each one parses or rewrites an inline
sample string with ``re`` and prints the result.

NOTE(review): the samples in exercises 2 and 5 are named ``html`` but contain
no markup as they appear in this file, so the patterns below key on the text
itself instead of on tags. Confirm against the original sample data.
"""

import re

# Exercise 1: extract the weather forecast
text = '''
2024-03-15 天气:晴 温度:15-25°C
2024-03-16 天气:多云 温度:12-20°C
2024-03-17 天气:小雨 温度:10-18°C
'''

# \S+ stops the weather word at the first whitespace so it cannot run into
# the temperature field or the next record.
pattern = r'(\d{4}-\d{2}-\d{2})\s*天气:(\S+)\s*温度:(\d+)-(\d+)°C'
matches = re.findall(pattern, text)

for date, weather, low, high in matches:
    print(f'{date}: {weather}, {low}°C-{high}°C')


# Exercise 2: scrape Douban movie information
html = '''

《流浪地球》

(2024) 8.5 导演:郭帆

《你好,李焕英》

(2024) 7.9 导演:贾玲
'''

name_pattern = r'《([^》]+)》'
year_pattern = r'\((\d{4})\)'
# The rating is the only decimal number in each record; the year is an
# integer wrapped in parentheses, so it is never picked up here.
rating_pattern = r'(\d+\.\d+)'
# Stop at a tag opener or a line break so one director's name cannot swallow
# the following record.
director_pattern = r'导演:([^<\n]+)'

names = re.findall(name_pattern, html)
years = re.findall(year_pattern, html)
ratings = re.findall(rating_pattern, html)
directors = re.findall(director_pattern, html)

# zip walks the four lists in lockstep and cannot index past the shortest.
for name, year, rating, director in zip(names, years, ratings, directors):
    print(f"{name} | {year} | 评分:{rating} | {director}")


# Exercise 3: log analysis
log = '''
192.168.1.100 - - [15/Mar/2024:10:15:30 +0800] "GET /index.html HTTP/1.1" 200 1234
10.0.0.50 - - [15/Mar/2024:10:15:31 +0800] "POST /api/login HTTP/1.1" 200 256
192.168.1.101 - - [15/Mar/2024:10:15:32 +0800] "GET /notfound.html HTTP/1.1" 404 512
172.16.0.200 - - [15/Mar/2024:10:15:33 +0800] "GET /images/logo.png HTTP/1.1" 200 4096
'''

# Groups: client IP, bracketed timestamp, HTTP status code. "." does not
# match a newline here, so every match stays within a single log line.
pattern = r'(\d+\.\d+\.\d+\.\d+).*?\[([^\]]+)\].*?" (\d{3}) \d+'
log_entries = [found.groups() for found in re.finditer(pattern, log)]

for ip, timestamp, status in log_entries:
    print(f'IP: {ip:15} | 时间: {timestamp:25} | 状态: {status}')


# Exercise 4: phone number masking
phone_book = '''
张三:138-1234-5678
李四:139-5678-1234
王五:138-0000-1111
'''

pattern = r'(\d{3})-(\d{4})-(\d{4})'


def mask_phone(match):
    """Return the matched phone number with its middle group masked.

    Args:
        match: A match object whose group 1 is the leading digit group and
            whose group 3 is the trailing digit group.

    Returns:
        A string of the form ``<group 1>-****-<group 3>``.
    """
    return f'{match.group(1)}-****-{match.group(3)}'


masked = re.sub(pattern, mask_phone, phone_book)
print(masked)


# Exercise 5: combined challenge
html = '''
时间课程教室
周一 1-2节Python程序设计A101
周一 3-4节数据结构B205
周二 1-2节高等数学C301
周三 5-6节Python程序设计A102
'''

# One row per line: "<day> <n>-<m>节", then the course name, then a room code
# made of one capital letter plus digits. The header row contains no space
# and no period range, so it never matches.
time_pattern = r'^(\S+ \d+-\d+节)(.+?)([A-Z]\d+)$'
courses = re.findall(time_pattern, html, re.MULTILINE)

print('课程表:')
for slot, course, room in courses:
    print(f'{slot} | {course} | {room}')