Files
task-3-3-2-MLP/digit_mlp_class/visualize.py
2026-05-21 15:08:03 +08:00

350 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
可视化工具 - 展示神经网络各层的输出
用于课堂教学,让学生直观理解:
1. 输入图像长什么样
2. 第一层隐藏层学到了什么特征
3. 各层激活值的变化
使用方法:
python visualize.py # 可视化测试集前5张
python visualize.py --single # 可视化单张图片
"""
import numpy as np
from PIL import Image
import os
import sys
import matplotlib
matplotlib.use('Agg') # 无头模式,不显示图形
import matplotlib.pyplot as plt
def visualize_input_image(img_vector, save_path='visualizations/input.png'):
    """Save a flattened 784-value image vector as a 28x28 grayscale image.

    Args:
        img_vector: NumPy array with 784 elements. It is reshaped to
            (28, 28) and scaled by 255, so values are expected in 0~1.
        save_path: Destination passed to ``PIL.Image.Image.save``.

    Returns:
        The ``save_path`` that was written.
    """
    # The default path lives in a directory that may not exist yet; create
    # it so the function also works when called on its own.
    if isinstance(save_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(save_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    # Clip before the uint8 cast: out-of-range values would otherwise wrap
    # around (e.g. 256 -> 0) and show up as wrong pixel intensities.
    pixels = np.clip(img_vector.reshape(28, 28) * 255, 0, 255)
    Image.fromarray(pixels.astype(np.uint8)).save(save_path)
    return save_path
def visualize_activations(model, img_vector, save_dir='visualizations'):
    """Run one forward pass and write a picture for every stage to save_dir.

    Files written: ``01_input.png``, ``02_hidden.png``,
    ``03_output_prob.png`` and the summary figure produced by
    ``create_summary_image``.

    Args:
        model: Object with a ``forward`` method that, once called, exposes
            ``a1`` and ``probs`` (both indexed with ``[0]`` here).
        img_vector: Flattened input image; reshaped to a single-row batch.
        save_dir: Output directory, created if missing.

    Returns:
        ``save_dir``.
    """
    os.makedirs(save_dir, exist_ok=True)

    def _target(filename):
        # All outputs of this call live side by side in save_dir.
        return os.path.join(save_dir, filename)

    # Forward pass; the per-layer activations are read back from the model.
    single_row_batch = img_vector.reshape(1, -1)
    model.forward(single_row_batch)

    # 1. Input image
    visualize_input_image(img_vector, _target('01_input.png'))

    # 2. First (hidden) layer activations -- presumably 128 values
    first_layer = model.a1[0]
    visualize_hidden_layer(first_layer, _target('02_hidden.png'))

    # 3. Output-layer probabilities -- presumably 10 values
    class_probs = model.probs[0]
    visualize_output_prob(class_probs, _target('03_output_prob.png'))

    # 4. Combined overview figure
    create_summary_image(img_vector, first_layer, class_probs, save_dir)
    return save_dir
def visualize_hidden_layer(activations, save_path):
    """Draw hidden-layer activations as a grid of grey cells and save it.

    Each activation becomes one 24x24-pixel cell (the last pixel row and
    column of every cell stay white as a separator), laid out row by row
    with 16 cells per row. The activation is doubled, clamped to [0, 1] and
    mapped so that stronger activations are darker.

    Args:
        activations: 1-D sequence of activation values (128 in the usual
            case, giving an 8x16 grid).
        save_path: Destination passed to ``PIL.Image.Image.save``.
    """
    grid_cols = 16
    cell_size = 24
    # Keep the historical 8-row canvas as a minimum so existing output is
    # unchanged, but add rows when there are more than 8*16 activations;
    # previously those extra neurons were silently left out of the image.
    rows_needed = -(-len(activations) // grid_cols)  # ceiling division
    grid_rows = max(8, rows_needed)
    img_h = grid_rows * cell_size
    img_w = grid_cols * cell_size
    grid = np.ones((img_h, img_w)) * 255
    for idx, act in enumerate(activations):
        row, col = divmod(idx, grid_cols)
        intensity = max(0, min(1, act * 2))
        shade = int(255 * (1 - intensity * 0.7))
        top = row * cell_size
        left = col * cell_size
        grid[top:top + cell_size - 1, left:left + cell_size - 1] = shade
    Image.fromarray(grid.astype(np.uint8)).save(save_path)
def visualize_output_prob(probs, save_path):
    """Save a bar chart of the ten output probabilities to ``save_path``.

    The bar of the most probable digit is drawn in red and annotated with
    its probability as a percentage; all other bars are blue.

    Args:
        probs: Sequence of 10 probabilities, one per digit 0-9.
        save_path: Destination passed to ``plt.savefig``.
    """
    fig, ax = plt.subplots(figsize=(8, 4))

    digit_ticks = list(range(10))
    # Index of the winning class, computed once and reused below.
    top_digit = np.argmax(probs)
    bar_colors = [
        '#e74c3c' if digit == top_digit else '#3498db'
        for digit in digit_ticks
    ]
    ax.bar(digit_ticks, probs, color=bar_colors)

    ax.set_xlabel('数字', fontsize=12)
    ax.set_ylabel('概率', fontsize=12)
    ax.set_title('输出层:各数字的预测概率', fontsize=14)
    ax.set_xticks(digit_ticks)
    ax.set_ylim(0, 1)

    # Label the tallest bar with its value.
    top_prob = probs[top_digit]
    ax.annotate(
        f'{top_prob:.1%}',
        xy=(top_digit, top_prob),
        ha='center',
        va='bottom',
        fontsize=10,
        color='#e74c3c',
        fontweight='bold',
    )

    plt.tight_layout()
    plt.savefig(save_path, dpi=100, bbox_inches='tight')
    plt.close()
def create_summary_image(img_vector, hidden_activations, output_probs, save_dir):
    """Build a 2x4 overview figure and save it as ``summary.png`` in save_dir.

    Panels, in subplot order: (1) the input image, (2) a histogram of its
    pixel values, (3) the hidden activations as an 8x16 heat map, (4) the
    same activations as a bar chart, (5) the output probabilities, (6) the
    predicted digit with its confidence, (7) a text diagram of the network
    and (8) a text parameter count.

    Args:
        img_vector: Flattened image; it is reshaped to (28, 28) below.
        hidden_activations: Hidden-layer activations; reshaped to (8, 16)
            below, so exactly 128 values are required.
        output_probs: Per-digit probabilities, plotted against digits 0-9.
        save_dir: Directory that receives ``summary.png``; it is not
            created here.
    """
    fig = plt.figure(figsize=(14, 6))
    # 1. Input image
    ax1 = fig.add_subplot(2, 4, 1)
    ax1.imshow(img_vector.reshape(28, 28), cmap='gray')
    ax1.set_title('(1) Input Image\n(28x28 pixels)', fontsize=11)
    ax1.axis('off')
    # 2. Pixel value distribution
    ax2 = fig.add_subplot(2, 4, 2)
    ax2.hist(img_vector, bins=30, color='#3498db', alpha=0.7, edgecolor='white')
    ax2.set_title('(2) Pixel Value Distribution\n(normalized 0~1)', fontsize=11)
    ax2.set_xlabel('像素值')
    ax2.set_ylabel('频数')
    # 3. Hidden-layer activations (heat map)
    ax3 = fig.add_subplot(2, 4, 3)
    # 128 = 8 x 16
    act_2d = hidden_activations.reshape(8, 16)
    im = ax3.imshow(act_2d, cmap='Blues', aspect='auto')
    ax3.set_title(f'(3) Hidden Layer\n(128 neurons)', fontsize=11)
    ax3.axis('off')
    plt.colorbar(im, ax=ax3, shrink=0.6)
    # 4. Hidden-layer activations (bar chart)
    ax4 = fig.add_subplot(2, 4, 4)
    ax4.bar(range(len(hidden_activations)), hidden_activations, color='#3498db', alpha=0.7)
    ax4.set_title('(4) Neuron Activations', fontsize=11)
    ax4.set_xlabel('神经元编号')
    ax4.set_ylabel('激活强度')
    # 5. Output probabilities (the arg-max bar is red, the rest blue)
    ax5 = fig.add_subplot(2, 4, 5)
    digits = list(range(10))
    colors = ['#3498db' if i != np.argmax(output_probs) else '#e74c3c' for i in digits]
    ax5.bar(digits, output_probs, color=colors)
    ax5.set_title('(5) Output Probabilities', fontsize=11)
    ax5.set_xlabel('数字')
    ax5.set_ylabel('概率')
    ax5.set_ylim(0, 1)
    ax5.set_xticks(digits)
    # 6. Highest probability: predicted digit and its confidence as text
    ax6 = fig.add_subplot(2, 4, 6)
    ax6.axis('off')
    predicted = np.argmax(output_probs)
    confidence = output_probs[predicted]
    result_text = f'预测: {predicted}\n置信度: {confidence:.1%}'
    ax6.text(0.5, 0.5, result_text, fontsize=24, ha='center', va='center',
             bbox=dict(boxstyle='round', facecolor='#2ecc71', alpha=0.9),
             transform=ax6.transAxes, color='white', fontweight='bold')
    ax6.set_title('(6) Recognition Result', fontsize=11)
    # 7. Network structure (fixed text diagram, not derived from any model)
    ax7 = fig.add_subplot(2, 4, 7)
    ax7.axis('off')
    structure_text = (
        '┌─────────────────┐\n'
        '│ 输入层 784 │\n'
        '│ (28×28展平) │\n'
        '└────────┬────────┘\n'
        '\n'
        ' 线性变换+ReLU\n'
        '\n'
        '┌────────┴────────┐\n'
        '│ 隐藏层 128 │\n'
        '│ (特征提取) │\n'
        '└────────┬────────┘\n'
        '\n'
        ' 线性变换+Softmax\n'
        '\n'
        '┌────────┴────────┐\n'
        '│ 输出层 10 │\n'
        '│ (数字0~9概率) │\n'
        '└─────────────────┘'
    )
    ax7.text(0.1, 0.95, structure_text, fontsize=9, va='top',
             family='monospace', transform=ax7.transAxes,
             bbox=dict(boxstyle='round', facecolor='#f8f9fa', alpha=0.9))
    ax7.set_title('(7) Network Structure', fontsize=11)
    # 8. Parameter count explanation (hard-coded for a 784-128-10 network)
    ax8 = fig.add_subplot(2, 4, 8)
    ax8.axis('off')
    params_text = (
        'MLP 参数量计算:\n\n'
        'W1: 784 × 128 = 100,352\n'
        'b1: 128\n\n'
        'W2: 128 × 10 = 1,280\n'
        'b2: 10\n\n'
        '─────────────────\n'
        '总计: 101,770 参数\n\n'
        '全部用 NumPy 实现\n'
        '无需任何深度学习框架!'
    )
    ax8.text(0.1, 0.95, params_text, fontsize=10, va='top',
             family='monospace', transform=ax8.transAxes)
    ax8.set_title('(8) Parameters', fontsize=11)
    # y=1.02 places the figure title slightly above the top of the axes area.
    plt.suptitle('MLP Feature Maps Visualization - Handwritten Digits', fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'summary.png'), dpi=120, bbox_inches='tight')
    plt.close()
def load_model_or_train():
    """Return a model for visualization, loading saved weights if present.

    The current working directory is searched for ``mnist_mlp_*.npy`` files.
    If any ``mnist_mlp_<timestamp>_W1.npy`` exists, the lexicographically
    last timestamp is chosen and its ``W1``/``b1``/``W2``/``b2`` arrays are
    loaded into a ``model_numpy.MLP``.

    Otherwise a scikit-learn ``MLPClassifier`` with one 128-unit hidden
    layer is trained on the first 10,000 ``mnist_784`` samples fetched via
    ``fetch_openml``.

    Returns:
        Either the ``model_numpy.MLP`` instance, or an ``MLPClassifier``
        subclass that additionally offers ``forward(X)`` setting ``a1`` and
        ``probs`` -- the interface the visualization functions in this file
        call. A plain ``MLPClassifier`` has none of these, which previously
        made the fallback path fail with ``AttributeError``.
    """
    import glob

    # One entry per saved run, identified by whatever sits between the
    # common prefix and the W1 suffix of the file name.
    timestamps = sorted({
        f.replace('mnist_mlp_', '').replace('_W1.npy', '')
        for f in glob.glob('mnist_mlp_*.npy')
        if '_W1.npy' in f
    })
    if timestamps:
        model_path = 'mnist_mlp_' + timestamps[-1]
        print(f"加载模型: {model_path}")
        from model_numpy import MLP
        model = MLP(input_size=784, hidden_size=128, num_classes=10, learning_rate=0.1)
        model.W1 = np.load(f'{model_path}_W1.npy')
        model.b1 = np.load(f'{model_path}_b1.npy')
        model.W2 = np.load(f'{model_path}_W2.npy')
        model.b2 = np.load(f'{model_path}_b2.npy')
        return model

    # No saved model: quickly train a stand-in with scikit-learn for the demo.
    print("未找到已训练模型,使用sklearn数据快速训练演示模型...")
    from sklearn.datasets import fetch_openml
    from sklearn.neural_network import MLPClassifier

    class _VisualizableMLP(MLPClassifier):
        """MLPClassifier exposing forward()/a1/probs for the visualizers."""

        def forward(self, X):
            # Hidden activations recomputed from the fitted weights. ReLU is
            # MLPClassifier's default activation and is not overridden below.
            X = np.asarray(X)
            self.a1 = np.maximum(0, X @ self.coefs_[0] + self.intercepts_[0])
            self.probs = self.predict_proba(X)
            return self.probs

    mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')
    X = mnist.data[:10000].astype(np.float32) / 255.0
    y = mnist.target[:10000].astype(int)
    model = _VisualizableMLP(
        hidden_layer_sizes=(128,),
        max_iter=20,
        alpha=1e-4,
        solver='sgd',
        learning_rate_init=0.1,
        random_state=42,
        verbose=False
    )
    model.fit(X, y)
    print("sklearn模型训练完成(仅用于可视化演示)")
    return model
def main():
    """Visualize the first five test images and write a comparison figure.

    Loads the data through ``dataset.load_data``, obtains a model from
    ``load_model_or_train``, writes one sub-directory of pictures per sample
    under ``visualizations/`` and finally the multi-sample comparison image.
    """
    os.makedirs('visualizations', exist_ok=True)

    # Data
    from dataset import load_data
    print("加载MNIST数据集...")
    X_train, y_train, X_test, y_test = load_data()

    # Model
    model = load_model_or_train()

    # Ground-truth labels: 2-D labels (presumably one-hot) are collapsed to
    # class indices, 1-D labels are used as they are.
    labels_are_2d = len(y_test.shape) > 1
    y_test_labels = np.argmax(y_test, axis=1) if labels_are_2d else y_test

    # First five test samples
    print("\n可视化测试集前5张图片...")
    for i in range(5):
        img = X_test[i]
        true_label = y_test_labels[i]
        sub_dir = f'visualizations/sample_{i}_true{true_label}'
        os.makedirs(sub_dir, exist_ok=True)

        # Prefer class probabilities when the model offers them.
        if hasattr(model, 'predict_proba'):
            sample_probs = model.predict_proba(img.reshape(1, -1))[0]
            predicted = np.argmax(sample_probs)
        else:
            predicted = model.predict(img.reshape(1, -1))[0]

        print(f" 样本{i}: 真实={true_label}, 预测={predicted}")
        visualize_activations(model, img, sub_dir)

    # Side-by-side comparison of the same five samples
    create_comparison_summary(X_test[:5], y_test_labels[:5], model, 'visualizations')
    print("\n✅ 可视化完成!")
    print(" 查看 visualizations/ 目录下的图片和汇总图")
def create_comparison_summary(X_samples, y_true, model, save_dir):
    """Save ``comparison.png``: one column per sample, three rows of plots.

    Row 1 shows the input image with its true label, row 2 the hidden-layer
    activations as an 8x16 heat map, row 3 the output probabilities with the
    predicted digit highlighted in red.

    Args:
        X_samples: Sequence of flattened images (each reshaped to 28x28).
        y_true: True label for each sample, indexed in parallel.
        model: Object with ``forward`` that sets ``a1`` and ``probs``, plus
            either ``predict_proba`` or ``predict``.
        save_dir: Directory that receives ``comparison.png``.
    """
    n_samples = len(X_samples)
    fig = plt.figure(figsize=(4 * n_samples, 8))
    digits = list(range(10))
    for i in range(n_samples):
        img = X_samples[i]
        true_label = y_true[i]
        batch = img.reshape(1, -1)

        # Run the forward pass first so a1/probs belong to this sample.
        model.forward(batch)
        # 128 = 8 x 16
        hidden = model.a1[0].reshape(8, 16)

        if hasattr(model, 'predict_proba'):
            probs = model.predict_proba(batch)[0]
            predicted = np.argmax(probs)
        else:
            # Without predict_proba the probabilities come from the forward
            # pass. Previously `probs` was never assigned on this path, so
            # the bar plot below raised NameError (or reused stale values).
            probs = model.probs[0]
            predicted = model.predict(batch)[0]

        # Input image
        ax = fig.add_subplot(3, n_samples, i + 1)
        ax.imshow(img.reshape(28, 28), cmap='gray')
        ax.set_title(f'真实: {true_label}', fontsize=12)
        ax.axis('off')

        # Hidden-layer activations
        ax = fig.add_subplot(3, n_samples, i + 1 + n_samples)
        ax.imshow(hidden, cmap='Blues', aspect='auto')
        ax.set_title('隐藏层激活', fontsize=10)
        ax.axis('off')

        # Output probabilities
        ax = fig.add_subplot(3, n_samples, i + 1 + 2 * n_samples)
        colors = ['#e74c3c' if d == predicted else '#3498db' for d in digits]
        ax.bar(digits, probs, color=colors)
        ax.set_title(f'预测: {predicted}', fontsize=12)
        ax.set_ylim(0, 1)
        ax.set_xticks(digits)
        ax.tick_params(labelsize=8)

    plt.suptitle('多样本特征图对比', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'comparison.png'), dpi=120, bbox_inches='tight')
    plt.close()
# Run the visualization pipeline only when this file is executed as a script,
# not when it is imported for its helper functions.
if __name__ == '__main__':
    main()