卷积神经网络的可视化

可视化卷积神经网络的中间输出（中间激活）
可视化卷积神经网络的过滤器
可视化图像中类激活的热力图

0x00 准备

# Setup for the intermediate-activation visualizations: load the trained
# convnet and turn one test picture into a batch the model can consume.
from keras.models import load_model
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np

model = load_model('cats_and_dogs_small_2.h5')
model.summary()

img_path = 'E:\\BaiduNetdiskDownload\\kaggle\\train\\cat.1700.jpg'

# Preprocess the single image: load it at 150x150, convert it to an array,
# prepend a batch axis (axis 0) and divide the pixel values by 255.
img = image.load_img(img_path, target_size=(150, 150))
img_tensor = image.img_to_array(img)[np.newaxis, ...]
img_tensor /= 255.
# Its shape is (1, 150, 150, 3)
print(img_tensor.shape)

# Display the test image
plt.imshow(img_tensor[0])
plt.show()

0x01 可视化中间激活

# Instantiate a model from one input tensor and a list of output tensors.
# The Keras `Model` class takes the input tensor(s) and the output tensor(s);
# here the outputs are those of the first eight layers of the loaded model.
from keras import models

layer_outputs = [layer.output for layer in model.layers[:8]]
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)

# Run the model in predict mode; `activations` is indexed per layer below.
activations = activation_model.predict(img_tensor)
# Activation of the first layer
first_layer_activation = activations[0]
print(first_layer_activation.shape)

# NOTE: the original `%matplotlib inline` line is an IPython magic, not
# Python, and is a syntax error outside a notebook. In Jupyter, run
# `%matplotlib inline` in its own cell before this code.

# Plot the channel at index 4 (i.e. the fifth channel) of the first layer's
# activation; `cmap` selects the colormap used for display.
plt.matshow(first_layer_activation[0, :, :, 4], cmap='viridis')
print(first_layer_activation[0, :, :, 4].shape)
# Plot the channel at index 7 (i.e. the eighth channel) as well.
plt.matshow(first_layer_activation[0, :, :, 7], cmap='viridis')

# Visualize every channel of every intermediate activation.
import keras

# Names of the first eight layers, used as plot titles. One tiled figure is
# drawn per layer, in the same order as the entries of `activations`.
layer_names = [layer.name for layer in model.layers[:8]]

images_per_row = 16

# Draw one grid of feature maps per layer.
for layer_name, layer_activation in zip(layer_names, activations):
    # Number of channels (features) in this feature map
    n_features = layer_activation.shape[-1]
    # The feature map is indexed below as (1, size, size, n_features)
    size = layer_activation.shape[1]

    # Number of grid rows (the name `n_cols` is kept from the original code).
    # `//` is floor division, so channels that do not fill a complete row of
    # `images_per_row` tiles are not drawn.
    n_cols = n_features // images_per_row
    display_grid = np.zeros((size * n_cols, images_per_row * size))

    # `col` walks down the grid rows, `row` walks across one grid row.
    for col in range(n_cols):
        for row in range(images_per_row):
            # Copy the slice: the in-place normalization below would
            # otherwise write through the view and corrupt `activations`.
            channel_image = layer_activation[
                0, :, :, col * images_per_row + row].copy()
            # Post-process the channel to make it visually palatable:
            # zero mean, unit std (std over the whole channel), then map to
            # roughly 128 +/- 64. The epsilon avoids dividing by zero for a
            # constant channel, matching `deprocess_image`.
            channel_image -= channel_image.mean()
            channel_image /= channel_image.std() + 1e-5
            channel_image *= 64
            channel_image += 128
            # Limit the values to 0-255 before converting to uint8
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')
            display_grid[col * size: (col + 1) * size,
                         row * size: (row + 1) * size] = channel_image

    # Display the grid; the figure size is proportional to the tile counts.
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1],
                        scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.grid(False)  # turn off the background grid lines
    plt.imshow(display_grid, aspect='auto', cmap='viridis')

plt.show()

0x02 可视化神经网络的过滤器

定义张量损失：输出的指定特征通道的均值
定义梯度：损失相对于输入图像的梯度
梯度标准化
通过梯度上升让损失最大化

# Utility function that converts a tensor into a valid, displayable image.
def deprocess_image(x):
    """Convert a numeric array into a uint8 image with values in 0-255.

    The values are centred on 0, scaled to a standard deviation of about
    0.1, shifted to 0.5, clipped to [0, 1] and finally mapped to 0-255.

    The input is never modified: the work is done on a copy. Integer input
    is promoted to float64; floating-point input keeps its own precision.

    Args:
        x: array-like of numbers, any shape.

    Returns:
        A new ``uint8`` NumPy array with the same shape as ``x``.
    """
    # Work on a private copy so the caller's array is left untouched.
    x = np.array(x, copy=True)
    if not np.issubdtype(x.dtype, np.floating):
        # In-place float arithmetic below is not possible on integer arrays.
        x = x.astype(np.float64)

    # Normalize: centre on 0 and bring the std to 0.1. The epsilon keeps a
    # constant input from dividing by zero.
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1

    # Shift to 0.5 and clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # Scale to 0-255 and convert to integers (fractions are truncated)
    x *= 255
    return np.clip(x, 0, 255).astype('uint8')

# Function that generates the input pattern a given filter responds to.
def generate_pattern(layer_name, filter_index, size=150, steps=40, step=1.):
    """Generate an image that maximizes the activation of one filter.

    Starting from a gray image with some noise, runs gradient ascent on the
    input so that the mean activation of channel ``filter_index`` in the
    output of layer ``layer_name`` increases.

    Relies on the module-level ``model`` and ``deprocess_image``.

    Args:
        layer_name: name of the layer, looked up with ``model.get_layer``.
        filter_index: index of the channel in that layer's output.
        size: height and width of the generated image.
        steps: number of gradient-ascent iterations.
        step: multiplier applied to the normalized gradient at each iteration.

    Returns:
        The result of ``deprocess_image`` applied to the optimized
        ``(size, size, 3)`` input.
    """
    # Imported locally because the file-level `from keras import backend as K`
    # only appears further down, after this function is first called.
    from keras import backend as K

    # Loss: mean activation of the selected filter of the selected layer.
    layer_output = model.get_layer(layer_name).output
    loss = K.mean(layer_output[:, :, :, filter_index])

    # Gradient of that loss with respect to the input picture.
    grads = K.gradients(loss, model.input)[0]

    # Normalization trick: divide the gradient by its RMS value (plus a small
    # epsilon to avoid dividing by zero).
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)

    # Backend function returning the loss and gradient for an input picture.
    iterate = K.function([model.input], [loss, grads])

    # Start from a gray image with some noise.
    input_img_data = np.random.random((1, size, size, 3)) * 20 + 128.

    # Gradient ascent: move the input along the gradient to raise the loss.
    for _ in range(steps):
        _, grads_value = iterate([input_img_data])
        input_img_data += grads_value * step

    return deprocess_image(input_img_data[0])

# Usage example.
# The layer names used from here on (`block5_conv3`, `block1_conv1`, ...) are
# VGG16 layer names, while `model` still holds the network loaded from
# 'cats_and_dogs_small_2.h5', so switch to the VGG16 convolutional base first.
# `include_top=False` drops the classifier so the network is not tied to a
# fixed input size; `generate_pattern` feeds it 150x150 and 64x64 inputs.
# `K` is imported here because `generate_pattern` uses it and the file only
# imports it later on.
from keras import backend as K
from keras.applications.vgg16 import VGG16

model = VGG16(weights='imagenet', include_top=False)

plt.imshow(generate_pattern('block5_conv3', 5))
plt.show()

# Build, for each listed layer, an 8x8 grid of the response patterns of its
# first 64 filters.
for layer_name in ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1']:
    size = 64
    margin = 5

    # Empty (black) image that receives the 64 tiles, separated by `margin`
    # pixels. It is uint8 on purpose: the tiles hold 0-255 values, and
    # `imshow` treats float RGB data as 0-1 and clips everything above, which
    # would wash the figure out.
    results = np.zeros((8 * size + 7 * margin, 8 * size + 7 * margin, 3),
                       dtype='uint8')

    for i in range(8):  # first (row) axis of the results grid
        for j in range(8):  # second (column) axis of the results grid
            # Generate the pattern for filter `i + (j * 8)` in `layer_name`
            filter_img = generate_pattern(layer_name, i + (j * 8), size=size)

            # Put the result in the square `(i, j)` of the results grid.
            # Note: despite its name, `horizontal_*` indexes the first axis.
            horizontal_start = i * size + i * margin
            horizontal_end = horizontal_start + size
            vertical_start = j * size + j * margin
            vertical_end = vertical_start + size
            results[horizontal_start: horizontal_end,
                    vertical_start: vertical_end, :] = filter_img

    # Display the results grid
    plt.figure(figsize=(20, 20))
    plt.imshow(results)
    plt.show()

0x03 可视化类激活的热力图

# Prepare the input image for VGG16.
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
import numpy as np

# Local path of the target image
img_path = 'D:\\Jupyter\\elephant.jpg'

# Load the picture resized to 224x224
img = image.load_img(img_path, target_size=(224, 224))

# Convert the picture to an array, add a leading batch axis so it becomes a
# batch of one, then apply the VGG16 preprocessing to the batch.
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))


# Load the pretrained VGG16 network and classify the prepared batch.
from keras.applications.vgg16 import VGG16

model = VGG16(weights='imagenet')

preds = model.predict(x)
top_three = decode_predictions(preds, top=3)[0]
print('Predicted:', top_three)
# Output recorded by the author:
#Predicted: [('n02504013', 'Indian_elephant', 0.59529865), ('n01871265', 'tusker', 0.26843295), ('n02504458', 'African_elephant', 0.094206899)]

# Apply the Grad-CAM algorithm to compute the class-activation heatmap.
from keras import backend as K
import numpy as np

# Index of the top predicted class. The author's recorded run gave 385
# ("Indian_elephant"); the original code discarded this value and hard-coded
# 386, so the heatmap did not explain the class that was actually predicted.
class_index = int(np.argmax(preds[0]))

# Entry of the prediction vector for the class being explained
class_output = model.output[:, class_index]

# `block5_conv3` is the last convolutional layer in VGG16; its output
# feature map is what the heatmap is built from.
last_conv_layer = model.get_layer('block5_conv3')

# Gradient of the class score with regard to the output feature map of
# `block5_conv3`
grads = K.gradients(class_output, last_conv_layer.output)[0]

# One value per feature-map channel: the gradient averaged over the batch and
# both spatial axes.
pooled_grads = K.mean(grads, axis=(0, 1, 2))

# Backend function that evaluates, for a given input, `pooled_grads` and the
# `block5_conv3` feature map of the first sample.
iterate = K.function([model.input], [pooled_grads, last_conv_layer.output[0]])

# Values of these two quantities, as NumPy arrays, for our sample image
pooled_grads_value, conv_layer_output_value = iterate([x])

# Weight each channel of the feature map by "how important this channel is"
# for the class. Broadcasting over the last axis replaces the original loop
# and its hard-coded channel count of 512.
conv_layer_output_value *= pooled_grads_value

# The channel-wise mean of the weighted feature map is the heatmap of class
# activation.
heatmap = np.mean(conv_layer_output_value, axis=-1)

# Post-process the heatmap: keep only the positive part and scale it so the
# largest value becomes 1.
import matplotlib.pyplot as plt
heatmap = np.maximum(heatmap, 0)
heatmap_max = np.max(heatmap)
# If nothing is positive the heatmap is all zeros; skip the division so it
# does not turn into NaN.
if heatmap_max > 0:
    heatmap /= heatmap_max
plt.matshow(heatmap)
plt.show()

# Superimpose the heatmap on the original image.
import cv2

# Load the original image with cv2. `imread` signals failure by returning
# None instead of raising, so check before using the result.
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError('Could not read image: %s' % img_path)

# Resize the heatmap to the size of the original image (cv2 expects the
# target size as (width, height)).
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))

# Scale the heatmap to 0-255 and convert it to 8-bit
heatmap = np.uint8(255 * heatmap)

# Colorize the heatmap with the JET colormap
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

# 0.4 here is a heatmap intensity factor. The sum can exceed 255, so clip it
# and convert to 8-bit explicitly before writing.
superimposed_img = np.clip(heatmap * 0.4 + img, 0, 255).astype('uint8')

# Save the image to disk; `imwrite` returns False when it cannot write.
if not cv2.imwrite('D:\\Jupyter\\elephant_hot.jpg', superimposed_img):
    raise IOError('Could not write the superimposed image')

个人理解

先求特定类别的输出得分相对于最后一个卷积层输出特征图的梯度，并在空间维度上取均值，得到每个通道的权重向量；再用这些权重与特征图上每个位置的对应通道相乘；最后沿通道维（第三维）取均值，得出热力图。

具体参见