Max Unpooling LayerΒΆ

Unfortunately, as some inputs are being dropped in the pooling layer (because only the maximum value will be kept), we cannot fully inverse the max-pooling operation. However, if we have the position of each maximum value when performing max pooling, then we can simply put the maximum value back to its original position. After putting them back, we can set values at other positions to be \(0\).

As in the pooling section, we have an input matrix \(X=\left[ {\begin{array}{*{20}c} 1 & 2 & 3 \\4 & 5 & 6 \\7 & 8 & 9 \end{array} } \right]\) and the corresponding output \(P=\left[ {\begin{array}{*{20}c} 5 & 6 \\8 & 9 \end{array} } \right]\). Besides these, we will know the indices of the maximum value in each region, which are \((1,1)\), \((1,2)\), \((2,1)\) and \((2,2)\).

In the unpooling process, we first put the maximum values back to its position, and fill other positions with \(0\). We will get the output as \(\left[ {\begin{array}{*{20}c} 0 & 0 & 0 \\0 & 5 & 6 \\ 0 & 8 & 9 \end{array} } \right]\)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import math
from .base import Layer
from .convolution import im2col_indices, get_im2col_indices
from tinynet.core import Backend as np


def col2im_no_dup(cols, x_shape, field_height=3, field_width=3, padding=1,
                  stride=1):
    '''
    Similar function for col2im_indices, but will not perform +=.
    This function is used for 
    '''
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
                                 stride)
    cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
    cols_reshaped = cols_reshaped.transpose(2, 0, 1)
    x_padded[:, k, i, j] = cols_reshaped
    if padding == 0:
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]


class MaxUnpool2D(Layer):
    def __init__(self, name, input_dim, size, stride):
        super().__init__(name)
        self.type = 'MaxUnpool2D'
        self.input_channel, self.input_height, self.input_width = input_dim
        self.size = size
        self.stride = stride
        self.out_height = (self.input_height - 1) * stride + size[0]
        self.out_width = (self.input_width - 1) * stride + size[1]
        # it is definitely integer, so we do not need to check it anymore
        self.out_dim = (self.input_channel, self.out_height, self.out_width)

    def forward(self, input, max_indices):
        self.num_of_entries = input.shape[0]
        output_shape = (self.num_of_entries,
                        self.out_dim[0], self.out_dim[1], self.out_dim[2])
        indices = max_indices.reshape(input.shape)
        unpooled = np.zeros(output_shape)
        for i in range(self.num_of_entries):
            for j in range(self.input_channel):
                for m in range(self.input_height):
                    for n in range(self.input_width):
                        index = indices[i, j, m, n]
                        w_index = index %  self.size[0]
                        h_index = index // self.size[1]
                        unpooled[i, j,  m*self.stride+h_index, n*self.stride +
                                 w_index] = input[i, j, m, n]
        return unpooled

    def backward(self, in_gradient):
        '''
        This function is not needed in computation, at least right now.
        '''
        pass
    
    def __call__(self, input, max_indices):
        return self.forward(input, max_indices)