全卷积模型设计计算

是否每次想设置全卷积模型而苦恼于kernel_size和stride的设计?

下面这份代码可以帮助你,寻找合适的方案

import torch
import numpy as np

class Layer:
    """Configuration record for one conv or pooling layer.

    A pooling layer stores only kernel_size and stride; a conv layer also
    stores in_channel/out_channel.  in_channel == -1 marks "not a conv".
    """

    def __init__(self, kernel_size=0, stride=0, in_channel=-1, out_channel=-1):
        self.kernel_size, self.stride = kernel_size, stride
        self.in_channel, self.out_channel = in_channel, out_channel

    def __str__(self):
        # Conv layers print channels as well; layers without channel info
        # (in_channel == -1, i.e. pooling) print only kernel_size and stride.
        if self.in_channel != -1:
            return "%d,%d,%d,%d" % (self.in_channel, self.out_channel, self.kernel_size, self.stride)
        return "%d,%d" % (self.kernel_size, self.stride)



def calc_output_size(n, p, f, s):
    """Conv/pool output size for input n, padding p, kernel f, stride s.

    NOTE: rounding is int() truncation toward zero (as in the original),
    which differs from floor division for negative numerators.
    """
    effective = n + 2 * p - f
    return int(effective / s + 1)

def imperfect_build_moudel(block_config=None, image_size=4, output_size=1, min_kernel_size=2, max_kernel_size=10, min_stride=1, max_stride=5):
    '''
    Assume every block looks as follows:
        self.conv = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, kernel_size=(9, 9), stride=(2, 2)),
            nn.BatchNorm2d(out_channel),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
        )
    This variant finds a solution easily, but the chosen kernel_size/stride
    per layer is NOT guaranteed to avoid dropping features: e.g. with
    feature_size=4 it may use kernel_size=3, stride=2 to reach feature_size=1.
    Smaller kernel_size/stride values are preferred (they overwrite larger ones).

    :param block_config: desired channel count of each block (default: empty list)
    :param image_size: input image size
    :param output_size: desired output feature size
    :param min_kernel_size: minimum allowed kernel_size
    :param max_kernel_size: maximum allowed kernel_size
    :param min_stride: minimum allowed stride
    :param max_stride: maximum allowed stride
    :return: one layer configuration satisfying the constraints (empty list if none)
    '''
    # Avoid the shared mutable-default pitfall.
    block_config = [] if block_config is None else block_config

    mmap_size = image_size
    # Each channel change needs two layers (conv + pool); there are
    # len(block_config) - 1 changes in total.
    layer_number = (len(block_config)-1)*2
    # DP table: mmap[n][m] == 1 means "with n layers the feature map can
    # reach size m". Sizes only shrink, so m ranges over 0..image_size.
    mmap = np.zeros((layer_number+1, mmap_size+1)).astype(int)
    # With zero layers the feature map is the input image itself.
    mmap[0][image_size] = 1
    # pre[n][m] records the previous size from which size m was reached;
    # layers[n][m] records the kernel_size/stride used for that step.
    pre = np.zeros((layer_number + 1, mmap_size + 1)).astype(int)
    layers = [[] for i in range(layer_number+1)]
    for i in range(len(layers)):
        for j in range(mmap_size+1):
            layers[i].append(Layer())

    for i in range(layer_number):
        for j in range(mmap_size+1):
            if mmap[i][j] == 1:
                # Try kernel_size/stride from large to small so smaller values
                # overwrite larger ones, keeping the model relatively small.
                for f in range(max_kernel_size, min_kernel_size-1, -1):
                    for s in range(max_stride, min_stride-1, -1):
                        t_output_size = calc_output_size(j, 0, f, s)
                        if t_output_size >= output_size and t_output_size <= image_size:
                            mmap[i+1][t_output_size] = 1
                            # Fix: record channels on conv steps (even i), as
                            # perfect_build_moudel does, so the printed config
                            # includes in/out channels.
                            if i % 2 == 0:
                                layers[i+1][t_output_size] = Layer(f, s, block_config[i // 2], block_config[i // 2 + 1])
                            else:
                                layers[i+1][t_output_size] = Layer(f, s)
                            pre[i+1][t_output_size] = j

    # Backtrack from the target size to recover the layer sequence.
    ans_layers = []
    if mmap[layer_number][output_size] == 1:
        pre_no = output_size
        for i in range(layer_number, 0, -1):
            ans_layers.append(layers[i][pre_no])
            pre_no = pre[i][pre_no]

    ans_layers.reverse()

    return ans_layers

def perfect_build_moudel(block_config=None, image_size=4, output_size=1, min_kernel_size=2, max_kernel_size=10, min_stride=1, max_stride=5):
    '''
    Assume every block looks as follows:
        self.conv = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, kernel_size=(9, 9), stride=(2, 2)),
            nn.BatchNorm2d(out_channel),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
        )
    This variant is harder to satisfy, but every layer's kernel_size/stride is
    guaranteed not to drop features: a step is only allowed when
    (input_size - kernel_size) is divisible by stride. E.g. with
    feature_size=4 it will use kernel_size=4, stride=1 (never kernel_size=3,
    stride=2) to reach feature_size=1.
    Smaller kernel_size/stride values are preferred (they overwrite larger ones).

    :param block_config: desired channel count of each block (default: empty list)
    :param image_size: input image size
    :param output_size: desired output feature size
    :param min_kernel_size: minimum allowed kernel_size
    :param max_kernel_size: maximum allowed kernel_size
    :param min_stride: minimum allowed stride
    :param max_stride: maximum allowed stride
    :return: one layer configuration satisfying the constraints (empty list if none)
    '''
    # Avoid the shared mutable-default pitfall.
    block_config = [] if block_config is None else block_config

    mmap_size = image_size
    # Each channel change needs two layers (conv + pool); there are
    # len(block_config) - 1 changes in total.
    layer_number = (len(block_config)-1)*2
    # Tabulation (DP): enumerate which feature-map sizes are reachable with
    # each layer count, then check whether the desired one is among them.
    # Sizes only shrink (conv + pooling), so the size axis is 0..mmap_size.
    # mmap[n][m] == 1 means "with n layers the feature map can reach size m";
    # 0 means unreachable.
    mmap = np.zeros((layer_number+1, mmap_size+1)).astype(int)
    # With zero layers the feature map is the input image itself.
    mmap[0][image_size] = 1
    # To reconstruct HOW a size was reached, pre[n][m] records the previous
    # size, and layers[n][m] holds a Layer with the kernel_size/stride used
    # for that final step.
    pre = np.zeros((layer_number + 1, mmap_size + 1)).astype(int)
    layers = [[] for i in range(layer_number+1)]
    for i in range(len(layers)):
        for j in range(mmap_size+1):
            layers[i].append(Layer())

    # For every layer count...
    for i in range(layer_number):
        # ...and every possible size...
        for j in range(mmap_size+1):
            # ...if this size is reachable, extend by one more layer.
            if mmap[i][j] == 1:
                # Try kernel_size/stride from large to small so smaller values
                # overwrite larger ones, keeping the model relatively small.
                for f in range(max_kernel_size, min_kernel_size-1, -1):
                    for s in range(max_stride, min_stride-1, -1):
                        # Guarantee no features are dropped.
                        if (j-f) % s == 0:
                            t_output_size = calc_output_size(j, 0, f, s)
                            if t_output_size >= output_size and t_output_size <= image_size:
                                mmap[i+1][t_output_size] = 1
                                # Even steps are conv layers (carry channels);
                                # odd steps are pooling layers.
                                if i%2==0:
                                    layers[i+1][t_output_size] = Layer(f, s, block_config[int(i/2)], block_config[int(i/2)+1])
                                else:
                                    layers[i + 1][t_output_size] = Layer(f, s)
                                pre[i+1][t_output_size] = j

    # Backtrack from the target size to recover the layer sequence.
    ans_layers = []
    if mmap[layer_number][output_size] == 1:
        pre_no = output_size
        for i in range(layer_number, 0, -1):
            ans_layers.append(layers[i][pre_no])
            pre_no = pre[i][pre_no]

    ans_layers.reverse()

    return ans_layers


def show_mid_feature_size(image_size, layers):
    """Print the feature-map size after each layer, starting from image_size."""
    size = image_size
    for layer in layers:
        size = calc_output_size(size, 0, layer.kernel_size, layer.stride)
        print(size)

def refine_print_for_build_model(ans_layers):
    '''
    Pretty-print helper for the solver output.

    TODO(review): unimplemented stub — the body is only this docstring and
    the function returns None.

    It assumes you define your model like this:

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        channel_config = [1, 5, 10]
        layer_config = [[1,5,2,1], [2, 1], [5,10,8,2], [10, 1]]
        self.sequential = nn.Sequential()
        for i in range(len(layer_config)):
            self.add_conv(channel_config[i], channel_config[i+1], config[i][0], config[i][1])

    def add_conv(self, in_channel, out_channel, conv_kernel_size, conv_stride_size, pool_kernel_size, pool_stride_size):
        self.sequential.add_module(
            nn.Conv2d(in_channel, out_channel, kernel_size=(conv_kernel_size, conv_kernel_size), stride=(conv_stride_size, conv_stride_size)),
            nn.BatchNorm2d(out_channel),
            nn.MaxPool2d(kernel_size=pool_kernel_size, stride=pool_stride_size),
            nn.ReLU(inplace=True),
        )

    def forward(self, X):
        return self.head(self.conv(X))
    :param ans_layers: layer configuration produced by the build functions
    :return: None
    '''

if __name__ == '__main__':
    # Desired per-block channels: 1 -> 5 -> 10, solving for a 28x28 input
    # down to a 1x1 output feature map.
    config = [1, 5, 10]
    image_size = 28
    output_size = 1
    # layers = imperfect_build_moudel(config, image_size, output_size)
    layers = perfect_build_moudel(config, image_size, output_size)
    for layer in layers:
        print(layer)

    # show_mid_feature_size(image_size, layers)
文章目录