
I am using a neural network for a regression task. My input is a grayscale image of size 100x70x1.

The gray area has a uniform pixel value of 60.

The input goes through a preprocessing layer, which multiplies every pixel value by 1./255.
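
This step is equivalent to a Rescaling layer (a minimal sketch, assuming a TF version where tf.keras.layers.Rescaling is available):

import tensorflow as tf

# Scale raw 0-255 pixel values into [0, 1].
rescale = tf.keras.layers.Rescaling(1. / 255)

# Sanity check: a gray pixel of value 60 maps to 60/255 ≈ 0.235.
x = tf.fill((1, 100, 70, 1), 60.0)
print(float(rescale(x)[0, 0, 0, 0]))  # ~0.23529412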


My output is just three doubles: [0.87077969, 0.98989031, 0.98888382].

I used a ResNet152 model, as shown below:

import tensorflow as tf


class Bottleneck(tf.keras.Model):
    expansion = 4

    def __init__(self, in_channels, out_channels, strides=1):
        super(Bottleneck, self).__init__()

        self.conv1 = tf.keras.layers.Conv2D(out_channels, 1, 1, use_bias=False)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(out_channels, 3, strides, padding="same", use_bias=False)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv3 = tf.keras.layers.Conv2D(out_channels*self.expansion, 1, 1, use_bias=False)
        self.bn3 = tf.keras.layers.BatchNormalization()

        if strides != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = tf.keras.Sequential([
                    tf.keras.layers.Conv2D(self.expansion*out_channels, kernel_size=1,
                                           strides=strides, use_bias=False),
                    tf.keras.layers.BatchNormalization()]
                    )
        else:
            self.shortcut = lambda x, training=False: x  # identity shortcut

    def call(self, x, training=False):
        out = tf.nn.elu(self.bn1(self.conv1(x), training=training))
        out = tf.nn.elu(self.bn2(self.conv2(out), training=training))
        out = self.bn3(self.conv3(out), training=training)
        out += self.shortcut(x, training=training)
        return tf.nn.elu(out)


class ResNet(tf.keras.Model):
    def __init__(self, block, num_blocks):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = tf.keras.layers.Conv2D(64, 7, 2, padding="same", use_bias=False) # 100x70 -> 50x35
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=2, padding='same') # 50x35 -> 25x18

        self.layer1 = self._make_layer(block,  64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        self.avg_pool2d = tf.keras.layers.GlobalAveragePooling2D()
        self.flatten = tf.keras.layers.Flatten()

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return tf.keras.Sequential(layers)

    def call(self, x, training=False):
        out = self.pool1(tf.nn.elu(self.bn1(self.conv1(x), training=training)))
        out = self.layer1(out, training=training)
        out = self.layer2(out, training=training)
        out = self.layer3(out, training=training)
        out = self.layer4(out, training=training)

        # Flatten the final feature map (replaces the original GlobalAveragePooling2D)
        out = self.flatten(out)
        return out

    def model(self):
        x = tf.keras.layers.Input(shape=(100,70,1))
        return tf.keras.Model(inputs=[x], outputs=self.call(x))

def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3])
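
The network can be built and inspected through the model() helper above, e.g.:

model = ResNet152().model()
model.summary()  # for a 100x70 input the flattened output should be 4*3*2048 = 24576 features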

I used ELU as the activation function and replaced the GlobalAveragePooling layer with a Flatten layer at the end of the ResNet.

Before the output I stack two Dense layers (2048 units and 3 units) on top of the ResNet model, as sketched below.
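
Concretely, the head is assembled like this (a minimal sketch; the ELU activation on the 2048-unit layer is my assumption, and the last layer is linear since this is regression):

backbone = ResNet152()
model = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1. / 255),            # preprocessing described above
    backbone,                                       # flattened ResNet features
    tf.keras.layers.Dense(2048, activation='elu'),  # assumed ELU, matching the rest of the net
    tf.keras.layers.Dense(3)                        # 3 regression outputs
])
model.build(input_shape=(None, 100, 70, 1))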

For training I used the Adam optimizer with an initial learning rate of 1e-4, which is reduced by a factor of 10 whenever val_loss has not decreased for 3 epochs.

The loss is just MSE.
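
Put together, the training setup corresponds to something like this (a minimal sketch; x_train, y_train, the epoch count, and the early-stopping patience are placeholders):

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='mse')

# Drop the learning rate by a factor of 10 after 3 epochs without improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=3, min_lr=1e-8)
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True)

model.fit(x_train, y_train,
          validation_data=(x_val, y_val),
          epochs=500,
          callbacks=[reduce_lr, early_stop])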

After early stopping, with the learning rate down to 1e-8, the MSE loss is still very high: 8.6225.

The prediction is [2.92318237, 5.53124916, 3.00686643], which is far from the ground truth: [0.87077969, 0.98989031, 0.98888382].

I don't understand why such a deep network cannot even overfit this single sample.

Could the reason be that my input image carries too little information? Could someone help me?
