The neural network works well when training on just one batch but fails to learn when training on the whole dataset.

The problem started when I noticed that neither the training accuracy nor the testing accuracy was improving. To make sure the code itself was correct, I trained the network on a single batch only (four samples); in that case it trained fine and the accuracy increased.

On the other hand, when I try to train the network on the whole dataset, it doesn't learn. It turned out that with the whole dataset the weights are not updated at all (except for the bias terms in the final output layer), whereas with a single batch all the weights are updated properly.
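
To verify this, I compare the variable values before and after a single training step, roughly like the sketch below (sess and the sample batch batch_x / batch_y are assumed to exist and are not shown here):

import numpy as np

# Run one training step and compare every trainable variable before and after it.
trainable = tf.trainable_variables()
before = sess.run(trainable)
sess.run(optimizer, feed_dict={anat_x: batch_x, y: batch_y, kp: 1.0})
after = sess.run(trainable)
for var, b, a in zip(trainable, before, after):
    # A variable that is not being updated shows a maximum difference of 0.
    print(var.name, np.abs(a - b).max())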

Any ideas what might be happening?

The architecture of the network: 3 conv, 1 max-pool, 1 conv, 1 max-pool, 2 FC, and 1 output layer.

This is the code of the CNN:

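# Placeholders for the input volumes, the one-hot labels, and a scalar keep probability kp (not used in the snippet shown).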
anat_x = tf.placeholder("float", [None, anat_img_depth, anat_img_dim1, anat_img_dim2, 1])
y = tf.placeholder("float", [None, n_classes])
kp = tf.placeholder("float", shape=())

def conv3d(x, W, b, s=2):
    # Conv3D wrapper, with bias add and ReLU activation
    x = tf.nn.conv3d(x, W, strides=[1, s, s, s, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool3d(x, k=2, s=2):
    return tf.nn.max_pool3d(x, ksize=[1, k, k, k, 1], strides=[1, s, s, s, 1], padding='SAME')

weights_anat = {
    'wc1': tf.get_variable('W0_A', shape=(3,3,3,1,32), initializer=tf.contrib.layers.xavier_initializer()),
    'wc2': tf.get_variable('W1_A', shape=(3,3,3,32,32), initializer=tf.contrib.layers.xavier_initializer()),
    'wc3': tf.get_variable('W2_A', shape=(3,3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()),
    'wc4': tf.get_variable('W3_A', shape=(3,3,3,64,64), initializer=tf.contrib.layers.xavier_initializer()),
    'wd1': tf.get_variable('W4_A', shape=(3*4*4*64,1024), initializer=tf.contrib.layers.xavier_initializer()),
    'wd2': tf.get_variable('W5_A', shape=(1024,512), initializer=tf.contrib.layers.xavier_initializer()),
    'out': tf.get_variable('W6_A', shape=(512,2), initializer=tf.contrib.layers.xavier_initializer()),
}
biases_anat = {
    'bc1': tf.get_variable('B0_A', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc2': tf.get_variable('B1_A', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc3': tf.get_variable('B2_A', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bc4': tf.get_variable('B3_A', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bd1': tf.get_variable('B4_A', shape=(1024), initializer=tf.contrib.layers.xavier_initializer()),
    'bd2': tf.get_variable('B5_A', shape=(512), initializer=tf.contrib.layers.xavier_initializer()),
    'out': tf.get_variable('B6_A', shape=(2), initializer=tf.contrib.layers.xavier_initializer()),
}

def conv_net(x, weights, biases):

    print("input shape: " + str(x.shape))  

    # Here we call the conv3d function we had defined above and pass the input image x, weights wc1 and bias bc1.
    conv1_anat = conv3d(x, weights['wc1'], biases['bc1'], s=2)
    print("conv1_anat shape: " + str(conv1_anat.shape))  

    # Here we call the conv3d function we had defined above and pass the previous conv1 layer, weights wc2 and bias bc2.    
    conv2_anat = conv3d(conv1_anat, weights['wc2'], biases['bc2'], s=2)
    print("conv2_anat shape: " + str(conv2_anat.shape))  

    # Here we call the conv3d function we had defined above and pass the previous conv2 layer, weights wc3 and bias bc3.    
    conv3_anat = conv3d(conv2_anat, weights['wc3'], biases['bc3'], s=2)
    print("conv3_anat shape: " + str(conv3_anat.shape))  

    # Max pooling (down-sampling): take the max over each 2*2*2 window, halving each spatial dimension.
    pooling1_anat = maxpool3d(conv3_anat, k=2, s=2)
    print("pooling1_anat shape: " + str(pooling1_anat.shape))  

    conv4_anat = conv3d(pooling1_anat, weights['wc4'], biases['bc4'], s=2)
    print("conv4_anat shape: " + str(conv4_anat.shape))  

    pooling2_anat = maxpool3d(conv4_anat, k=2, s=2)
    print("pooling2_anat shape: " + str(pooling2_anat.shape))  

    fc1 = tf.reshape(pooling2_anat, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    print("fc1_anat shape: " + str(fc1.shape))  

    fc2 = tf.reshape(fc1, [-1, weights['wd2'].get_shape().as_list()[0]])
    fc2 = tf.add(tf.matmul(fc2, weights['wd2']), biases['bd2'])
    fc2 = tf.nn.relu(fc2)
    print("fc2_anat shape: " + str(fc2.shape))  

    out = tf.add(tf.matmul(fc2, weights['out']), biases['out'])
    print("out_anat shape: " + str(out.shape))  

    return out

pred = conv_net(anat_x, weights_anat, biases_anat)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
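
For completeness, the training loop is along these lines (a sketch; next_batch, n_epochs, batch_size, train_x, and train_y stand in for my actual batching code and are not the exact names I use):

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_x, batch_y in next_batch(train_x, train_y, batch_size):
            # Run one optimization step on the current batch.
            _, loss = sess.run([optimizer, cost],
                               feed_dict={anat_x: batch_x, y: batch_y, kp: 0.5})
        print("epoch " + str(epoch) + ", loss " + str(loss))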
