Solved – My training loss is increasing and the training accuracy is also increasing. Is it normal

conv-neural-networkdeep learningpythontensorflow

This is how my training looks like

(500, 300, 50)
(500,)
Minibatch Loss = 7.722980, Training Accuracy= 0.18400
(500, 300, 50)
(500,)
Minibatch Loss = 20.557695, Training Accuracy= 0.35600
(500, 300, 50)
(500,)
Minibatch Loss = 32.925579, Training Accuracy= 0.22800
(500, 300, 50)
(500,)
Minibatch Loss = 34.841656, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 38.137703, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 22.291409, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 26.780132, Training Accuracy= 0.34800
(500, 300, 50)
(500,)
Minibatch Loss = 27.132868, Training Accuracy= 0.34800
(500, 300, 50)
(500,)
Minibatch Loss = 21.303114, Training Accuracy= 0.35800
(500, 300, 50)
(500,)
Minibatch Loss = 20.854801, Training Accuracy= 0.31600
(500, 300, 50)
(500,)
Minibatch Loss = 24.449608, Training Accuracy= 0.23000
(500, 300, 50)
(500,)
Minibatch Loss = 29.198355, Training Accuracy= 0.19600
(500, 300, 50)
(500,)
Minibatch Loss = 20.845459, Training Accuracy= 0.20000
(500, 300, 50)
(500,)
Minibatch Loss = 17.757305, Training Accuracy= 0.23600
(500, 300, 50)
(500,)
Minibatch Loss = 15.250696, Training Accuracy= 0.37000
(500, 300, 50)
(500,)
Minibatch Loss = 15.362234, Training Accuracy= 0.37200
(500, 300, 50)
(500,)
Minibatch Loss = 14.827072, Training Accuracy= 0.35000
(500, 300, 50)
(500,)
Minibatch Loss = 20.541281, Training Accuracy= 0.31800
(500, 300, 50)
(500,)
Minibatch Loss = 22.777840, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 15.121683, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 8.579925, Training Accuracy= 0.33400
(500, 300, 50)
(500,)
Minibatch Loss = 9.990248, Training Accuracy= 0.31400

Sometimes training loss increases and so does accuracy and I'm training my neural network with same single batch of size 500. I'm passing this same single batch every time and this is how my results look like. I think as I'm passing same single batch every time, loss should go down and training accuracy should increase. But it's not what is happening. What could go wrong?

Here is my code:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
import math
import os
import nltk
batch = 500
start = 0
end = batch
learning_rate = 0.2
num_classes = 8
path = "/home/indy/Downloads/aclImdb/train/pos"
time_steps = 300
embedding = 50
step = 10

def get_embedding():
    gfile_path = os.path.join("/home/indy/Downloads/glove.6B", "glove.6B.50d.txt")
    f = open(gfile_path,'r')
    embeddings = {}
    for line in f:
        sp_value = line.split()
        word = sp_value[0]
        embedding = [float(value) for value in sp_value[1:]]
        assert len(embedding) == 50
        embeddings[word] = embedding
    return embeddings

ebd = get_embedding()

def get_y(file_name):
    y_value = file_name.split('_')
    y_value = y_value[1].split('.')
    if y_value[0] == '1':
       return 0
    elif y_value[0] == '2':
         return 1
    elif y_value[0] == '3':
          return 2
    elif y_value[0] == '4':
          return 3
    elif y_value[0] == '7':
          return 4
    elif y_value[0] == '8':
          return 5
    elif y_value[0] == '9':
          return 6
    elif y_value[0] == '10':
          return 7 

def get_x(path,file_name):
    file_path = os.path.join(path,file_name)
    x_value = open(file_path,'r')
    for line in x_value:
        x_value = line.replace("<br /><br />","") 
        x_value = x_value.lower()
    x_value = nltk.word_tokenize(x_value.decode('utf-8'))
    padding = 300 - len(x_value)
    if padding > 0:
       p_value = ['pad' for i in range(padding)]
       x_value = np.concatenate((x_value,p_value))
    if padding < 0:
       x_value = x_value[:300]
    for i in x_value:
        if ebd.get(i) == None:
           ebd[i] = [float(np.random.normal(0.0,1.0)) for j in range(50)]
    x_value = [ebd[value] for value in x_value]
    assert len(x_value) == 300
    return x_value

def  batch_f(path):
     directory = os.listdir(path)
     y = [get_y(directory[i]) for i in range(len(directory))]
     x = [get_x(path,directory[i]) for i in range(len(directory))]    
     return x , y

x , y = batch_f(path)   

def batch_size(start,end):
    if start == 12500:
       start = 0
       end = 500
    return x[:200] , y[:200]


X = tf.placeholder(tf.float32, [200,time_steps,embedding])
Y = tf.placeholder(tf.int32, [200])

def build_nlp_model(x, _units,num_classes,num_of_filters):

     x = tf.expand_dims(x,3)


     filter_shape = [1, embedding, 1, num_of_filters]
     conv_weights = tf.Variable(tf.truncated_normal(filter_shape, stddev = 1.0))
     conv_biases = tf.Variable(tf.constant(0.1, shape=[num_of_filters]))
     conv = tf.nn.conv2d(x, conv_weights, strides=[1,1,1,1], padding = "VALID")
     relu = tf.nn.relu(conv + conv_biases)
     pooling = tf.nn.max_pool(relu, [1, 1, 1, 1], strides=[1,1,1,1], padding="VALID")
     outputs_fed_lstm = pooling

     x = tf.squeeze(outputs_fed_lstm)     
     x = tf.transpose(x, [1, 0, 2])
     x = tf.reshape(x, [-1, num_of_filters])
     x = tf.split(0, time_steps, x)

     lstm = tf.nn.rnn_cell.LSTMCell(num_units = _units)

     # multi_lstm = tf.nn.rnn_cell.MultiRNNCell([lstm] * lstm_layers, state_is_tuple = True)

     outputs , state = tf.nn.rnn(lstm,x, dtype = tf.float32)     

     weights = tf.Variable(tf.random_normal([_units,num_classes]))
     biases  = tf.Variable(tf.random_normal([num_classes]))

     logits = tf.matmul(outputs[-1], weights) + biases
     return logits

logits = build_nlp_model(X,500,num_classes,1500)
c_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,Y)
loss = tf.reduce_mean(c_loss)


global_step = tf.Variable(0, name="global_step", trainable=False)
decayed_learning_rate = tf.train.exponential_decay(learning_rate,0,10000,0.9)
optimizer= tf.train.AdamOptimizer(decayed_learning_rate)
minimize_loss = optimizer.minimize(loss, global_step=global_step)



correct_predict = tf.nn.in_top_k(logits, Y, 1)
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))


init = tf.initialize_all_variables()

with tf.Session() as sess:
     sess.run(init)
     for i in range(2500):
         x , y = batch_size(start,end)
         print (np.array(x).shape)
         print(np.array(y).shape)   
         sess.run(minimize_loss,feed_dict={X : x, Y : y})
         step1 = sess.run(global_step)

         cost = sess.run(loss,feed_dict = {X: x,Y: y})
         accu = sess.run(accuracy,feed_dict = {X: x, Y: y})
         print ("Minibatch Loss = " + "{:.6f}".format(cost) + ", Training Accuracy= " + "{:.5f}".format(accu))



     print ("Optimization Finished")  

Best Answer

I think the issue is that you are printing out the accuracy and cost after each mini-batch. This is very noisy. Notice that on average the loss tends to decrease which the accuracy tends to increase. You should try to take the average over many batches.

Related Question