Solved – My training loss is increasing and the training accuracy is also increasing. Is it normal

This is what my training output looks like:

(500, 300, 50)
(500,)
Minibatch Loss = 7.722980, Training Accuracy= 0.18400
(500, 300, 50)
(500,)
Minibatch Loss = 20.557695, Training Accuracy= 0.35600
(500, 300, 50)
(500,)
Minibatch Loss = 32.925579, Training Accuracy= 0.22800
(500, 300, 50)
(500,)
Minibatch Loss = 34.841656, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 38.137703, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 22.291409, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 26.780132, Training Accuracy= 0.34800
(500, 300, 50)
(500,)
Minibatch Loss = 27.132868, Training Accuracy= 0.34800
(500, 300, 50)
(500,)
Minibatch Loss = 21.303114, Training Accuracy= 0.35800
(500, 300, 50)
(500,)
Minibatch Loss = 20.854801, Training Accuracy= 0.31600
(500, 300, 50)
(500,)
Minibatch Loss = 24.449608, Training Accuracy= 0.23000
(500, 300, 50)
(500,)
Minibatch Loss = 29.198355, Training Accuracy= 0.19600
(500, 300, 50)
(500,)
Minibatch Loss = 20.845459, Training Accuracy= 0.20000
(500, 300, 50)
(500,)
Minibatch Loss = 17.757305, Training Accuracy= 0.23600
(500, 300, 50)
(500,)
Minibatch Loss = 15.250696, Training Accuracy= 0.37000
(500, 300, 50)
(500,)
Minibatch Loss = 15.362234, Training Accuracy= 0.37200
(500, 300, 50)
(500,)
Minibatch Loss = 14.827072, Training Accuracy= 0.35000
(500, 300, 50)
(500,)
Minibatch Loss = 20.541281, Training Accuracy= 0.31800
(500, 300, 50)
(500,)
Minibatch Loss = 22.777840, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 15.121683, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 8.579925, Training Accuracy= 0.33400
(500, 300, 50)
(500,)
Minibatch Loss = 9.990248, Training Accuracy= 0.31400

Sometimes the training loss increases and so does the accuracy. I'm training my neural network on the same single batch of size 500: I pass this same batch on every step, and the output above is what my results look like. Since I'm feeding the same batch every time, I would expect the loss to go down and the training accuracy to go up, but that is not what is happening. What could be going wrong?

Here is my code:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
import math
import os
import nltk
# Batch bookkeeping. NOTE(review): batch_size() and the X/Y placeholders below
# hard-code 200 examples, so these three values do not currently control the
# size of the batch that is fed to the graph.
batch = 500
start = 0
end = batch
# Initial step size handed to tf.train.exponential_decay and from there to
# tf.train.AdamOptimizer. NOTE(review): Adam's documented default is 0.001;
# a value of 0.2 is a plausible cause of an unstable loss - confirm by
# retrying with a much smaller value.
learning_rate = 0.2
# Number of output classes; get_y() maps ratings 1-4 and 7-10 onto 0..7.
num_classes = 8
# Directory whose files are read by batch_f(); each file name is parsed by get_y().
path = "/home/indy/Downloads/aclImdb/train/pos"
# Number of tokens per example (used for the placeholder shape and the LSTM unrolling).
time_steps = 300
# Width of one word vector (used for the placeholder shape and the conv filter).
embedding = 50
# NOTE(review): not referenced anywhere else in the visible code.
step = 10

def get_embedding(gfile_path=None):
    """Load pre-trained 50-dimensional GloVe word vectors into a dict.

    Args:
        gfile_path: Path of a GloVe text file holding one
            "word v1 v2 ... v50" entry per line. When omitted, the
            glove.6B.50d.txt file under /home/indy/Downloads/glove.6B is read.

    Returns:
        A dict mapping each word to a list of 50 floats.

    Raises:
        AssertionError: if an entry does not carry exactly 50 values.
    """
    if gfile_path is None:
        gfile_path = os.path.join("/home/indy/Downloads/glove.6B", "glove.6B.50d.txt")
    embeddings = {}
    # The context manager closes the file even if parsing fails part-way.
    with open(gfile_path, 'r') as f:
        for line in f:
            sp_value = line.split()
            if not sp_value:
                # A blank line (e.g. at the end of the file) carries no entry.
                continue
            word = sp_value[0]
            vector = [float(value) for value in sp_value[1:]]
            assert len(vector) == 50
            embeddings[word] = vector
    return embeddings

# Word -> vector table, loaded once at import time. get_x() also inserts
# entries into this dict for words it does not find in it.
ebd = get_embedding()

def get_y(file_name):
    """Turn the rating embedded in a file name into a class index.

    The rating is the text between the first underscore and the following
    dot (e.g. "123_7.txt" -> "7"). Ratings 1-4 map to classes 0-3 and
    ratings 7-10 map to classes 4-7; any other rating yields None.
    """
    rating_to_class = {
        '1': 0,
        '2': 1,
        '3': 2,
        '4': 3,
        '7': 4,
        '8': 5,
        '9': 6,
        '10': 7,
    }
    after_underscore = file_name.split('_')[1]
    rating = after_underscore.split('.')[0]
    return rating_to_class.get(rating)

def get_x(path,file_name):
    """Read one review file and return it as a fixed-length list of word vectors.

    The text is lower-cased, tokenised with nltk, padded with the token
    'pad' or truncated to exactly `time_steps` tokens, and every token is
    replaced by its vector from the module-level `ebd` table.

    Side effect: a token missing from `ebd` gets a random N(0, 1) vector of
    length `embedding`, which is stored in `ebd` so that later occurrences
    of the same token reuse it.

    Args:
        path: Directory containing the file.
        file_name: Name of the file inside `path`.

    Returns:
        A list of `time_steps` vectors (each a list of floats).
    """
    file_path = os.path.join(path, file_name)
    # Read bytes and decode explicitly: this behaves the same on Python 2 and
    # Python 3, uses the whole file rather than only its last line, and the
    # context manager guarantees the handle is closed.
    with open(file_path, 'rb') as review_file:
        text = review_file.read().decode('utf-8')
    # Replace the HTML line breaks with a space so that the words on either
    # side of a break are not glued into a single token.
    text = text.replace("<br /><br />", " ").lower()
    tokens = nltk.word_tokenize(text)

    # Truncate, then pad: a non-positive repeat count yields an empty list.
    tokens = tokens[:time_steps]
    tokens = tokens + ['pad'] * (time_steps - len(tokens))

    for token in tokens:
        if token not in ebd:
            ebd[token] = [float(v) for v in np.random.normal(0.0, 1.0, embedding)]
    vectors = [ebd[token] for token in tokens]
    assert len(vectors) == time_steps
    return vectors

def batch_f(path):
    """Load every file in `path` and return (features, labels).

    Labels are produced for all files first (via get_y), then the features
    (via get_x), both in the order returned by os.listdir.
    """
    file_names = os.listdir(path)
    labels = [get_y(name) for name in file_names]
    features = [get_x(path, name) for name in file_names]
    return features, labels

# Read and vectorise every file under `path` up front; the full lists of
# features and labels are kept in memory as module-level globals.
x , y = batch_f(path)   

def batch_size(start,end):
    """Return the fixed training batch: the first 200 examples and labels.

    `start` and `end` are accepted but do not influence the result; the same
    leading slice of the module-level `x` and `y` is returned on every call.
    """
    leading = slice(None, 200)
    return x[leading], y[leading]


# Graph inputs: a fixed batch of 200 sequences, each time_steps vectors of
# width embedding, and one integer class id per sequence.
X = tf.placeholder(tf.float32, [200,time_steps,embedding])
Y = tf.placeholder(tf.int32, [200])

def build_nlp_model(x, _units,num_classes,num_of_filters):
    """Build the conv -> LSTM -> linear classifier and return its logits.

    Args:
        x: Float tensor of word vectors; the caller passes the
            [batch, time_steps, embedding] placeholder.
        _units: Number of hidden units in the LSTM cell.
        num_classes: Number of logits produced per example.
        num_of_filters: Number of convolution filters, which is also the
            width of the per-time-step input given to the LSTM.

    Returns:
        A [batch, num_classes] tensor of unnormalised class scores computed
        from the LSTM output at the last time step.
    """
    # Add a trailing channel axis so conv2d sees [batch, time_steps, embedding, 1].
    x = tf.expand_dims(x, 3)

    # Each filter covers one time step and the full embedding width, so the
    # VALID convolution yields [batch, time_steps, 1, num_of_filters].
    filter_shape = [1, embedding, 1, num_of_filters]
    conv_weights = tf.Variable(tf.truncated_normal(filter_shape, stddev=1.0))
    conv_biases = tf.Variable(tf.constant(0.1, shape=[num_of_filters]))
    conv = tf.nn.conv2d(x, conv_weights, strides=[1, 1, 1, 1], padding="VALID")
    relu = tf.nn.relu(conv + conv_biases)
    # A 1x1 window with stride 1 leaves the values unchanged; the op is kept
    # so the graph structure stays the same.
    pooling = tf.nn.max_pool(relu, [1, 1, 1, 1], strides=[1, 1, 1, 1], padding="VALID")

    # Drop only the width-1 axis left by the convolution (axis 2). A squeeze
    # without an explicit axis would also remove the batch or filter axis
    # whenever that axis has size 1, breaking the 3-D transpose below.
    x = tf.squeeze(pooling, [2])
    # Reorder to [time_steps, batch, filters], flatten, and cut into a list of
    # time_steps tensors of shape [batch, filters], as tf.nn.rnn expects.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, num_of_filters])
    x = tf.split(0, time_steps, x)

    lstm = tf.nn.rnn_cell.LSTMCell(num_units=_units)
    outputs, _ = tf.nn.rnn(lstm, x, dtype=tf.float32)

    weights = tf.Variable(tf.random_normal([_units, num_classes]))
    biases = tf.Variable(tf.random_normal([num_classes]))

    # Classify from the output of the final time step only.
    logits = tf.matmul(outputs[-1], weights) + biases
    return logits

# Forward pass, per-example cross-entropy against the integer labels in Y,
# then the mean over the batch as the scalar training loss.
logits = build_nlp_model(x=X, _units=500, num_classes=num_classes, num_of_filters=1500)
c_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=Y)
loss = tf.reduce_mean(c_loss)


# Step counter incremented by the optimizer on every minimize_loss run.
global_step = tf.Variable(0, name="global_step", trainable=False)
# The decay schedule must read the live step counter: with a literal 0 the
# schedule always evaluates to the initial learning_rate and never decays.
decayed_learning_rate = tf.train.exponential_decay(learning_rate, global_step, 10000, 0.9)
optimizer = tf.train.AdamOptimizer(decayed_learning_rate)
minimize_loss = optimizer.minimize(loss, global_step=global_step)



# One boolean per example: whether the label in Y is the top-1 scoring class.
correct_predict = tf.nn.in_top_k(logits, Y, 1)
# Fraction of the batch that was classified correctly.
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))


# Op that initialises the variables created above; it is run once when the session starts.
init = tf.initialize_all_variables()

# Training loop: one optimizer update per iteration, followed by an
# evaluation of loss and accuracy on the batch that was just used.
with tf.Session() as sess:
    sess.run(init)
    for i in range(2500):
        # Use dedicated names so the module-level x / y data set is not
        # overwritten by the batch.
        batch_x, batch_y = batch_size(start, end)
        print (np.array(batch_x).shape)
        print(np.array(batch_y).shape)
        feed = {X: batch_x, Y: batch_y}
        sess.run(minimize_loss, feed_dict=feed)

        # Fetch both metrics in a single post-update pass instead of
        # running the forward computation twice.
        cost, accu = sess.run([loss, accuracy], feed_dict=feed)
        print ("Minibatch Loss = {:.6f}, Training Accuracy= {:.5f}".format(cost, accu))

    print ("Optimization Finished")

Solved – My training loss is increasing and the training accuracy is also increasing. Is it normal

Best Answer

Related Question

Best Answer

Related Solutions

Solved – Training accuracy increase abruptly at first epoch to 99%. is it normal

Neural Networks – Why Validation Loss Increases While Validation Accuracy Increases

Related Question