This is how my training looks like
(500, 300, 50)
(500,)
Minibatch Loss = 7.722980, Training Accuracy= 0.18400
(500, 300, 50)
(500,)
Minibatch Loss = 20.557695, Training Accuracy= 0.35600
(500, 300, 50)
(500,)
Minibatch Loss = 32.925579, Training Accuracy= 0.22800
(500, 300, 50)
(500,)
Minibatch Loss = 34.841656, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 38.137703, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 22.291409, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 26.780132, Training Accuracy= 0.34800
(500, 300, 50)
(500,)
Minibatch Loss = 27.132868, Training Accuracy= 0.34800
(500, 300, 50)
(500,)
Minibatch Loss = 21.303114, Training Accuracy= 0.35800
(500, 300, 50)
(500,)
Minibatch Loss = 20.854801, Training Accuracy= 0.31600
(500, 300, 50)
(500,)
Minibatch Loss = 24.449608, Training Accuracy= 0.23000
(500, 300, 50)
(500,)
Minibatch Loss = 29.198355, Training Accuracy= 0.19600
(500, 300, 50)
(500,)
Minibatch Loss = 20.845459, Training Accuracy= 0.20000
(500, 300, 50)
(500,)
Minibatch Loss = 17.757305, Training Accuracy= 0.23600
(500, 300, 50)
(500,)
Minibatch Loss = 15.250696, Training Accuracy= 0.37000
(500, 300, 50)
(500,)
Minibatch Loss = 15.362234, Training Accuracy= 0.37200
(500, 300, 50)
(500,)
Minibatch Loss = 14.827072, Training Accuracy= 0.35000
(500, 300, 50)
(500,)
Minibatch Loss = 20.541281, Training Accuracy= 0.31800
(500, 300, 50)
(500,)
Minibatch Loss = 22.777840, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 15.121683, Training Accuracy= 0.22400
(500, 300, 50)
(500,)
Minibatch Loss = 8.579925, Training Accuracy= 0.33400
(500, 300, 50)
(500,)
Minibatch Loss = 9.990248, Training Accuracy= 0.31400
Sometimes training loss increases and so does accuracy and I'm training my neural network with same single batch of size 500. I'm passing this same single batch every time and this is how my results look like. I think as I'm passing same single batch every time, loss should go down and training accuracy should increase. But it's not what is happening. What could go wrong?
Here is my code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import math
import os
import nltk
batch = 500
start = 0
end = batch
learning_rate = 0.2
num_classes = 8
path = "/home/indy/Downloads/aclImdb/train/pos"
time_steps = 300
embedding = 50
step = 10
def get_embedding():
gfile_path = os.path.join("/home/indy/Downloads/glove.6B", "glove.6B.50d.txt")
f = open(gfile_path,'r')
embeddings = {}
for line in f:
sp_value = line.split()
word = sp_value[0]
embedding = [float(value) for value in sp_value[1:]]
assert len(embedding) == 50
embeddings[word] = embedding
return embeddings
ebd = get_embedding()
def get_y(file_name):
y_value = file_name.split('_')
y_value = y_value[1].split('.')
if y_value[0] == '1':
return 0
elif y_value[0] == '2':
return 1
elif y_value[0] == '3':
return 2
elif y_value[0] == '4':
return 3
elif y_value[0] == '7':
return 4
elif y_value[0] == '8':
return 5
elif y_value[0] == '9':
return 6
elif y_value[0] == '10':
return 7
def get_x(path,file_name):
file_path = os.path.join(path,file_name)
x_value = open(file_path,'r')
for line in x_value:
x_value = line.replace("<br /><br />","")
x_value = x_value.lower()
x_value = nltk.word_tokenize(x_value.decode('utf-8'))
padding = 300 - len(x_value)
if padding > 0:
p_value = ['pad' for i in range(padding)]
x_value = np.concatenate((x_value,p_value))
if padding < 0:
x_value = x_value[:300]
for i in x_value:
if ebd.get(i) == None:
ebd[i] = [float(np.random.normal(0.0,1.0)) for j in range(50)]
x_value = [ebd[value] for value in x_value]
assert len(x_value) == 300
return x_value
def batch_f(path):
directory = os.listdir(path)
y = [get_y(directory[i]) for i in range(len(directory))]
x = [get_x(path,directory[i]) for i in range(len(directory))]
return x , y
x , y = batch_f(path)
def batch_size(start,end):
if start == 12500:
start = 0
end = 500
return x[:200] , y[:200]
X = tf.placeholder(tf.float32, [200,time_steps,embedding])
Y = tf.placeholder(tf.int32, [200])
def build_nlp_model(x, _units,num_classes,num_of_filters):
x = tf.expand_dims(x,3)
filter_shape = [1, embedding, 1, num_of_filters]
conv_weights = tf.Variable(tf.truncated_normal(filter_shape, stddev = 1.0))
conv_biases = tf.Variable(tf.constant(0.1, shape=[num_of_filters]))
conv = tf.nn.conv2d(x, conv_weights, strides=[1,1,1,1], padding = "VALID")
relu = tf.nn.relu(conv + conv_biases)
pooling = tf.nn.max_pool(relu, [1, 1, 1, 1], strides=[1,1,1,1], padding="VALID")
outputs_fed_lstm = pooling
x = tf.squeeze(outputs_fed_lstm)
x = tf.transpose(x, [1, 0, 2])
x = tf.reshape(x, [-1, num_of_filters])
x = tf.split(0, time_steps, x)
lstm = tf.nn.rnn_cell.LSTMCell(num_units = _units)
# multi_lstm = tf.nn.rnn_cell.MultiRNNCell([lstm] * lstm_layers, state_is_tuple = True)
outputs , state = tf.nn.rnn(lstm,x, dtype = tf.float32)
weights = tf.Variable(tf.random_normal([_units,num_classes]))
biases = tf.Variable(tf.random_normal([num_classes]))
logits = tf.matmul(outputs[-1], weights) + biases
return logits
logits = build_nlp_model(X,500,num_classes,1500)
c_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,Y)
loss = tf.reduce_mean(c_loss)
global_step = tf.Variable(0, name="global_step", trainable=False)
decayed_learning_rate = tf.train.exponential_decay(learning_rate,0,10000,0.9)
optimizer= tf.train.AdamOptimizer(decayed_learning_rate)
minimize_loss = optimizer.minimize(loss, global_step=global_step)
correct_predict = tf.nn.in_top_k(logits, Y, 1)
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
init = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init)
for i in range(2500):
x , y = batch_size(start,end)
print (np.array(x).shape)
print(np.array(y).shape)
sess.run(minimize_loss,feed_dict={X : x, Y : y})
step1 = sess.run(global_step)
cost = sess.run(loss,feed_dict = {X: x,Y: y})
accu = sess.run(accuracy,feed_dict = {X: x, Y: y})
print ("Minibatch Loss = " + "{:.6f}".format(cost) + ", Training Accuracy= " + "{:.5f}".format(accu))
print ("Optimization Finished")
Best Answer
I think the issue is that you are printing out the accuracy and cost after each mini-batch. This is very noisy. Notice that on average the loss tends to decrease which the accuracy tends to increase. You should try to take the average over many batches.