Solved – multidimensional inputs, outputs and backpropagation

backpropagation, neural networks

Let's say I have a neural network in matrix form. Inputs, hidden-layer nodes and outputs are represented by column vectors, while the weights are matrices of size outputRows × inputRows.
Now, let's say I'd like to handle multiple inputs and outputs at once, without having to iterate through the columns one by one. For the output calculation this should work without any trouble, as there will simply be additional columns after the matrix multiplication, one per example. But for training with multiple input-output pairs I have so far relied on iterating through the columns, and I couldn't find any really meaningful resources on backpropagation with multidimensional (batched) inputs and outputs. Is it possible and/or advisable?
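For concreteness, here is a minimal sketch (the names and values are arbitrary) of the batched forward calculation I mean: a weight matrix multiplied by an input matrix with one example per column yields one output column per example.

import org.ejml.simple.SimpleMatrix;

SimpleMatrix weights = new SimpleMatrix(2, 3, true, new double[] {
    0.1, 0.2, 0.3,
    0.4, 0.5, 0.6 });

// three-element inputs for two examples, stacked as columns
SimpleMatrix batch = new SimpleMatrix(3, 2, true, new double[] {
    1, 4,
    2, 5,
    3, 6 });

SimpleMatrix out = weights.mult(batch); // 2x2: one output column per example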

Best Answer

Yes, it's possible, not too difficult, and therefore also advisable. I'm giving a code example in Java for forward propagation and backpropagation using org.ejml.simple.SimpleMatrix. The convention throughout is one training example per matrix column.

// Forward pass for a batch: "input" holds one training example per column
// (with the bias row already appended by the caller).
private SimpleMatrix[] forwardPropagate(SimpleMatrix input, SimpleMatrix weightsIj, SimpleMatrix weightsJk, boolean derivatives) {
    SimpleMatrix[] result = new SimpleMatrix[4];

    SimpleMatrix hiddenValue = weightsIj.mult(input);
    SimpleMatrix hiddenActivation = threshold(hiddenValue); // a simple method that applies the threshold function to every matrix element

    // append a bias row to the hidden activations and set it to 1
    SimpleMatrix hiddenActWithBias = new SimpleMatrix(hiddenActivation.numRows() + 1, hiddenActivation.numCols());
    hiddenActWithBias.insertIntoThis(0, 0, hiddenActivation);
    hiddenActWithBias = setEntireRow(hiddenActWithBias, true, hiddenActWithBias.numRows() - 1, 1);

    SimpleMatrix outputValue = weightsJk.mult(hiddenActWithBias);
    SimpleMatrix outputActivation = threshold(outputValue); // change here if a linear output activation function should be used

    // the derivatives are needed if the MSE (mean squared error) cost function is to be applied; not needed for cross entropy
    SimpleMatrix derivatives1 = null;
    SimpleMatrix derivatives2 = null;
    if (derivatives) {
        derivatives1 = thresholdderivative(hiddenValue);
        derivatives2 = thresholdderivative(outputValue);
    }

    result[0] = outputActivation;
    result[1] = hiddenActWithBias;
    result[2] = derivatives1;
    result[3] = derivatives2;
    return result;
}
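The helper methods threshold, thresholdderivative and setEntireRow are not part of EJML and aren't shown above. A minimal sketch of what they might look like, assuming a logistic (sigmoid) activation and the signatures implied by the call sites:

// elementwise logistic activation (an assumption; any differentiable threshold function works)
private SimpleMatrix threshold(SimpleMatrix m) {
    SimpleMatrix out = new SimpleMatrix(m.numRows(), m.numCols());
    for (int r = 0; r < m.numRows(); r++) {
        for (int c = 0; c < m.numCols(); c++) {
            out.set(r, c, 1.0 / (1.0 + Math.exp(-m.get(r, c))));
        }
    }
    return out;
}

// elementwise derivative of the logistic function: s(x) * (1 - s(x))
private SimpleMatrix thresholdderivative(SimpleMatrix m) {
    SimpleMatrix out = new SimpleMatrix(m.numRows(), m.numCols());
    for (int r = 0; r < m.numRows(); r++) {
        for (int c = 0; c < m.numCols(); c++) {
            double s = 1.0 / (1.0 + Math.exp(-m.get(r, c)));
            out.set(r, c, s * (1.0 - s));
        }
    }
    return out;
}

// sets every element of one row to the given value; the boolean flag is assumed
// to choose between modifying the matrix in place and returning a copy
private SimpleMatrix setEntireRow(SimpleMatrix m, boolean inPlace, int row, double value) {
    SimpleMatrix target = inPlace ? m : m.copy();
    for (int c = 0; c < target.numCols(); c++) {
        target.set(row, c, value);
    }
    return target;
}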

private SimpleMatrix[] backpropagate(SimpleMatrix input, SimpleMatrix[] forwardPropagateResult, SimpleMatrix trainingweights1, SimpleMatrix trainingweights2,
                                     SimpleMatrix targetOutput, boolean derivatives, double eta) {
    SimpleMatrix weightsOutDiff;
    SimpleMatrix weightsHiddenDiff;
    SimpleMatrix[] result = new SimpleMatrix[3];

    // difference between calculated and training output, one column per example
    SimpleMatrix deltaOutput = forwardPropagateResult[0].minus(targetOutput);

    // "delete" the last column (the connections to the bias node), as the bias
    // doesn't backpropagate into layer 1; extractMatrix copies the submatrix,
    // whereas reshape would only reinterpret the row-major data array and
    // scramble the remaining columns whenever there is more than one output row
    SimpleMatrix backweights2 = trainingweights2.extractMatrix(0, trainingweights2.numRows(), 0, trainingweights2.numCols() - 1);

    if (derivatives) { // MSE cost function
        SimpleMatrix deltaOutDer = deltaOutput.elementMult(forwardPropagateResult[3]); // multiplied by the partial derivative, equals delta of the output layer
        SimpleMatrix deltaHiddenPart = backweights2.transpose().mult(deltaOutDer); // delta of the output layer sent back through the weights to the hidden layer
        SimpleMatrix deltaHidden = deltaHiddenPart.elementMult(forwardPropagateResult[2]); // multiplied by the partial derivative, equals delta of the hidden layer
        weightsOutDiff = deltaOutDer.mult(forwardPropagateResult[1].transpose()).scale(-eta); // weight updates for the output layer, summed over all examples
        weightsHiddenDiff = deltaHidden.mult(input.transpose()).scale(-eta); // weight updates for the hidden layer, summed over all examples
    } else { // cross-entropy cost function: the derivative factor at the output cancels
        SimpleMatrix deltaHidden = backweights2.transpose().mult(deltaOutput); // deltaOutput sent back through the weights to the hidden layer
        weightsOutDiff = deltaOutput.mult(forwardPropagateResult[1].transpose()).scale(-eta); // weight updates for the output layer
        weightsHiddenDiff = deltaHidden.mult(input.transpose()).scale(-eta); // weight updates for the hidden layer
    }

    result[0] = deltaOutput;
    result[1] = weightsOutDiff;
    result[2] = weightsHiddenDiff;
    return result;
}
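Note that because each example is a column, the products deltaOutDer * hiddenActWithBias^T and deltaHidden * input^T already sum the per-example gradients, so no explicit loop over the batch is needed. As a rough, hypothetical driver (the weight values and the toy data are made up for illustration, and the snippet is assumed to run inside the same class as the two methods above), training could look like this:

import org.ejml.simple.SimpleMatrix;

// weights of size outputRows x inputRows; the extra column is for the bias
SimpleMatrix weightsIj = new SimpleMatrix(2, 3, true, new double[] {
     0.1, -0.2,  0.05,    // hidden node 1: two inputs + bias
    -0.3,  0.4, -0.1 });  // hidden node 2
SimpleMatrix weightsJk = new SimpleMatrix(1, 3, true, new double[] {
     0.2, -0.1,  0.05 }); // output node: two hidden nodes + bias

// two training examples, one per column; the last input row is the bias value 1
SimpleMatrix inputs  = new SimpleMatrix(3, 2, true, new double[] {
    0, 1,
    1, 0,
    1, 1 });
SimpleMatrix targets = new SimpleMatrix(1, 2, true, new double[] { 1, 1 });

for (int epoch = 0; epoch < 1000; epoch++) {
    SimpleMatrix[] fw = forwardPropagate(inputs, weightsIj, weightsJk, true);
    SimpleMatrix[] bw = backpropagate(inputs, fw, weightsIj, weightsJk, targets, true, 0.5);
    weightsJk = weightsJk.plus(bw[1]); // the diffs already carry the -eta factor
    weightsIj = weightsIj.plus(bw[2]);
}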