quacknet.backPropgation

from quacknet.activationFunctions import relu, sigmoid, tanH, linear, softMax
from quacknet.activationDerivativeFunctions import ReLUDerivative, SigmoidDerivative, TanHDerivative, LinearDerivative, SoftMaxDerivative
from quacknet.lossDerivativeFunctions import MSEDerivative, MAEDerivative, CrossEntropyLossDerivative
from quacknet.lossFunctions import MSELossFunction, MAELossFunction, CrossEntropyLossFunction
import numpy as np

'''
Output layer backpropagation for weights:
e = (dL/da) * f'(z)
e = error term
dL/da = derivative of the loss function with respect to the output
f'() = derivative of the activation function
z = the current node's value

(dL/dW) = e * a
dL/dW = derivative of the loss function with respect to the weight
e = error term
a = previous layer's node value

nw = ow - r * (dL/dW)
nw = new weight
ow = old weight
r = learning rate
(dL/dW) = derivative of the loss function with respect to the weight

Hidden layer backpropagation for weights:
e = SUM(e[l + 1][k] * w[l + 1][k]) * f'(z)
e = error term
SUM(e[l + 1][k] * w[l + 1][k]) = sum over the next layer's nodes of each node's error term multiplied by the weight connecting it to the current node
f'() = derivative of the activation function
z = the current node's value

(dL/dW) = e * a
dL/dW = derivative of the loss function with respect to the weight
e = error term
a = previous layer's node value

nw = ow - r * (dL/dW)
nw = new weight
ow = old weight
r = learning rate
(dL/dW) = derivative of the loss function with respect to the weight
'''
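
# A minimal numeric sketch of the output-layer formulas above, assuming a single sigmoid
# output with MSE loss and a zero starting weight; the values, the helper name and the
# 2 * (a - y) / n convention for the MSE derivative are illustrative assumptions, not part
# of this module.
def _sketchOutputLayerUpdate():
    pastLayerNodes = np.array([0.5, 0.2])            # a: previous layer's node values
    output = np.array([0.731])                       # current output node value (after sigmoid)
    trueValue = np.array([1.0])
    learningRate = 0.1
    dL_da = 2 * (output - trueValue) / len(output)   # dL/da for MSE (one common convention)
    f_prime = output * (1 - output)                  # sigmoid derivative written in terms of the output
    errorTerm = dL_da * f_prime                      # e = (dL/da) * f'(z)
    dL_dW = np.outer(pastLayerNodes, errorTerm)      # (dL/dW) = e * a, one entry per weight
    oldWeights = np.zeros((2, 1))
    newWeights = oldWeights - learningRate * dL_dW   # nw = ow - r * (dL/dW)
    return newWeights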

def _outputLayerWeightChange(lossDerivative, activationDerivative, currentLayerNodes, pastLayerNodes, trueValues):
    """
    Calculate the weight gradients and error terms for the output layer during backpropagation.

    Args:
        lossDerivative (function): Derivative function of the loss function.
        activationDerivative (function): Derivative function of the activation function.
        currentLayerNodes (ndarray): Output values of the current (output) layer.
        pastLayerNodes (ndarray): Output values of the previous layer.
        trueValues (ndarray): True target values for the output.

    Returns:
        weightGradients (ndarray): Gradients of the loss with respect to the weights.
        errorTerms (ndarray): Error terms for the output layer nodes.
    """
    if activationDerivative == SoftMaxDerivative and lossDerivative == CrossEntropyLossDerivative:
        errorTerms = currentLayerNodes - trueValues
    else:
        lossDerivativeValue = lossDerivative(currentLayerNodes, trueValues, len(currentLayerNodes))
        errorTerms = lossDerivativeValue * activationDerivative(currentLayerNodes)
    weightGradients = np.outer(pastLayerNodes, errorTerms)
    return weightGradients, errorTerms
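
# A hedged usage sketch of _outputLayerWeightChange with illustrative values (not part of
# this module). With softMax and cross-entropy the function takes the shortcut branch, so
# the error terms are simply (predicted - true), and the gradient matrix has shape
# (previous layer size, output layer size).
def _sketchOutputLayerWeightChange():
    predicted = np.array([0.7, 0.2, 0.1])          # softMax outputs of the output layer
    trueValues = np.array([1.0, 0.0, 0.0])         # one-hot targets
    hiddenNodes = np.array([0.5, 0.1, 0.9, 0.3])   # previous (hidden) layer's node values
    weightGradients, errorTerms = _outputLayerWeightChange(
        CrossEntropyLossDerivative, SoftMaxDerivative, predicted, hiddenNodes, trueValues
    )
    # errorTerms == predicted - trueValues, weightGradients.shape == (4, 3)
    return weightGradients, errorTerms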

def _hiddenLayerWeightChange(pastLayerErrorTerms, pastLayerWeights, activationDerivative, currentLayerNodes, pastLayerNodes):
    """
    Calculate the weight gradients and error terms for a hidden layer during backpropagation.

    Args:
        pastLayerErrorTerms (ndarray): Error terms of the next layer (the layer closer to the output).
        pastLayerWeights (ndarray): Weights connecting the current layer to the next layer.
        activationDerivative (function): Derivative function of the activation function for the current layer.
        currentLayerNodes (ndarray): Output values of the current layer.
        pastLayerNodes (ndarray): Output values of the previous layer.

    Returns:
        weightGradients (ndarray): Gradients of the loss with respect to the weights.
        errorTerms (ndarray): Error terms for the current layer nodes.
    """
    errorTerms = (pastLayerErrorTerms @ pastLayerWeights.T) * activationDerivative(currentLayerNodes)
    weightGradients = np.outer(pastLayerNodes, errorTerms)
    return weightGradients, errorTerms
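
# A hedged usage sketch of _hiddenLayerWeightChange, continuing the shapes from the sketch
# above (illustrative values, not part of this module). The next layer's error terms are
# pushed back through that layer's weights, then scaled by this layer's activation derivative.
def _sketchHiddenLayerWeightChange():
    nextLayerErrorTerms = np.array([-0.3, 0.2, 0.1])   # error terms of the layer closer to the output
    nextLayerWeights = np.full((4, 3), 0.1)            # weights connecting this layer (4 nodes) to the next (3 nodes)
    hiddenNodes = np.array([0.5, 0.1, 0.9, 0.3])       # this hidden layer's node values
    inputNodes = np.array([0.2, 0.8])                  # previous layer's node values
    weightGradients, errorTerms = _hiddenLayerWeightChange(
        nextLayerErrorTerms, nextLayerWeights, ReLUDerivative, hiddenNodes, inputNodes
    )
    # errorTerms.shape == (4,), weightGradients.shape == (2, 4)
    return weightGradients, errorTerms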

def _outputLayerBiasChange(lossDerivative, activationDerivative, currentLayerNodes, trueValues):
    """
    Calculate the bias gradients and error terms for the output layer during backpropagation.

    Args:
        lossDerivative (function): Derivative function of the loss function.
        activationDerivative (function): Derivative function of the activation function.
        currentLayerNodes (ndarray): Output values of the current (output) layer.
        trueValues (ndarray): True target values for the output.

    Returns:
        biasGradients (ndarray): Gradients of the loss with respect to the biases.
        errorTerms (ndarray): Error terms for the output layer nodes.
    """
    if activationDerivative == SoftMaxDerivative and lossDerivative == CrossEntropyLossDerivative:
        errorTerms = currentLayerNodes - trueValues
    else:
        lossDerivativeValue = lossDerivative(currentLayerNodes, trueValues, len(currentLayerNodes))
        errorTerms = lossDerivativeValue * activationDerivative(currentLayerNodes)
    biasGradients = errorTerms
    return biasGradients, errorTerms


def _hiddenLayerBiasChange(pastLayerErrorTerms, pastLayerWeights, activationDerivative, currentLayerNodes):
    """
    Calculate the bias gradients and error terms for a hidden layer during backpropagation.

    Args:
        pastLayerErrorTerms (ndarray): Error terms of the next layer (the layer closer to the output).
        pastLayerWeights (ndarray): Weights connecting the current layer to the next layer.
        activationDerivative (function): Derivative function of the activation function for the current layer.
        currentLayerNodes (ndarray): Output values of the current layer.

    Returns:
        biasGradients (ndarray): Gradients of the loss with respect to the biases.
        errorTerms (ndarray): Error terms for the current layer nodes.
    """
    errorTerms = (pastLayerErrorTerms @ pastLayerWeights.T) * activationDerivative(currentLayerNodes)
    biasGradients = errorTerms
    return biasGradients, errorTerms

def _backPropgation(layerNodes, weights, biases, trueValues, layers, lossFunction, returnErrorTermForCNN = False):
    """
    Perform backpropagation over the network layers to compute gradients for the weights and biases.

    Args:
        layerNodes (list of ndarray): Output values of each layer, including the input layer.
        weights (list of ndarray): Weights for each layer.
        biases (list of ndarray): Biases for each layer.
        trueValues (ndarray): True target values for the output layer.
        layers (list of tuples): Network layers with the format (number of nodes, activation function).
        lossFunction (function): Loss function used.
        returnErrorTermForCNN (bool, optional): Whether to also return the output layer's error terms for CNN backpropagation. Defaults to False.

    Returns:
        weightGradients (list of ndarray): Gradients of the weights for each layer.
        biasGradients (list of ndarray): Gradients of the biases for each layer.
        If returnErrorTermForCNN is True:
            hiddenWeightErrorTermsForCNNBackpropgation (ndarray): Error terms of the output layer.
    """
    lossDerivatives = {
        MSELossFunction: MSEDerivative,
        MAELossFunction: MAEDerivative,
        CrossEntropyLossFunction: CrossEntropyLossDerivative,
    }
    activationDerivatives = {
        relu: ReLUDerivative,
        sigmoid: SigmoidDerivative,
        linear: LinearDerivative,
        tanH: TanHDerivative,
        softMax: SoftMaxDerivative,
    }
    w, weightErrorTerms = _outputLayerWeightChange(lossDerivatives[lossFunction], activationDerivatives[layers[-1][1]], layerNodes[-1], layerNodes[-2], trueValues)
    b, biasErrorTerms = _outputLayerBiasChange(lossDerivatives[lossFunction], activationDerivatives[layers[-1][1]], layerNodes[-1], trueValues)
    hiddenWeightErrorTermsForCNNBackpropgation = weightErrorTerms
    weightGradients = [w]
    biasGradients = [b]
    for i in range(len(layers) - 2, 0, -1):
        w, weightErrorTerms = _hiddenLayerWeightChange(
            weightErrorTerms,
            weights[i],
            activationDerivatives[layers[i][1]],
            layerNodes[i],
            layerNodes[i - 1]
        )
        b, biasErrorTerms = _hiddenLayerBiasChange(
            biasErrorTerms,
            weights[i],
            activationDerivatives[layers[i][1]],
            layerNodes[i]
        )
        weightGradients.append(w)
        biasGradients.append(b)
    weightGradients.reverse()
    biasGradients.reverse()
    if returnErrorTermForCNN:
        return weightGradients, biasGradients, hiddenWeightErrorTermsForCNNBackpropgation
    return weightGradients, biasGradients
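
# A hedged end-to-end sketch (illustrative values, not part of this module): a tiny
# 2 -> 3 -> 2 network with a relu hidden layer and a softMax output layer trained with
# cross-entropy. layerNodes holds the forward-pass output of every layer including the
# input, weights[i] has shape (nodes in layer i, nodes in layer i + 1), and the returned
# gradient lists line up with weights and biases layer by layer.
if __name__ == "__main__":
    layers = [(2, relu), (3, relu), (2, softMax)]   # (number of nodes, activation function); the input layer's activation is not used here
    layerNodes = [
        np.array([0.2, 0.8]),        # input layer
        np.array([0.4, 0.0, 0.6]),   # hidden layer output (after relu)
        np.array([0.7, 0.3]),        # output layer (after softMax)
    ]
    weights = [np.full((2, 3), 0.1), np.full((3, 2), 0.1)]
    biases = [np.zeros(3), np.zeros(2)]
    trueValues = np.array([1.0, 0.0])
    weightGradients, biasGradients = _backPropgation(
        layerNodes, weights, biases, trueValues, layers, CrossEntropyLossFunction
    )
    # weightGradients[0].shape == (2, 3), weightGradients[1].shape == (3, 2)
    print([g.shape for g in weightGradients], [b.shape for b in biasGradients])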