from quacknet.activationFunctions import relu, sigmoid, tanH, linear, softMax
from quacknet.activationDerivativeFunctions import ReLUDerivative, SigmoidDerivative, TanHDerivative, LinearDerivative, SoftMaxDerivative
from quacknet.lossDerivativeFunctions import MSEDerivative, MAEDerivative, CrossEntropyLossDerivative
from quacknet.lossFunctions import MSELossFunction, MAELossFunction, CrossEntropyLossFunction
import numpy as np

'''
Output layer backpropagation for weights:

e = (dL/da) * f'(z)
    e = error term
    dL/da = derivative of the loss function with respect to the activation
    f'() = derivative of the activation function
    z = the current node's weighted input (pre-activation)

dL/dW = e * a
    dL/dW = derivative of the loss with respect to a weight
    e = error term
    a = previous layer's node value

nw = ow - r * (dL/dW)
    nw = new weight
    ow = old weight
    r = learning rate
    dL/dW = derivative of the loss with respect to a weight

Hidden layer backpropagation for weights:

e = SUM(e[l + 1][k] * w[l + 1][k]) * f'(z)
    e = error term
    SUM(e[l + 1][k] * w[l + 1][k]) = sum over the next layer's error terms, each
        multiplied by the weight connecting the current node to that next-layer node
    f'() = derivative of the activation function
    z = the current node's weighted input (pre-activation)

The weight gradient and the weight update then use the same formulas as the output layer:
dL/dW = e * a
nw = ow - r * (dL/dW)
'''
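
# Worked example of the formulas above (illustrative numbers, assuming a single
# sigmoid output node trained with MSE and ignoring MSE's 1/n scaling):
#   a = 0.8 (prediction), y = 1.0 (target), a_prev = 0.5 (previous layer's node)
#   dL/da = 2 * (a - y)    = 2 * (0.8 - 1.0) = -0.4
#   f'(z) = a * (1 - a)    = 0.8 * 0.2       = 0.16   (sigmoid derivative via its output)
#   e     = dL/da * f'(z)  = -0.4 * 0.16     = -0.064
#   dL/dW = e * a_prev     = -0.064 * 0.5    = -0.032
#   nw    = ow - r * dL/dW = ow + 0.0032 for learning rate r = 0.1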

def _outputLayerWeightChange(lossDerivative, activationDerivative, currentLayerNodes, pastLayerNodes, trueValues):
    """
    Calculate the weight gradients and error terms for the output layer during backpropagation.

    Args:
        lossDerivative (function): Derivative function of the loss function.
        activationDerivative (function): Derivative function of the activation function.
        currentLayerNodes (ndarray): Output values of the current (output) layer.
        pastLayerNodes (ndarray): Output values of the previous layer.
        trueValues (ndarray): True target values for the output.

    Returns:
        weightGradients (ndarray): Gradients of the loss with respect to the weights.
        errorTerms (ndarray): Error terms for the output layer nodes.
    """
    if activationDerivative == SoftMaxDerivative and lossDerivative == CrossEntropyLossDerivative:
        # Softmax combined with cross entropy simplifies: the error term is (prediction - target)
        errorTerms = currentLayerNodes - trueValues
    else:
        lossDerivativeValue = lossDerivative(currentLayerNodes, trueValues, len(currentLayerNodes))
        errorTerms = lossDerivativeValue * activationDerivative(currentLayerNodes)
    weightGradients = np.outer(pastLayerNodes, errorTerms)
    return weightGradients, errorTerms
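
# Usage sketch (shapes are illustrative; the derivative signatures follow how this
# module itself calls them): a previous layer of 2 nodes feeding 1 sigmoid output
# node trained with MSE:
#   grads, err = _outputLayerWeightChange(
#       MSEDerivative, SigmoidDerivative,
#       np.array([0.8]), np.array([0.5, 0.2]), np.array([1.0]))
#   grads.shape == (2, 1)  # one row per previous-layer node, one column per output node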

def _hiddenLayerWeightChange(pastLayerErrorTerms, pastLayerWeights, activationDerivative, currentLayerNodes, pastLayerNodes):
    """
    Calculate the weight gradients and error terms for a hidden layer during backpropagation.

    Args:
        pastLayerErrorTerms (ndarray): Error terms of the next layer ("past" meaning already processed, since backpropagation moves backwards).
        pastLayerWeights (ndarray): Weights connecting the current layer to the next layer.
        activationDerivative (function): Derivative function of the activation function for the current layer.
        currentLayerNodes (ndarray): Output values of the current layer.
        pastLayerNodes (ndarray): Output values of the previous layer.

    Returns:
        weightGradients (ndarray): Gradients of the loss with respect to the weights.
        errorTerms (ndarray): Error terms for the current layer nodes.
    """
    # Propagate the next layer's error terms back through its weights, then scale
    # by the current layer's activation derivative
    errorTerms = (pastLayerErrorTerms @ pastLayerWeights.T) * activationDerivative(currentLayerNodes)
    weightGradients = np.outer(pastLayerNodes, errorTerms)
    return weightGradients, errorTerms
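
# Shape sketch (illustrative): with 4 nodes in the previous layer, 3 in the current
# layer and 2 in the next, pastLayerErrorTerms is (2,) and pastLayerWeights is (3, 2),
# so (2,) @ (2, 3) yields (3,) error terms and np.outer((4,), (3,)) yields (4, 3)
# weight gradients, one per connection from the previous layer into the current one.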

def _outputLayerBiasChange(lossDerivative, activationDerivative, currentLayerNodes, trueValues):
    """
    Calculate the bias gradients and error terms for the output layer during backpropagation.

    Args:
        lossDerivative (function): Derivative function of the loss function.
        activationDerivative (function): Derivative function of the activation function.
        currentLayerNodes (ndarray): Output values of the current (output) layer.
        trueValues (ndarray): True target values for the output.

    Returns:
        biasGradients (ndarray): Gradients of the loss with respect to the biases.
        errorTerms (ndarray): Error terms for the output layer nodes.
    """
    if activationDerivative == SoftMaxDerivative and lossDerivative == CrossEntropyLossDerivative:
        # Softmax combined with cross entropy simplifies: the error term is (prediction - target)
        errorTerms = currentLayerNodes - trueValues
    else:
        lossDerivativeValue = lossDerivative(currentLayerNodes, trueValues, len(currentLayerNodes))
        errorTerms = lossDerivativeValue * activationDerivative(currentLayerNodes)
    biasGradients = errorTerms
    return biasGradients, errorTerms


def _hiddenLayerBiasChange(pastLayerErrorTerms, pastLayerWeights, activationDerivative, currentLayerNodes):
    """
    Calculate the bias gradients and error terms for a hidden layer during backpropagation.

    Args:
        pastLayerErrorTerms (ndarray): Error terms of the next layer ("past" meaning already processed, since backpropagation moves backwards).
        pastLayerWeights (ndarray): Weights connecting the current layer to the next layer.
        activationDerivative (function): Derivative function of the activation function for the current layer.
        currentLayerNodes (ndarray): Output values of the current layer.

    Returns:
        biasGradients (ndarray): Gradients of the loss with respect to the biases.
        errorTerms (ndarray): Error terms for the current layer nodes.
    """
    errorTerms = (pastLayerErrorTerms @ pastLayerWeights.T) * activationDerivative(currentLayerNodes)
    biasGradients = errorTerms
    return biasGradients, errorTerms
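
# Why bias gradients need no outer product: a bias adds directly into its node's
# weighted input, so dz/db = 1 and the bias gradient is exactly the error term.
# For example, errorTerms of shape (3,) updates that layer's 3 biases as
# b_new = b_old - r * errorTerms.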

def _backPropgation(layerNodes, weights, biases, trueValues, layers, lossFunction, returnErrorTermForCNN = False):
    """
    Perform backpropagation over the network layers to compute gradients for weights and biases.

    Args:
        layerNodes (list of ndarray): List of output values for each layer.
        weights (list of ndarray): List of weights for each layer.
        biases (list of ndarray): List of biases for each layer.
        trueValues (ndarray): True target values for the output layer.
        layers (list of tuples): Network layers with format (number of nodes, activation function).
        lossFunction (function): Loss function used.
        returnErrorTermForCNN (bool, optional): Whether to return error terms for CNN backpropagation. Defaults to False.

    Returns:
        weightGradients (list of ndarray): Gradients of weights for each layer.
        biasGradients (list of ndarray): Gradients of biases for each layer.
        If returnErrorTermForCNN is True, additionally:
            hiddenWeightErrorTermsForCNNBackpropagation (ndarray): Error terms of the output layer.
    """
    # Map each loss / activation function to its derivative
    lossDerivatives = {
        MSELossFunction: MSEDerivative,
        MAELossFunction: MAEDerivative,
        CrossEntropyLossFunction: CrossEntropyLossDerivative,
    }
    activationDerivatives = {
        relu: ReLUDerivative,
        sigmoid: SigmoidDerivative,
        linear: LinearDerivative,
        tanH: TanHDerivative,
        softMax: SoftMaxDerivative,
    }
    # Handle the output layer first, then walk backwards through the hidden layers
    w, weightErrorTerms = _outputLayerWeightChange(lossDerivatives[lossFunction], activationDerivatives[layers[-1][1]], layerNodes[-1], layerNodes[-2], trueValues)
    b, biasErrorTerms = _outputLayerBiasChange(lossDerivatives[lossFunction], activationDerivatives[layers[-1][1]], layerNodes[-1], trueValues)
    hiddenWeightErrorTermsForCNNBackpropagation = weightErrorTerms
    weightGradients = [w]
    biasGradients = [b]
    for i in range(len(layers) - 2, 0, -1):
        w, weightErrorTerms = _hiddenLayerWeightChange(
            weightErrorTerms,
            weights[i],
            activationDerivatives[layers[i][1]],
            layerNodes[i],
            layerNodes[i - 1]
        )
        b, biasErrorTerms = _hiddenLayerBiasChange(
            biasErrorTerms,
            weights[i],
            activationDerivatives[layers[i][1]],
            layerNodes[i]
        )
        weightGradients.append(w)
        biasGradients.append(b)
    # Gradients were collected from the output layer backwards, so reverse them
    # to match the network's layer order
    weightGradients.reverse()
    biasGradients.reverse()
    if returnErrorTermForCNN:
        return weightGradients, biasGradients, hiddenWeightErrorTermsForCNNBackpropagation
    return weightGradients, biasGradients
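
if __name__ == "__main__":
    # Minimal sketch (illustrative, not part of the library's API): gradients for a
    # 2-3-2 network, with hand-made activations standing in for a real forward pass.
    layers = [(2, relu), (3, relu), (2, softMax)]
    layerNodes = [np.array([0.5, 0.1]), np.array([0.2, 0.0, 0.7]), np.array([0.6, 0.4])]
    weights = [np.random.rand(2, 3), np.random.rand(3, 2)]
    biases = [np.zeros(3), np.zeros(2)]
    trueValues = np.array([1.0, 0.0])
    wGrads, bGrads = _backPropgation(layerNodes, weights, biases, trueValues, layers, CrossEntropyLossFunction)
    print([g.shape for g in wGrads])  # [(2, 3), (3, 2)]
    print([g.shape for g in bGrads])  # [(3,), (2,)]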