quacknet.convulationalOptimiser

import numpy as np

class CNNoptimiser:
    def _AdamsOptimiserWithBatches(self, inputData, labels, weights, biases, batchSize, alpha, beta1, beta2, epsilon):
        """
        Performs Adam optimisation on the CNN weights and biases using mini-batches.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            weights (list of ndarray): Current weights of the CNN layers.
            biases (list of ndarray): Current biases of the CNN layers.
            batchSize (int): Number of samples per mini-batch.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.

        Returns:
            allNodes (list): Layer outputs for each input processed.
            weights (list of ndarray): Updated weights after optimisation.
            biases (list of ndarray): Updated biases after optimisation.
        """
        firstMomentWeight, firstMomentBias = self._initialiseMoment(weights, biases)
        secondMomentWeight, secondMomentBias = self._initialiseMoment(weights, biases)
        weightGradients, biasGradients = self._initialiseGradients(weights, biases)
        allNodes = []
        for i in range(0, len(inputData), batchSize):
            batchData = inputData[i:i+batchSize]
            batchLabels = labels[i:i+batchSize]
            for j in range(len(batchData)):
                # Forward pass and backpropagation for one sample; the per-sample
                # gradients are accumulated into the running batch average.
                layerNodes = self.forward(batchData[j])
                allNodes.append(layerNodes)
                w, b = self._backpropagation(layerNodes, batchLabels[j])
                weightGradients, biasGradients = self._addGradients(batchSize, weightGradients, biasGradients, w, b)
            # One Adam update per mini-batch; the time step passed for bias
            # correction is the number of batches processed so far.
            weights, biases, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias = self._Adams(weightGradients, biasGradients, weights, biases, (i // batchSize) + 1, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias, alpha, beta1, beta2, epsilon)
            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
            print(f"finished batch: {(i // batchSize) + 1}/{len(inputData) // batchSize}")
        return allNodes, weights, biases

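    # Each mini-batch contributes a single Adam step computed on the
    # batch-averaged gradient, gBar = (1 / batchSize) * sum of the per-sample
    # gradients, which _addGradients accumulates sample by sample.
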
    def _AdamsOptimiserWithoutBatches(self, inputData, labels, weights, biases, alpha, beta1, beta2, epsilon):
        """
        Performs Adam optimisation on the CNN weights and biases without using batches.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            weights (list of ndarray): Current weights of the CNN layers.
            biases (list of ndarray): Current biases of the CNN layers.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.

        Returns:
            allNodes (list): Layer outputs for each input processed.
            weights (list of ndarray): Updated weights after optimisation.
            biases (list of ndarray): Updated biases after optimisation.
        """
        firstMomentWeight, firstMomentBias = self._initialiseMoment(weights, biases)
        secondMomentWeight, secondMomentBias = self._initialiseMoment(weights, biases)
        weightGradients, biasGradients = self._initialiseGradients(weights, biases)
        allNodes = []
        for i in range(len(inputData)):
            # One Adam update per training sample (equivalent to a batch size of 1).
            layerNodes = self.forward(inputData[i])
            allNodes.append(layerNodes)
            w, b = self._backpropagation(layerNodes, labels[i])
            weightGradients, biasGradients = self._addGradients(1, weightGradients, biasGradients, w, b)
            weights, biases, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias = self._Adams(weightGradients, biasGradients, weights, biases, i + 1, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias, alpha, beta1, beta2, epsilon)
            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
        return allNodes, weights, biases

    def _Adams(self, weightGradients, biasGradients, weights, biases, timeStamp, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias, alpha, beta1, beta2, epsilon):
        """
        Performs a single Adam optimisation update on weights and biases.

        Args:
            weightGradients (list of ndarray): Gradients of the weights.
            biasGradients (list of ndarray): Gradients of the biases.
            weights (list of ndarray): Current weights.
            biases (list of ndarray): Current biases.
            timeStamp (int): The current time step, used for bias correction.
            firstMomentWeight (list of ndarray): First moment estimates for weights.
            firstMomentBias (list of ndarray): First moment estimates for biases.
            secondMomentWeight (list of ndarray): Second moment estimates for weights.
            secondMomentBias (list of ndarray): Second moment estimates for biases.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.

        Returns:
            weights (list of ndarray): Updated weights after optimisation.
            biases (list of ndarray): Updated biases after optimisation.
            firstMomentWeight (list of ndarray): Updated firstMomentWeight after optimisation.
            firstMomentBias (list of ndarray): Updated firstMomentBias after optimisation.
            secondMomentWeight (list of ndarray): Updated secondMomentWeight after optimisation.
            secondMomentBias (list of ndarray): Updated secondMomentBias after optimisation.
        """
        for i in range(len(weights)):
            for j in range(len(weights[i])):
                # Update the biased first and second moment estimates for this weight array.
                firstMomentWeight[i][j] = beta1 * np.array(firstMomentWeight[i][j]) + (1 - beta1) * weightGradients[i][j]
                secondMomentWeight[i][j] = beta2 * np.array(secondMomentWeight[i][j]) + (1 - beta2) * (weightGradients[i][j] ** 2)

                # Bias-corrected moment estimates.
                firstMomentWeightHat = firstMomentWeight[i][j] / (1 - beta1 ** timeStamp)
                secondMomentWeightHat = secondMomentWeight[i][j] / (1 - beta2 ** timeStamp)

                weights[i][j] -= alpha * firstMomentWeightHat / (np.sqrt(secondMomentWeightHat) + epsilon)

        for i in range(len(biases)):
            for j in range(len(biases[i])):
                firstMomentBias[i][j] = beta1 * np.array(firstMomentBias[i][j]) + (1 - beta1) * np.array(biasGradients[i][j])
                secondMomentBias[i][j] = beta2 * np.array(secondMomentBias[i][j]) + (1 - beta2) * (np.array(biasGradients[i][j]) ** 2)

                firstMomentBiasHat = firstMomentBias[i][j] / (1 - beta1 ** timeStamp)
                secondMomentBiasHat = secondMomentBias[i][j] / (1 - beta2 ** timeStamp)

                biases[i][j] -= alpha * firstMomentBiasHat / (np.sqrt(secondMomentBiasHat) + epsilon)
        return weights, biases, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias

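    # The loops above apply the standard Adam update elementwise to each
    # weight/bias array, with g the accumulated gradient and t = timeStamp:
    #     m_t = beta1 * m_{t-1} + (1 - beta1) * g
    #     v_t = beta2 * v_{t-1} + (1 - beta2) * g**2
    #     mHat = m_t / (1 - beta1**t),  vHat = v_t / (1 - beta2**t)
    #     theta -= alpha * mHat / (sqrt(vHat) + epsilon)
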
    def _initialiseGradients(self, weights, biases):
        """
        Initialise the weight and bias gradients as zero arrays with the same shape as weights and biases.

        Args:
            weights (list of ndarray): The weights of the CNN layers.
            biases (list of ndarray): The biases of the CNN layers.

        Returns:
            weightGradients (list of ndarray): Initialised gradients for weights.
            biasGradients (list of ndarray): Initialised gradients for biases.
        """
        weightGradients, biasGradients = [], []
        for i in weights:
            w = []
            for j in i:
                w.append(np.zeros_like(j, dtype=np.float64))
            weightGradients.append(w)
        for i in biases:
            b = []
            for j in i:
                b.append(np.zeros_like(j, dtype=np.float64))
            biasGradients.append(b)
        return weightGradients, biasGradients

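    # The returned buffers mirror the nested list-of-lists structure of the
    # parameters. For example (toy shapes, for illustration only), weights of
    # [[np.ones((2, 2))], [np.ones((3,))]] give weightGradients of
    # [[np.zeros((2, 2))], [np.zeros((3,))]].
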
    def _addGradients(self, batchSize, weightGradients, biasGradients, w, b):
        """
        Adds gradients from a single sample to the accumulated gradients, scaled by the batch size.

        Args:
            batchSize (int): Number of samples in the current batch.
            weightGradients (list of ndarray): Accumulated weight gradients.
            biasGradients (list of ndarray): Accumulated bias gradients.
            w (list of ndarray): Gradients of the weights from the current sample.
            b (list of ndarray): Gradients of the biases from the current sample.

        Returns:
            weightGradients (list of ndarray): Updated accumulated weight gradients.
            biasGradients (list of ndarray): Updated accumulated bias gradients.
        """
        for i in range(len(weightGradients)):
            for j in range(len(weightGradients[i])):
                weightGradients[i][j] += np.array(w[i][j]) / batchSize
            #weightGradients[i] = np.clip(weightGradients[i], -1, 1)

        for i in range(len(biasGradients)):
            for j in range(len(biasGradients[i])):
                biasGradients[i][j] += np.array(b[i][j]) / batchSize
            #biasGradients[i] = np.clip(biasGradients[i], -1, 1)
        return weightGradients, biasGradients

    def _initialiseMoment(self, weights, biases):
        """
        Initialise the first and second moment estimates for the Adam optimiser as zero arrays matching weights and biases.

        Args:
            weights (list of ndarray): The weights of the CNN layers.
            biases (list of ndarray): The biases of the CNN layers.

        Returns:
            momentWeight (list of ndarray): Initialised moments for weights.
            momentBias (list of ndarray): Initialised moments for biases.
        """
        momentWeight = []
        momentBias = []
        for i in weights:
            w = []
            for j in i:
                w.append(np.zeros_like(j))
            momentWeight.append(w)
        for i in biases:
            b = []
            for j in i:
                b.append(np.zeros_like(j))
            momentBias.append(b)
        return momentWeight, momentBias
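CNNoptimiser is a mixin: it expects the consuming model to provide `forward` and `_backpropagation`. The snippet below is only a minimal sketch of how the batched optimiser could be driven, stubbing those two methods with a single dense layer and a squared-error gradient; `TinyModel`, its shapes, and its gradient rule are invented for illustration and are not part of QuackNet's actual API.

import numpy as np

class TinyModel(CNNoptimiser):
    # Hypothetical stand-in model: one dense layer, squared-error loss.
    def forward(self, x):
        # Return the "nodes" (layer outputs) for one sample.
        return [x, self.weights[0][0] @ x + self.biases[0][0]]

    def _backpropagation(self, layerNodes, label):
        # Gradients of 0.5 * ||output - label||^2 for the single weight/bias array,
        # returned in the nested list-of-lists layout the optimiser expects.
        x, out = layerNodes
        error = out - label
        return [[np.outer(error, x)]], [[error]]

weights = [[np.random.randn(2, 3)]]
biases = [[np.zeros(2)]]
model = TinyModel()
model.weights, model.biases = weights, biases

data = np.random.randn(8, 3)
targets = np.random.randn(8, 2)
allNodes, weights, biases = model._AdamsOptimiserWithBatches(
    data, targets, weights, biases,
    batchSize=4, alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8,
)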