quacknet.convulationalOptimiser
import numpy as np


class CNNoptimiser:
    """Adam optimisation routines for the CNN weights and biases.

    The methods call ``self.forward(sample)`` and
    ``self._backpropagation(layerNodes, label)``, neither of which is defined
    in this class; the object these methods run on is expected to provide them.
    """

    def _AdamsOptimiserWithBatches(self, inputData, labels, weights, biases, batchSize, alpha, beta1, beta2, epsilon):
        """
        Performs Adam optimisation on the CNN weights and biases using mini batches.

        Gradients are averaged over the samples of each batch and one Adam
        update is applied per batch.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            weights (list of ndarray): Current weights of the CNN layers.
            biases (list of ndarray): Current biases of the CNN layers.
            batchSize (int): Size of batches.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.

        Returns:
            allNodes (list): List of layers for each input processed.
            weights (list of ndarray): Updated weights after optimisation.
            biases (list of ndarray): Updated biases after optimisation.

        Raises:
            ValueError: If batchSize is smaller than 1.
        """
        if batchSize < 1:
            raise ValueError("batchSize must be a positive integer")

        firstMomentWeight, firstMomentBias = self._initialiseMoment(weights, biases)
        secondMomentWeight, secondMomentBias = self._initialiseMoment(weights, biases)
        # Ceiling division so a trailing partial batch is counted in the progress output.
        totalBatches = (len(inputData) + batchSize - 1) // batchSize
        allNodes = []
        for step, start in enumerate(range(0, len(inputData), batchSize), start=1):
            batchData = inputData[start:start + batchSize]
            batchLabels = labels[start:start + batchSize]
            # The last batch may be shorter than batchSize; average over its real length.
            samplesInBatch = len(batchData)
            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
            for j in range(samplesInBatch):
                layerNodes = self.forward(batchData[j])
                allNodes.append(layerNodes)
                w, b = self._backpropagation(layerNodes, batchLabels[j])
                weightGradients, biasGradients = self._addGradients(samplesInBatch, weightGradients, biasGradients, w, b)
            # Adam's bias correction needs the number of updates performed so far
            # (1, 2, 3, ...), not the offset of the batch inside inputData.
            weights, biases, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias = self._Adams(
                weightGradients, biasGradients, weights, biases, step,
                firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias,
                alpha, beta1, beta2, epsilon,
            )
            print(f"finished batch: {step}/{totalBatches}")
        return allNodes, weights, biases

    def _AdamsOptimiserWithoutBatches(self, inputData, labels, weights, biases, alpha, beta1, beta2, epsilon):
        """
        Performs Adam optimisation on the CNN weights and biases without using batches.

        One Adam update is applied after every single training sample.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            weights (list of ndarray): Current weights of the CNN layers.
            biases (list of ndarray): Current biases of the CNN layers.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.

        Returns:
            allNodes (list): List of layers for each input processed.
            weights (list of ndarray): Updated weights after optimisation.
            biases (list of ndarray): Updated biases after optimisation.
        """
        firstMomentWeight, firstMomentBias = self._initialiseMoment(weights, biases)
        secondMomentWeight, secondMomentBias = self._initialiseMoment(weights, biases)
        allNodes = []
        for i in range(len(inputData)):
            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
            layerNodes = self.forward(inputData[i])
            allNodes.append(layerNodes)
            w, b = self._backpropagation(layerNodes, labels[i])
            weightGradients, biasGradients = self._addGradients(1, weightGradients, biasGradients, w, b)
            # One update per sample, so the 1-based sample index is the Adam time step.
            weights, biases, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias = self._Adams(
                weightGradients, biasGradients, weights, biases, i + 1,
                firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias,
                alpha, beta1, beta2, epsilon,
            )
        return allNodes, weights, biases

    def _Adams(self, weightGradients, biasGradients, weights, biases, timeStamp, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias, alpha, beta1, beta2, epsilon):
        """
        Performs a single Adam optimisation update on weights and biases.

        The entries of weights, biases and the four moment lists are updated
        in place; the same objects are also returned.

        Args:
            weightGradients (list of ndarray): Gradients of the weights.
            biasGradients (list of ndarray): Gradients of the biases.
            weights (list of ndarray): Current weights.
            biases (list of ndarray): Current biases.
            timeStamp (int): The current time step (starting at 1), used for bias correction.
            firstMomentWeight (list of ndarray): First moment estimates for weights.
            firstMomentBias (list of ndarray): First moment estimates for biases.
            secondMomentWeight (list of ndarray): Second moment estimates for weights.
            secondMomentBias (list of ndarray): Second moment estimates for biases.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.

        Returns:
            weights (list of ndarray): Updated weights after optimisation.
            biases (list of ndarray): Updated biases after optimisation.
            firstMomentWeight (list of ndarray): Updated firstMomentWeight after optimisation.
            firstMomentBias (list of ndarray): Updated firstMomentBias after optimisation.
            secondMomentWeight (list of ndarray): Updated secondMomentWeight after optimisation.
            secondMomentBias (list of ndarray): Updated secondMomentBias after optimisation.
        """
        self._adamUpdateInPlace(weights, weightGradients, firstMomentWeight, secondMomentWeight, timeStamp, alpha, beta1, beta2, epsilon)
        self._adamUpdateInPlace(biases, biasGradients, firstMomentBias, secondMomentBias, timeStamp, alpha, beta1, beta2, epsilon)
        return weights, biases, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias

    def _adamUpdateInPlace(self, params, gradients, firstMoment, secondMoment, timeStamp, alpha, beta1, beta2, epsilon):
        """
        Applies one Adam step to every entry of one nested parameter list.

        Args:
            params (list of ndarray): Parameters to update (modified in place).
            gradients (list of ndarray): Gradients matching params.
            firstMoment (list of ndarray): First moment estimates (modified in place).
            secondMoment (list of ndarray): Second moment estimates (modified in place).
            timeStamp (int): The current time step, used for bias correction.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.
        """
        # The bias-correction denominators depend only on the time step.
        firstCorrection = 1 - beta1 ** timeStamp
        secondCorrection = 1 - beta2 ** timeStamp
        for i in range(len(params)):
            for j in range(len(params[i])):
                gradient = np.asarray(gradients[i][j])
                firstMoment[i][j] = beta1 * np.asarray(firstMoment[i][j]) + (1 - beta1) * gradient
                secondMoment[i][j] = beta2 * np.asarray(secondMoment[i][j]) + (1 - beta2) * (gradient ** 2)

                firstMomentHat = firstMoment[i][j] / firstCorrection
                secondMomentHat = secondMoment[i][j] / secondCorrection

                params[i][j] -= alpha * firstMomentHat / (np.sqrt(secondMomentHat) + epsilon)

    def _initialiseGradients(self, weights, biases):
        """
        Initialise the weight and bias gradients as float64 zero arrays with the same shape as weights and biases.

        Args:
            weights (list of ndarray): The weights of the CNN layers.
            biases (list of ndarray): The biases of the CNN layers.

        Returns:
            weightGradients (list of ndarray): Initialised gradients for weights.
            biasGradients (list of ndarray): Initialised gradients for biases.
        """
        weightGradients = [[np.zeros_like(entry, dtype=np.float64) for entry in layer] for layer in weights]
        biasGradients = [[np.zeros_like(entry, dtype=np.float64) for entry in layer] for layer in biases]
        return weightGradients, biasGradients

    def _addGradients(self, batchSize, weightGradients, biasGradients, w, b):
        """
        Adds the gradients of one sample, divided by batchSize, to the accumulated gradients.

        The accumulators are updated in place and also returned.

        Args:
            batchSize (int): Number of samples in the current batch.
            weightGradients (list of ndarray): Accumulated weight gradients.
            biasGradients (list of ndarray): Accumulated bias gradients.
            w (list of ndarray): Gradients of the weights to add.
            b (list of ndarray): Gradients of the biases to add.

        Returns:
            weightGradients (list of ndarray): Updated accumulated weight gradients.
            biasGradients (list of ndarray): Updated accumulated bias gradients.
        """
        for i in range(len(weightGradients)):
            for j in range(len(weightGradients[i])):
                weightGradients[i][j] += np.array(w[i][j]) / batchSize

        for i in range(len(biasGradients)):
            for j in range(len(biasGradients[i])):
                biasGradients[i][j] += np.array(b[i][j]) / batchSize
        return weightGradients, biasGradients

    def _initialiseMoment(self, weights, biases):
        """
        Initialise moment estimates for the Adam optimiser as zero arrays matching weights and biases.

        Args:
            weights (list of ndarray): The weights of the CNN layers.
            biases (list of ndarray): The biases of the CNN layers.

        Returns:
            momentWeight (list of ndarray): Initialised moments for weights.
            momentBias (list of ndarray): Initialised moments for biases.
        """
        momentWeight = [[np.zeros_like(entry) for entry in layer] for layer in weights]
        momentBias = [[np.zeros_like(entry) for entry in layer] for layer in biases]
        return momentWeight, momentBias
class
CNNoptimiser:
class CNNoptimiser:
    """Adam optimisation routines for the CNN weights and biases.

    The methods call ``self.forward(sample)`` and
    ``self._backpropagation(layerNodes, label)``, neither of which is defined
    in this class; the object these methods run on is expected to provide them.
    """

    def _AdamsOptimiserWithBatches(self, inputData, labels, weights, biases, batchSize, alpha, beta1, beta2, epsilon):
        """
        Performs Adam optimisation on the CNN weights and biases using mini batches.

        Gradients are averaged over the samples of each batch and one Adam
        update is applied per batch.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            weights (list of ndarray): Current weights of the CNN layers.
            biases (list of ndarray): Current biases of the CNN layers.
            batchSize (int): Size of batches.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.

        Returns:
            allNodes (list): List of layers for each input processed.
            weights (list of ndarray): Updated weights after optimisation.
            biases (list of ndarray): Updated biases after optimisation.

        Raises:
            ValueError: If batchSize is smaller than 1.
        """
        if batchSize < 1:
            raise ValueError("batchSize must be a positive integer")

        firstMomentWeight, firstMomentBias = self._initialiseMoment(weights, biases)
        secondMomentWeight, secondMomentBias = self._initialiseMoment(weights, biases)
        # Ceiling division so a trailing partial batch is counted in the progress output.
        totalBatches = (len(inputData) + batchSize - 1) // batchSize
        allNodes = []
        for step, start in enumerate(range(0, len(inputData), batchSize), start=1):
            batchData = inputData[start:start + batchSize]
            batchLabels = labels[start:start + batchSize]
            # The last batch may be shorter than batchSize; average over its real length.
            samplesInBatch = len(batchData)
            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
            for j in range(samplesInBatch):
                layerNodes = self.forward(batchData[j])
                allNodes.append(layerNodes)
                w, b = self._backpropagation(layerNodes, batchLabels[j])
                weightGradients, biasGradients = self._addGradients(samplesInBatch, weightGradients, biasGradients, w, b)
            # Adam's bias correction needs the number of updates performed so far
            # (1, 2, 3, ...), not the offset of the batch inside inputData.
            weights, biases, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias = self._Adams(
                weightGradients, biasGradients, weights, biases, step,
                firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias,
                alpha, beta1, beta2, epsilon,
            )
            print(f"finished batch: {step}/{totalBatches}")
        return allNodes, weights, biases

    def _AdamsOptimiserWithoutBatches(self, inputData, labels, weights, biases, alpha, beta1, beta2, epsilon):
        """
        Performs Adam optimisation on the CNN weights and biases without using batches.

        One Adam update is applied after every single training sample.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            weights (list of ndarray): Current weights of the CNN layers.
            biases (list of ndarray): Current biases of the CNN layers.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.

        Returns:
            allNodes (list): List of layers for each input processed.
            weights (list of ndarray): Updated weights after optimisation.
            biases (list of ndarray): Updated biases after optimisation.
        """
        firstMomentWeight, firstMomentBias = self._initialiseMoment(weights, biases)
        secondMomentWeight, secondMomentBias = self._initialiseMoment(weights, biases)
        allNodes = []
        for i in range(len(inputData)):
            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
            layerNodes = self.forward(inputData[i])
            allNodes.append(layerNodes)
            w, b = self._backpropagation(layerNodes, labels[i])
            weightGradients, biasGradients = self._addGradients(1, weightGradients, biasGradients, w, b)
            # One update per sample, so the 1-based sample index is the Adam time step.
            weights, biases, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias = self._Adams(
                weightGradients, biasGradients, weights, biases, i + 1,
                firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias,
                alpha, beta1, beta2, epsilon,
            )
        return allNodes, weights, biases

    def _Adams(self, weightGradients, biasGradients, weights, biases, timeStamp, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias, alpha, beta1, beta2, epsilon):
        """
        Performs a single Adam optimisation update on weights and biases.

        The entries of weights, biases and the four moment lists are updated
        in place; the same objects are also returned.

        Args:
            weightGradients (list of ndarray): Gradients of the weights.
            biasGradients (list of ndarray): Gradients of the biases.
            weights (list of ndarray): Current weights.
            biases (list of ndarray): Current biases.
            timeStamp (int): The current time step (starting at 1), used for bias correction.
            firstMomentWeight (list of ndarray): First moment estimates for weights.
            firstMomentBias (list of ndarray): First moment estimates for biases.
            secondMomentWeight (list of ndarray): Second moment estimates for weights.
            secondMomentBias (list of ndarray): Second moment estimates for biases.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.

        Returns:
            weights (list of ndarray): Updated weights after optimisation.
            biases (list of ndarray): Updated biases after optimisation.
            firstMomentWeight (list of ndarray): Updated firstMomentWeight after optimisation.
            firstMomentBias (list of ndarray): Updated firstMomentBias after optimisation.
            secondMomentWeight (list of ndarray): Updated secondMomentWeight after optimisation.
            secondMomentBias (list of ndarray): Updated secondMomentBias after optimisation.
        """
        self._adamUpdateInPlace(weights, weightGradients, firstMomentWeight, secondMomentWeight, timeStamp, alpha, beta1, beta2, epsilon)
        self._adamUpdateInPlace(biases, biasGradients, firstMomentBias, secondMomentBias, timeStamp, alpha, beta1, beta2, epsilon)
        return weights, biases, firstMomentWeight, firstMomentBias, secondMomentWeight, secondMomentBias

    def _adamUpdateInPlace(self, params, gradients, firstMoment, secondMoment, timeStamp, alpha, beta1, beta2, epsilon):
        """
        Applies one Adam step to every entry of one nested parameter list.

        Args:
            params (list of ndarray): Parameters to update (modified in place).
            gradients (list of ndarray): Gradients matching params.
            firstMoment (list of ndarray): First moment estimates (modified in place).
            secondMoment (list of ndarray): Second moment estimates (modified in place).
            timeStamp (int): The current time step, used for bias correction.
            alpha (float): Learning rate.
            beta1 (float): Decay rate for the first moment.
            beta2 (float): Decay rate for the second moment.
            epsilon (float): Small constant to avoid division by zero.
        """
        # The bias-correction denominators depend only on the time step.
        firstCorrection = 1 - beta1 ** timeStamp
        secondCorrection = 1 - beta2 ** timeStamp
        for i in range(len(params)):
            for j in range(len(params[i])):
                gradient = np.asarray(gradients[i][j])
                firstMoment[i][j] = beta1 * np.asarray(firstMoment[i][j]) + (1 - beta1) * gradient
                secondMoment[i][j] = beta2 * np.asarray(secondMoment[i][j]) + (1 - beta2) * (gradient ** 2)

                firstMomentHat = firstMoment[i][j] / firstCorrection
                secondMomentHat = secondMoment[i][j] / secondCorrection

                params[i][j] -= alpha * firstMomentHat / (np.sqrt(secondMomentHat) + epsilon)

    def _initialiseGradients(self, weights, biases):
        """
        Initialise the weight and bias gradients as float64 zero arrays with the same shape as weights and biases.

        Args:
            weights (list of ndarray): The weights of the CNN layers.
            biases (list of ndarray): The biases of the CNN layers.

        Returns:
            weightGradients (list of ndarray): Initialised gradients for weights.
            biasGradients (list of ndarray): Initialised gradients for biases.
        """
        weightGradients = [[np.zeros_like(entry, dtype=np.float64) for entry in layer] for layer in weights]
        biasGradients = [[np.zeros_like(entry, dtype=np.float64) for entry in layer] for layer in biases]
        return weightGradients, biasGradients

    def _addGradients(self, batchSize, weightGradients, biasGradients, w, b):
        """
        Adds the gradients of one sample, divided by batchSize, to the accumulated gradients.

        The accumulators are updated in place and also returned.

        Args:
            batchSize (int): Number of samples in the current batch.
            weightGradients (list of ndarray): Accumulated weight gradients.
            biasGradients (list of ndarray): Accumulated bias gradients.
            w (list of ndarray): Gradients of the weights to add.
            b (list of ndarray): Gradients of the biases to add.

        Returns:
            weightGradients (list of ndarray): Updated accumulated weight gradients.
            biasGradients (list of ndarray): Updated accumulated bias gradients.
        """
        for i in range(len(weightGradients)):
            for j in range(len(weightGradients[i])):
                weightGradients[i][j] += np.array(w[i][j]) / batchSize

        for i in range(len(biasGradients)):
            for j in range(len(biasGradients[i])):
                biasGradients[i][j] += np.array(b[i][j]) / batchSize
        return weightGradients, biasGradients

    def _initialiseMoment(self, weights, biases):
        """
        Initialise moment estimates for the Adam optimiser as zero arrays matching weights and biases.

        Args:
            weights (list of ndarray): The weights of the CNN layers.
            biases (list of ndarray): The biases of the CNN layers.

        Returns:
            momentWeight (list of ndarray): Initialised moments for weights.
            momentBias (list of ndarray): Initialised moments for biases.
        """
        momentWeight = [[np.zeros_like(entry) for entry in layer] for layer in weights]
        momentBias = [[np.zeros_like(entry) for entry in layer] for layer in biases]
        return momentWeight, momentBias