quacknet.optimisers
import numpy as np


class Optimisers:
    """Gradient-descent training loops and the helper routines they share.

    The training methods call ``self.forwardPropagation`` and
    ``self._backPropgation``; neither is defined in this class, so they must
    be supplied by the class that uses these optimisers.
    """

    def _trainGradientDescent(
        self, inputData, labels, epochs, weights, biases, momentumCoefficient,
        momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, _,
    ):
        """
        Trains a model using gradient descent over the whole dataset.

        Gradients from every sample are accumulated and a single parameter
        update is applied per epoch.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            epochs (int): Number of training iterations over the dataset.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            momentumCoefficient (float): Coefficient for momentum.
            momentumDecay (float): Decay factor applied to the momentum coefficient after each epoch.
            useMomentum (bool): Whether to use momentum.
            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
            learningRate (float): The learning rate for optimisation.
            _ : Unused; keeps the signature aligned with the batching variant.

        Returns:
            l (list): Last entry of the forward-pass result, one per sample per epoch.
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        l = []
        if useMomentum:
            velocityWeight, velocityBias = self._initialiseVelocity(velocityWeight, velocityBias, weights, biases)
        for _epoch in range(epochs):
            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
            for index, sample in enumerate(inputData):
                layerNodes = self.forwardPropagation(sample)
                l.append(layerNodes[len(layerNodes) - 1])
                w, b = self._backPropgation(layerNodes, weights, biases, labels[index])
                # The accumulators must receive the result; assigning it to the
                # velocity names would overwrite the momentum state.
                weightGradients, biasGradients = self._addGradients(weightGradients, biasGradients, w, b)
            weights, biases, velocityWeight, velocityBias = self._updateWeightsBiases(
                len(inputData), weights, biases, weightGradients, biasGradients,
                velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate,
            )
            momentumCoefficient *= momentumDecay
        return l, weights, biases, velocityWeight, velocityBias

    def _trainStochasticGradientDescent(
        self, inputData, labels, epochs, weights, biases, momentumCoefficient,
        momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, _,
    ):
        """
        Trains a model using stochastic gradient descent (SGD).

        Parameters are updated after every individual sample.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            epochs (int): Number of training iterations over the dataset.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            momentumCoefficient (float): Coefficient for momentum.
            momentumDecay (float): Decay factor applied to the momentum coefficient after each epoch.
            useMomentum (bool): Whether to use momentum.
            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
            learningRate (float): The learning rate for optimisation.
            _ : Unused; keeps the signature aligned with the batching variant.

        Returns:
            l (list): Forward-pass result (all layers), one entry per sample per epoch.
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        l = []
        if useMomentum:
            velocityWeight, velocityBias = self._initialiseVelocity(velocityWeight, velocityBias, weights, biases)
        for _epoch in range(epochs):
            for index, sample in enumerate(inputData):
                layerNodes = self.forwardPropagation(sample)
                l.append(layerNodes)
                w, b = self._backPropgation(layerNodes, weights, biases, labels[index])
                if useMomentum:
                    # Velocities are lists of per-layer arrays, so the update
                    # has to be applied layer by layer.
                    for i in range(len(weights)):
                        velocityWeight[i] = momentumCoefficient * velocityWeight[i] - learningRate * w[i]
                        weights[i] += velocityWeight[i]
                    for i in range(len(biases)):
                        velocityBias[i] = momentumCoefficient * velocityBias[i] - learningRate * b[i]
                        biases[i] += velocityBias[i]
                else:
                    for i in range(len(weights)):
                        weights[i] -= learningRate * w[i]
                    for i in range(len(biases)):
                        biases[i] -= learningRate * b[i]
            momentumCoefficient *= momentumDecay
        # Return the velocities this method actually updated.
        return l, weights, biases, velocityWeight, velocityBias

    def _trainGradientDescentUsingBatching(
        self, inputData, labels, epochs, weights, biases, momentumCoefficient,
        momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, batchSize,
    ):
        """
        Trains a model using mini-batch gradient descent.

        Gradients are accumulated over each mini batch and one parameter
        update is applied per batch.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            epochs (int): Number of training iterations over the dataset.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            momentumCoefficient (float): Coefficient for momentum.
            momentumDecay (float): Decay factor applied to the momentum coefficient after each epoch.
            useMomentum (bool): Whether to use momentum.
            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
            learningRate (float): The learning rate for optimisation.
            batchSize (int): The size of each mini batch.

        Returns:
            l (list): Forward-pass result (all layers), one entry per sample per epoch.
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        l = []
        if useMomentum:
            velocityWeight, velocityBias = self._initialiseVelocity(velocityWeight, velocityBias, weights, biases)
        for _epoch in range(epochs):
            for start in range(0, len(inputData), batchSize):
                batchData = inputData[start:start + batchSize]
                batchLabels = labels[start:start + batchSize]
                weightGradients, biasGradients = self._initialiseGradients(weights, biases)
                for j in range(len(batchData)):
                    layerNodes = self.forwardPropagation(batchData[j])
                    l.append(layerNodes)
                    w, b = self._backPropgation(layerNodes, weights, biases, batchLabels[j])
                    weightGradients, biasGradients = self._addGradients(weightGradients, biasGradients, w, b)
                # The final batch can be shorter than batchSize, so average
                # over the number of samples actually accumulated.
                weights, biases, velocityWeight, velocityBias = self._updateWeightsBiases(
                    len(batchData), weights, biases, weightGradients, biasGradients,
                    velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate,
                )
            momentumCoefficient *= momentumDecay
        return l, weights, biases, velocityWeight, velocityBias

    def _initialiseVelocity(self, velocityWeight, velocityBias, weights, biases):
        """
        Initialise velocity terms for momentum optimisation.

        A velocity that is ``None`` is replaced by zero arrays shaped like the
        corresponding parameters; any other value is returned untouched.

        Args:
            velocityWeight (list of ndarray): Velocity terms for weights, or None.
            velocityBias (list of ndarray): Velocity terms for biases, or None.
            weights (list of ndarray): The weights of the model.
            biases (list of ndarray): The biases of the model.

        Returns:
            velocityWeight (list of ndarray): Initialised velocity for weights.
            velocityBias (list of ndarray): Initialised velocity for biases.
        """
        # Identity check: `== None` is evaluated element-wise on ndarrays.
        if velocityWeight is None:
            velocityWeight = [np.zeros_like(layer) for layer in weights]
        if velocityBias is None:
            velocityBias = [np.zeros_like(layer) for layer in biases]
        return velocityWeight, velocityBias

    def _initialiseGradients(self, weights, biases):
        """
        Initialise gradients for weights and biases.

        Args:
            weights (list of ndarray): The weights of the model.
            biases (list of ndarray): The biases of the model.

        Returns:
            weightGradients (list of ndarray): Zero arrays shaped like each weight array.
            biasGradients (list of ndarray): Zero arrays shaped like each bias array.
        """
        weightGradients = [np.zeros_like(layer) for layer in weights]
        biasGradients = [np.zeros_like(layer) for layer in biases]
        return weightGradients, biasGradients

    def _addGradients(self, weightGradients, biasGradients, w, b):
        """
        Accumulates gradients for weights and biases.

        After each addition the running total is clipped to the range [-1, 1].

        Args:
            weightGradients (list of ndarray): Accumulated weight gradients.
            biasGradients (list of ndarray): Accumulated bias gradients.
            w (list of ndarray): Weight gradients to add.
            b (list of ndarray): Bias gradients to add; each entry is transposed before being added.

        Returns:
            weightGradients (list of ndarray): Updated accumulated weight gradients.
            biasGradients (list of ndarray): Updated accumulated bias gradients.
        """
        for i in range(len(weightGradients)):
            weightGradients[i] += w[i]
            weightGradients[i] = np.clip(weightGradients[i], -1, 1)
        for i in range(len(biasGradients)):
            biasGradients[i] += b[i].T
            biasGradients[i] = np.clip(biasGradients[i], -1, 1)
        return weightGradients, biasGradients

    def _updateWeightsBiases(
        self, size, weights, biases, weightGradients, biasGradients,
        velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate,
    ):
        """
        Updates the weights and biases of the model.

        The accumulated gradients are divided by ``size`` before being applied.
        With momentum the step is ``v = momentumCoefficient * v - learningRate * g``
        followed by ``parameter += v``; otherwise ``parameter -= learningRate * g``.

        Args:
            size (int): Number of samples the gradients were accumulated over.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            weightGradients (list of ndarray): Weight gradients.
            biasGradients (list of ndarray): Bias gradients.
            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
            useMomentum (bool): Whether to use momentum.
            momentumCoefficient (float): Coefficient for momentum.
            learningRate (float): The learning rate for optimisation.

        Returns:
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        if size == 0:
            # Nothing was accumulated; dividing by zero would turn every
            # parameter into NaN, so leave the model unchanged.
            return weights, biases, velocityWeight, velocityBias
        if useMomentum:
            for i in range(len(weights)):
                # Plain assignment, matching the bias update below.
                velocityWeight[i] = momentumCoefficient * velocityWeight[i] - learningRate * (weightGradients[i] / size)
                weights[i] += velocityWeight[i]
            for i in range(len(biases)):
                velocityBias[i] = momentumCoefficient * velocityBias[i] - learningRate * (biasGradients[i] / size)
                biases[i] += velocityBias[i]
        else:
            for i in range(len(weights)):
                weights[i] = weights[i] - learningRate * (weightGradients[i] / size)
            for i in range(len(biases)):
                biases[i] -= learningRate * (biasGradients[i] / size)
        return weights, biases, velocityWeight, velocityBias
class
Optimisers:
class Optimisers:
    """Gradient-descent training loops and the helper routines they share.

    The training methods call ``self.forwardPropagation`` and
    ``self._backPropgation``; neither is defined in this class, so they must
    be supplied by the class that uses these optimisers.
    """

    def _trainGradientDescent(
        self, inputData, labels, epochs, weights, biases, momentumCoefficient,
        momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, _,
    ):
        """
        Trains a model using gradient descent over the whole dataset.

        Gradients from every sample are accumulated and a single parameter
        update is applied per epoch.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            epochs (int): Number of training iterations over the dataset.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            momentumCoefficient (float): Coefficient for momentum.
            momentumDecay (float): Decay factor applied to the momentum coefficient after each epoch.
            useMomentum (bool): Whether to use momentum.
            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
            learningRate (float): The learning rate for optimisation.
            _ : Unused; keeps the signature aligned with the batching variant.

        Returns:
            l (list): Last entry of the forward-pass result, one per sample per epoch.
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        l = []
        if useMomentum:
            velocityWeight, velocityBias = self._initialiseVelocity(velocityWeight, velocityBias, weights, biases)
        for _epoch in range(epochs):
            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
            for index, sample in enumerate(inputData):
                layerNodes = self.forwardPropagation(sample)
                l.append(layerNodes[len(layerNodes) - 1])
                w, b = self._backPropgation(layerNodes, weights, biases, labels[index])
                # The accumulators must receive the result; assigning it to the
                # velocity names would overwrite the momentum state.
                weightGradients, biasGradients = self._addGradients(weightGradients, biasGradients, w, b)
            weights, biases, velocityWeight, velocityBias = self._updateWeightsBiases(
                len(inputData), weights, biases, weightGradients, biasGradients,
                velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate,
            )
            momentumCoefficient *= momentumDecay
        return l, weights, biases, velocityWeight, velocityBias

    def _trainStochasticGradientDescent(
        self, inputData, labels, epochs, weights, biases, momentumCoefficient,
        momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, _,
    ):
        """
        Trains a model using stochastic gradient descent (SGD).

        Parameters are updated after every individual sample.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            epochs (int): Number of training iterations over the dataset.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            momentumCoefficient (float): Coefficient for momentum.
            momentumDecay (float): Decay factor applied to the momentum coefficient after each epoch.
            useMomentum (bool): Whether to use momentum.
            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
            learningRate (float): The learning rate for optimisation.
            _ : Unused; keeps the signature aligned with the batching variant.

        Returns:
            l (list): Forward-pass result (all layers), one entry per sample per epoch.
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        l = []
        if useMomentum:
            velocityWeight, velocityBias = self._initialiseVelocity(velocityWeight, velocityBias, weights, biases)
        for _epoch in range(epochs):
            for index, sample in enumerate(inputData):
                layerNodes = self.forwardPropagation(sample)
                l.append(layerNodes)
                w, b = self._backPropgation(layerNodes, weights, biases, labels[index])
                if useMomentum:
                    # Velocities are lists of per-layer arrays, so the update
                    # has to be applied layer by layer.
                    for i in range(len(weights)):
                        velocityWeight[i] = momentumCoefficient * velocityWeight[i] - learningRate * w[i]
                        weights[i] += velocityWeight[i]
                    for i in range(len(biases)):
                        velocityBias[i] = momentumCoefficient * velocityBias[i] - learningRate * b[i]
                        biases[i] += velocityBias[i]
                else:
                    for i in range(len(weights)):
                        weights[i] -= learningRate * w[i]
                    for i in range(len(biases)):
                        biases[i] -= learningRate * b[i]
            momentumCoefficient *= momentumDecay
        # Return the velocities this method actually updated.
        return l, weights, biases, velocityWeight, velocityBias

    def _trainGradientDescentUsingBatching(
        self, inputData, labels, epochs, weights, biases, momentumCoefficient,
        momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, batchSize,
    ):
        """
        Trains a model using mini-batch gradient descent.

        Gradients are accumulated over each mini batch and one parameter
        update is applied per batch.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            epochs (int): Number of training iterations over the dataset.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            momentumCoefficient (float): Coefficient for momentum.
            momentumDecay (float): Decay factor applied to the momentum coefficient after each epoch.
            useMomentum (bool): Whether to use momentum.
            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
            learningRate (float): The learning rate for optimisation.
            batchSize (int): The size of each mini batch.

        Returns:
            l (list): Forward-pass result (all layers), one entry per sample per epoch.
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        l = []
        if useMomentum:
            velocityWeight, velocityBias = self._initialiseVelocity(velocityWeight, velocityBias, weights, biases)
        for _epoch in range(epochs):
            for start in range(0, len(inputData), batchSize):
                batchData = inputData[start:start + batchSize]
                batchLabels = labels[start:start + batchSize]
                weightGradients, biasGradients = self._initialiseGradients(weights, biases)
                for j in range(len(batchData)):
                    layerNodes = self.forwardPropagation(batchData[j])
                    l.append(layerNodes)
                    w, b = self._backPropgation(layerNodes, weights, biases, batchLabels[j])
                    weightGradients, biasGradients = self._addGradients(weightGradients, biasGradients, w, b)
                # The final batch can be shorter than batchSize, so average
                # over the number of samples actually accumulated.
                weights, biases, velocityWeight, velocityBias = self._updateWeightsBiases(
                    len(batchData), weights, biases, weightGradients, biasGradients,
                    velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate,
                )
            momentumCoefficient *= momentumDecay
        return l, weights, biases, velocityWeight, velocityBias

    def _initialiseVelocity(self, velocityWeight, velocityBias, weights, biases):
        """
        Initialise velocity terms for momentum optimisation.

        A velocity that is ``None`` is replaced by zero arrays shaped like the
        corresponding parameters; any other value is returned untouched.

        Args:
            velocityWeight (list of ndarray): Velocity terms for weights, or None.
            velocityBias (list of ndarray): Velocity terms for biases, or None.
            weights (list of ndarray): The weights of the model.
            biases (list of ndarray): The biases of the model.

        Returns:
            velocityWeight (list of ndarray): Initialised velocity for weights.
            velocityBias (list of ndarray): Initialised velocity for biases.
        """
        # Identity check: `== None` is evaluated element-wise on ndarrays.
        if velocityWeight is None:
            velocityWeight = [np.zeros_like(layer) for layer in weights]
        if velocityBias is None:
            velocityBias = [np.zeros_like(layer) for layer in biases]
        return velocityWeight, velocityBias

    def _initialiseGradients(self, weights, biases):
        """
        Initialise gradients for weights and biases.

        Args:
            weights (list of ndarray): The weights of the model.
            biases (list of ndarray): The biases of the model.

        Returns:
            weightGradients (list of ndarray): Zero arrays shaped like each weight array.
            biasGradients (list of ndarray): Zero arrays shaped like each bias array.
        """
        weightGradients = [np.zeros_like(layer) for layer in weights]
        biasGradients = [np.zeros_like(layer) for layer in biases]
        return weightGradients, biasGradients

    def _addGradients(self, weightGradients, biasGradients, w, b):
        """
        Accumulates gradients for weights and biases.

        After each addition the running total is clipped to the range [-1, 1].

        Args:
            weightGradients (list of ndarray): Accumulated weight gradients.
            biasGradients (list of ndarray): Accumulated bias gradients.
            w (list of ndarray): Weight gradients to add.
            b (list of ndarray): Bias gradients to add; each entry is transposed before being added.

        Returns:
            weightGradients (list of ndarray): Updated accumulated weight gradients.
            biasGradients (list of ndarray): Updated accumulated bias gradients.
        """
        for i in range(len(weightGradients)):
            weightGradients[i] += w[i]
            weightGradients[i] = np.clip(weightGradients[i], -1, 1)
        for i in range(len(biasGradients)):
            biasGradients[i] += b[i].T
            biasGradients[i] = np.clip(biasGradients[i], -1, 1)
        return weightGradients, biasGradients

    def _updateWeightsBiases(
        self, size, weights, biases, weightGradients, biasGradients,
        velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate,
    ):
        """
        Updates the weights and biases of the model.

        The accumulated gradients are divided by ``size`` before being applied.
        With momentum the step is ``v = momentumCoefficient * v - learningRate * g``
        followed by ``parameter += v``; otherwise ``parameter -= learningRate * g``.

        Args:
            size (int): Number of samples the gradients were accumulated over.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            weightGradients (list of ndarray): Weight gradients.
            biasGradients (list of ndarray): Bias gradients.
            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
            useMomentum (bool): Whether to use momentum.
            momentumCoefficient (float): Coefficient for momentum.
            learningRate (float): The learning rate for optimisation.

        Returns:
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        if size == 0:
            # Nothing was accumulated; dividing by zero would turn every
            # parameter into NaN, so leave the model unchanged.
            return weights, biases, velocityWeight, velocityBias
        if useMomentum:
            for i in range(len(weights)):
                # Plain assignment, matching the bias update below.
                velocityWeight[i] = momentumCoefficient * velocityWeight[i] - learningRate * (weightGradients[i] / size)
                weights[i] += velocityWeight[i]
            for i in range(len(biases)):
                velocityBias[i] = momentumCoefficient * velocityBias[i] - learningRate * (biasGradients[i] / size)
                biases[i] += velocityBias[i]
        else:
            for i in range(len(weights)):
                weights[i] = weights[i] - learningRate * (weightGradients[i] / size)
            for i in range(len(biases)):
                biases[i] -= learningRate * (biasGradients[i] / size)
        return weights, biases, velocityWeight, velocityBias