quacknet.optimisers

  1import numpy as np
  2
  3class Optimisers:
  4    def _trainGradientDescent(self, inputData, labels, epochs, weights, biases, momentumCoefficient, momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, _):
  5        """
  6        Trains a model using gradient descent.
  7
  8        Args:
  9            inputData (ndarray): All the training data.
 10            labels (ndarray): All the true labels for the training data.
 11            epochs (int): Number of training iterations over the dataset.
 12            weights (list of ndarray): Current weights of the model.
 13            biases (list of ndarray): Current biases of the model.
 14            momentumCoefficient (float): Coefficient for momentum.
 15            momentumDecay (float): Decay factor for the momentum coefficient.
 16            useMomentum (bool): Whether to use momentum.
 17            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
 18            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
 19            learningRate (float): The learning rate for optimisation.
 20            
 21        Returns: 
 22            l (list): Output of the network for each epoch.
 23            weights (list of ndarray): Updated weights after training.
 24            biases (list of ndarray): Updated biases after training.
 25            velocityWeight (list of ndarray): Updated velocity for weights.
 26            velocityBias (list of ndarray): Updated velocity for biases.
 27        """
 28        l = []
 29        if(useMomentum == True):
 30            self.initialiseVelocity()
 31        for _ in range(epochs):
 32            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
 33            for data in range(len(inputData)):
 34                layerNodes = self.forwardPropagation(inputData[data])
 35                l.append(layerNodes[len(layerNodes) - 1])
 36                w, b = self._backPropgation(layerNodes, weights, biases, labels[data])
 37                velocityWeight, velocityBias = self._addGradients(weightGradients, biasGradients, w, b)
 38            weights, biases, velocityWeight, velocityBias = self._updateWeightsBiases(len(inputData), weights, biases, weightGradients, biasGradients, velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate)
 39            momentumCoefficient *= momentumDecay
 40        return l, weights, biases, velocityWeight, velocityBias
 41
 42    def _trainStochasticGradientDescent(self, inputData, labels, epochs, weights, biases, momentumCoefficient, momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, _):
 43        """
 44        Trains a model using stochastic gradient descent (SGD).
 45
 46        Args:
 47            inputData (ndarray): All the training data.
 48            labels (ndarray): All the true labels for the training data.
 49            epochs (int): Number of training iterations over the dataset.
 50            weights (list of ndarray): Current weights of the model.
 51            biases (list of ndarray): Current biases of the model.
 52            momentumCoefficient (float): Coefficient for momentum.
 53            momentumDecay (float): Decay factor for the momentum coefficient.
 54            useMomentum (bool): Whether to use momentum.
 55            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
 56            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
 57            learningRate (float): The learning rate for optimisation.
 58            
 59        Returns: 
 60            l (list): Output of the network for each epoch.
 61            weights (list of ndarray): Updated weights after training.
 62            biases (list of ndarray): Updated biases after training.
 63            velocityWeight (list of ndarray): Updated velocity for weights.
 64            velocityBias (list of ndarray): Updated velocity for biases.
 65        """
 66        l = []
 67        if(useMomentum == True):
 68            self.initialiseVelocity()        
 69        for _ in range(epochs):
 70            for data in range(len(inputData)):
 71                layerNodes = self.forwardPropagation(inputData[data])
 72                l.append(layerNodes)
 73                w, b = self._backPropgation(layerNodes, weights, biases, labels[data])
 74                if(useMomentum == True):
 75                    velocityWeight = momentumCoefficient * velocityWeight - learningRate * w
 76                    weights += velocityWeight
 77                    velocityBias = momentumCoefficient * velocityBias - learningRate * b
 78                    biases += velocityBias
 79                else:
 80                    for i in range(len(weights)):
 81                        weights[i] -= learningRate * w[i]
 82                    for i in range(len(biases)):
 83                        biases[i] -= learningRate * b[i]
 84
 85            momentumCoefficient *= momentumDecay
 86        return l, weights, biases, self.velocityWeight, self.velocityBias
 87
 88    def _trainGradientDescentUsingBatching(self, inputData, labels, epochs, weights, biases, momentumCoefficient, momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, batchSize):
 89        """
 90        Trains a model using gradient descent.
 91
 92        Args:
 93            inputData (ndarray): All the training data.
 94            labels (ndarray): All the true labels for the training data.
 95            epochs (int): Number of training iterations over the dataset.
 96            weights (list of ndarray): Current weights of the model.
 97            biases (list of ndarray): Current biases of the model.
 98            momentumCoefficient (float): Coefficient for momentum.
 99            momentumDecay (float): Decay factor for the momentum coefficient.
100            useMomentum (bool): Whether to use momentum.
101            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
102            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
103            learningRate (float): The learning rate for optimisation.
104            batchSize (int): The size of each mini batch 
105            
106        Returns: 
107            l (list): Output of the network for each epoch.
108            weights (list of ndarray): Updated weights after training.
109            biases (list of ndarray): Updated biases after training.
110            velocityWeight (list of ndarray): Updated velocity for weights.
111            velocityBias (list of ndarray): Updated velocity for biases.
112        """
113        l = []
114        if(useMomentum == True):
115            velocityWeight, velocityBias = self.initialiseVelocity(velocityWeight, velocityBias, weights, biases)
116        for _ in range(epochs):
117            for i in range(0, len(inputData), batchSize):
118                batchData = inputData[i:i+batchSize]
119                batchLabels = labels[i:i+batchSize]
120                weightGradients, biasGradients = self._initialiseGradients(weights, biases)
121                for j in range(len(batchData)):
122                    layerNodes = self.forwardPropagation(batchData[j])
123                    l.append(layerNodes)
124                    w, b = self._backPropgation(layerNodes, weights, biases, batchLabels[j])
125                    weightGradients, biasGradients = self._addGradients(weightGradients, biasGradients, w, b)
126                weights, biases, velocityWeight, velocityBias = self._updateWeightsBiases(batchSize, weights, biases, weightGradients, biasGradients, velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate)
127            momentumCoefficient *= momentumDecay
128        return l, weights, biases, velocityWeight, velocityBias
129
130    def _initialiseVelocity(self, velocityWeight, velocityBias, weights, biases):
131        """
132        Initialise velocity terms for momentum optimisation.
133
134        Args:
135            velocityWeight (list of ndarray): Velocity terms for weights.
136            velocityBias (list of ndarray): Velocity terms for biases.
137            weights (list of ndarray): The weights of the model.
138            biases (list of ndarray): The biases of the model.
139
140        Returns:
141            velocityWeight (list of ndarray): Initialised velocity for weights.
142            velocityBias (list of ndarray): Initialised velocity for biases.
143        """
144        if(velocityWeight == None):
145            velocityWeight = []
146            for i in weights:
147                velocityWeight.append(np.zeros_like(i))
148        if(velocityBias == None):
149            velocityBias = []
150            for i in biases:
151                velocityBias.append(np.zeros_like(i))
152        return velocityWeight, velocityBias
153    
154    def _initialiseGradients(self, weights, biases):
155        """
156        Initialise gradients for weights and biases.
157
158        Args:
159            weights (list of ndarray): The weights of the model.
160            biases (list of ndarray): The biases of the model.
161
162        Returns:
163            weightGradients (list of ndarray): Initialised gradients for weights.
164            biasGradients (list of ndarray): Initialised gradients for biases.
165        """
166        weightGradients, biasGradients = [], []
167        for i in weights:
168            weightGradients.append(np.zeros_like(i))
169        for i in biases:
170            biasGradients.append(np.zeros_like(i))
171        return weightGradients, biasGradients
172
173    def _addGradients(self, weightGradients, biasGradients, w, b):
174        """
175        Accumulates gradients for weights and biases.
176
177        Args:
178            weightGradients (list of ndarray): Accumulated weight gradients.
179            biasGradients (list of ndarray): Accumulated bias gradients. 
180            w (list of ndarray): Gradients of the weights from the current batch.
181            b (list of ndarray): Gradients of the biases from the current batch.
182        
183        Returns:
184            weightGradients (list of ndarray): Updated accumulated weight gradients.
185            biasGradients (list of ndarray): Updated accumulated bias gradients. 
186        """
187        for i in range(len(weightGradients)):
188            weightGradients[i] += w[i]
189            weightGradients[i] = np.clip(weightGradients[i], -1, 1)
190        for i in range(len(biasGradients)):
191            biasGradients[i] += b[i].T
192            biasGradients[i] = np.clip(biasGradients[i], -1, 1)
193        return weightGradients, biasGradients
194    
195    def _updateWeightsBiases(self, size, weights, biases, weightGradients, biasGradients, velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate):
196        """
197        Updates the weights and biases of the model.
198
199        Args:
200            size (int): Number of samples in the batch.
201            weights (list of ndarray): Current weights of the model.
202            biases (list of ndarray): Current biases of the model.
203            weightGradients (list of ndarray): Weight gradients.
204            biasGradients (list of ndarray): Bias gradients. 
205            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
206            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
207            useMomentum (bool): Whether to use momentum.
208            momentumCoefficient (float): Coefficient for momentum.
209            learningRate (float): The learning rate for optimisation.
210            
211        Returns: 
212            weights (list of ndarray): Updated weights after training.
213            biases (list of ndarray): Updated biases after training.
214            velocityWeight (list of ndarray): Updated velocity for weights.
215            velocityBias (list of ndarray): Updated velocity for biases.
216        """
217        if(useMomentum == True):
218            for i in range(len(weights)):
219                velocityWeight[i] -= momentumCoefficient * velocityWeight[i] - learningRate * (weightGradients[i] / size)
220                weights[i] += velocityWeight[i]
221            for i in range(len(biases)):
222                velocityBias[i] = momentumCoefficient * velocityBias[i] - learningRate * (biasGradients[i] / size)
223                biases[i] += velocityBias[i]
224        else:
225            for i in range(len(weights)):
226                weights[i] = weights[i] - learningRate * (weightGradients[i] / size)
227            for i in range(len(biases)):
228                biases[i] -= learningRate * (biasGradients[i] / size)
229        return weights, biases, velocityWeight, velocityBias
class Optimisers:
    """
    Gradient-descent training routines (full-batch, stochastic and mini-batch)
    with optional momentum.

    The training methods call ``self.forwardPropagation`` and
    ``self._backPropgation``, which are not defined in this class and must be
    provided by the class that inherits from it.
    """

    def _trainGradientDescent(self, inputData, labels, epochs, weights, biases, momentumCoefficient, momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, _):
        """
        Trains a model using full-batch gradient descent.

        Gradients are accumulated over every sample and a single update is
        applied at the end of each epoch.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            epochs (int): Number of training iterations over the dataset.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            momentumCoefficient (float): Coefficient for momentum.
            momentumDecay (float): Factor the momentum coefficient is multiplied by after each epoch.
            useMomentum (bool): Whether to use momentum.
            velocityWeight (list of ndarray): Velocity terms for weights, or None to start from zero.
            velocityBias (list of ndarray): Velocity terms for biases, or None to start from zero.
            learningRate (float): The learning rate for optimisation.
            _ : Unused; keeps the signature parallel with the batching variant.

        Returns:
            l (list): Last element of the forward pass result, one entry per sample per epoch.
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        l = []
        if useMomentum:
            velocityWeight, velocityBias = self._initialiseVelocity(velocityWeight, velocityBias, weights, biases)
        sampleCount = len(inputData)
        for _ in range(epochs):
            weightGradients, biasGradients = self._initialiseGradients(weights, biases)
            for index in range(sampleCount):
                layerNodes = self.forwardPropagation(inputData[index])
                l.append(layerNodes[-1])
                w, b = self._backPropgation(layerNodes, weights, biases, labels[index])
                # The accumulators must receive the result; the velocities are
                # only ever touched by _updateWeightsBiases.
                weightGradients, biasGradients = self._addGradients(weightGradients, biasGradients, w, b)
            if sampleCount > 0:
                # Skipping the update on an empty dataset avoids a 0/0 division
                # that would turn every parameter into NaN.
                weights, biases, velocityWeight, velocityBias = self._updateWeightsBiases(sampleCount, weights, biases, weightGradients, biasGradients, velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate)
            momentumCoefficient *= momentumDecay
        return l, weights, biases, velocityWeight, velocityBias

    def _trainStochasticGradientDescent(self, inputData, labels, epochs, weights, biases, momentumCoefficient, momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, _):
        """
        Trains a model using stochastic gradient descent (SGD).

        The parameters are updated after every individual sample.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            epochs (int): Number of training iterations over the dataset.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            momentumCoefficient (float): Coefficient for momentum.
            momentumDecay (float): Factor the momentum coefficient is multiplied by after each epoch.
            useMomentum (bool): Whether to use momentum.
            velocityWeight (list of ndarray): Velocity terms for weights, or None to start from zero.
            velocityBias (list of ndarray): Velocity terms for biases, or None to start from zero.
            learningRate (float): The learning rate for optimisation.
            _ : Unused; keeps the signature parallel with the batching variant.

        Returns:
            l (list): Forward pass result (all layers), one entry per sample per epoch.
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        l = []
        if useMomentum:
            velocityWeight, velocityBias = self._initialiseVelocity(velocityWeight, velocityBias, weights, biases)
        for _ in range(epochs):
            for index in range(len(inputData)):
                layerNodes = self.forwardPropagation(inputData[index])
                # NOTE(review): the full-batch variant stores only layerNodes[-1];
                # kept as-is here so the returned structure does not change.
                l.append(layerNodes)
                w, b = self._backPropgation(layerNodes, weights, biases, labels[index])
                if useMomentum:
                    # Velocities and parameters are lists of arrays, so the
                    # momentum step has to be applied layer by layer.
                    for i in range(len(weights)):
                        velocityWeight[i] = momentumCoefficient * velocityWeight[i] - learningRate * w[i]
                        weights[i] += velocityWeight[i]
                    for i in range(len(biases)):
                        velocityBias[i] = momentumCoefficient * velocityBias[i] - learningRate * b[i]
                        biases[i] += velocityBias[i]
                else:
                    for i in range(len(weights)):
                        weights[i] -= learningRate * w[i]
                    for i in range(len(biases)):
                        biases[i] -= learningRate * b[i]

            momentumCoefficient *= momentumDecay
        return l, weights, biases, velocityWeight, velocityBias

    def _trainGradientDescentUsingBatching(self, inputData, labels, epochs, weights, biases, momentumCoefficient, momentumDecay, useMomentum, velocityWeight, velocityBias, learningRate, batchSize):
        """
        Trains a model using mini-batch gradient descent.

        Gradients are accumulated over each batch and one update is applied
        per batch.

        Args:
            inputData (ndarray): All the training data.
            labels (ndarray): All the true labels for the training data.
            epochs (int): Number of training iterations over the dataset.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            momentumCoefficient (float): Coefficient for momentum.
            momentumDecay (float): Factor the momentum coefficient is multiplied by after each epoch.
            useMomentum (bool): Whether to use momentum.
            velocityWeight (list of ndarray): Velocity terms for weights, or None to start from zero.
            velocityBias (list of ndarray): Velocity terms for biases, or None to start from zero.
            learningRate (float): The learning rate for optimisation.
            batchSize (int): The size of each mini batch.

        Returns:
            l (list): Forward pass result (all layers), one entry per sample per epoch.
            weights (list of ndarray): Updated weights after training.
            biases (list of ndarray): Updated biases after training.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        l = []
        if useMomentum:
            velocityWeight, velocityBias = self._initialiseVelocity(velocityWeight, velocityBias, weights, biases)
        for _ in range(epochs):
            for start in range(0, len(inputData), batchSize):
                batchData = inputData[start:start + batchSize]
                batchLabels = labels[start:start + batchSize]
                weightGradients, biasGradients = self._initialiseGradients(weights, biases)
                for j in range(len(batchData)):
                    layerNodes = self.forwardPropagation(batchData[j])
                    l.append(layerNodes)
                    w, b = self._backPropgation(layerNodes, weights, biases, batchLabels[j])
                    weightGradients, biasGradients = self._addGradients(weightGradients, biasGradients, w, b)
                # The last batch can be shorter than batchSize, so average over
                # the number of samples that were actually accumulated.
                weights, biases, velocityWeight, velocityBias = self._updateWeightsBiases(len(batchData), weights, biases, weightGradients, biasGradients, velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate)
            momentumCoefficient *= momentumDecay
        return l, weights, biases, velocityWeight, velocityBias

    def _initialiseVelocity(self, velocityWeight, velocityBias, weights, biases):
        """
        Initialise velocity terms for momentum optimisation.

        A velocity list that is None is replaced by zero arrays shaped like the
        corresponding parameters; an existing list is returned untouched.

        Args:
            velocityWeight (list of ndarray): Velocity terms for weights, or None.
            velocityBias (list of ndarray): Velocity terms for biases, or None.
            weights (list of ndarray): The weights of the model.
            biases (list of ndarray): The biases of the model.

        Returns:
            velocityWeight (list of ndarray): Initialised velocity for weights.
            velocityBias (list of ndarray): Initialised velocity for biases.
        """
        # Identity test: "== None" on an ndarray compares element-wise and
        # cannot be used as an if-condition.
        if velocityWeight is None:
            velocityWeight = [np.zeros_like(layer) for layer in weights]
        if velocityBias is None:
            velocityBias = [np.zeros_like(layer) for layer in biases]
        return velocityWeight, velocityBias

    def _initialiseGradients(self, weights, biases):
        """
        Initialise gradients for weights and biases.

        Args:
            weights (list of ndarray): The weights of the model.
            biases (list of ndarray): The biases of the model.

        Returns:
            weightGradients (list of ndarray): Zero arrays shaped like each weight array.
            biasGradients (list of ndarray): Zero arrays shaped like each bias array.
        """
        weightGradients = [np.zeros_like(layer) for layer in weights]
        biasGradients = [np.zeros_like(layer) for layer in biases]
        return weightGradients, biasGradients

    def _addGradients(self, weightGradients, biasGradients, w, b):
        """
        Accumulates gradients for weights and biases.

        After each addition the running total is clipped to [-1, 1], so the
        clip applies to the accumulated value, not to the individual gradient.
        The accumulator lists are modified in place and also returned.

        Args:
            weightGradients (list of ndarray): Accumulated weight gradients.
            biasGradients (list of ndarray): Accumulated bias gradients.
            w (list of ndarray): Gradients of the weights for the current sample.
            b (list of ndarray): Gradients of the biases for the current sample.

        Returns:
            weightGradients (list of ndarray): Updated accumulated weight gradients.
            biasGradients (list of ndarray): Updated accumulated bias gradients.
        """
        for i in range(len(weightGradients)):
            weightGradients[i] += w[i]
            weightGradients[i] = np.clip(weightGradients[i], -1, 1)
        for i in range(len(biasGradients)):
            # The bias gradient is transposed before it is added.
            biasGradients[i] += b[i].T
            biasGradients[i] = np.clip(biasGradients[i], -1, 1)
        return weightGradients, biasGradients

    def _updateWeightsBiases(self, size, weights, biases, weightGradients, biasGradients, velocityWeight, velocityBias, useMomentum, momentumCoefficient, learningRate):
        """
        Updates the weights and biases of the model.

        The accumulated gradients are divided by ``size`` to obtain the mean
        gradient. With momentum the velocity becomes
        ``momentumCoefficient * velocity - learningRate * meanGradient`` and is
        added to the parameter; without momentum the parameter is reduced by
        ``learningRate * meanGradient``.

        Args:
            size (int): Number of samples the gradients were accumulated over.
            weights (list of ndarray): Current weights of the model.
            biases (list of ndarray): Current biases of the model.
            weightGradients (list of ndarray): Accumulated weight gradients.
            biasGradients (list of ndarray): Accumulated bias gradients.
            velocityWeight (list of ndarray): Velocity terms for weights, if using momentum.
            velocityBias (list of ndarray): Velocity terms for biases, if using momentum.
            useMomentum (bool): Whether to use momentum.
            momentumCoefficient (float): Coefficient for momentum.
            learningRate (float): The learning rate for optimisation.

        Returns:
            weights (list of ndarray): Updated weights.
            biases (list of ndarray): Updated biases.
            velocityWeight (list of ndarray): Updated velocity for weights.
            velocityBias (list of ndarray): Updated velocity for biases.
        """
        if useMomentum:
            for i in range(len(weights)):
                # Plain assignment, matching the bias update below; subtracting
                # this expression from the old velocity would flip the step direction.
                velocityWeight[i] = momentumCoefficient * velocityWeight[i] - learningRate * (weightGradients[i] / size)
                weights[i] += velocityWeight[i]
            for i in range(len(biases)):
                velocityBias[i] = momentumCoefficient * velocityBias[i] - learningRate * (biasGradients[i] / size)
                biases[i] += velocityBias[i]
        else:
            for i in range(len(weights)):
                weights[i] = weights[i] - learningRate * (weightGradients[i] / size)
            for i in range(len(biases)):
                biases[i] -= learningRate * (biasGradients[i] / size)
        return weights, biases, velocityWeight, velocityBias