quacknet.convulationalBackpropagation
import numpy as np

class CNNbackpropagation:
    def _ConvolutionDerivative(self, errorPatch, kernals, inputTensor, stride):
        """
        Compute gradients for convolutional layer weights, biases and input errors during backpropagation.

        Args:
            errorPatch (ndarray): Error gradient from the next layer.
            kernals (ndarray): Kernels used during forward propagation, shape (num kernels, input channels, kernel height, kernel width).
            inputTensor (ndarray): Input to the convolutional layer during forward propagation.
            stride (int): Stride length used during convolution.

        Returns:
            weightGradients (ndarray): Gradients of the loss with respect to the kernels.
            biasGradients (ndarray): Gradients of the loss with respect to the biases for each kernel.
            inputErrorTerms (ndarray): Error terms propagated to the previous layer.
        """
        ###################################
        # The error gradient from the layer in front arrives as an error patch,
        # the same size as what the convolutional layer output during forward propagation.
        # Take the same patch of the input again, but this time multiply each value in the patch
        # by the single value inside the error patch;
        # this gives the gradient of the loss with respect to one kernel's weights.

        # The gradient of the loss with respect to one kernel's bias is the sum of all its error terms,
        # because the bias is added to every output position during forward propagation.

        # The gradient of the loss with respect to the input gives the error terms for the layer behind it.
        # First the kernel has to be flipped, meaning flipped left to right and then top to bottom (the channel axes are not flipped);
        # the gradient of one pixel is the sum of each error term multiplied by the flipped kernel.
        ###################################

        kernalSize = self.kernalSize  # all kernels are the same square shape
        weightGradients = np.zeros((len(inputTensor), len(kernals), kernalSize, kernalSize))  # kernels are the same size
        outputHeight, outputWidth = errorPatch.shape[1], errorPatch.shape[2]
        for output in range(len(kernals)):
            for layer in range(len(inputTensor)):
                for i in range(outputHeight):
                    for j in range(outputWidth):
                        startI = i * stride
                        startJ = j * stride
                        if(startI + kernalSize > inputTensor.shape[1] or startJ + kernalSize > inputTensor.shape[2]):
                            continue
                        kernal = inputTensor[layer, startI: startI + kernalSize, startJ: startJ + kernalSize]
                        weightGradients[layer, output] += kernal * errorPatch[output, i, j]

        biasGradients = np.sum(errorPatch, axis=(1, 2))

        inputErrorTerms = np.zeros_like(inputTensor)
        flipped = kernals[:, :, ::-1, ::-1]
        for output in range(len(errorPatch)):
            for layer in range(len(inputTensor)):
                for i in range(outputHeight):
                    inputI = i * stride
                    for j in range(outputWidth):
                        inputJ = j * stride
                        if(inputI + kernalSize > inputTensor.shape[1] or inputJ + kernalSize > inputTensor.shape[2]):
                            continue
                        errorKernal = errorPatch[output, i, j]
                        inputErrorTerms[layer, inputI: inputI + kernalSize, inputJ: inputJ + kernalSize] += errorKernal * flipped[output, layer]

        weightGradients = np.transpose(weightGradients, (1, 0, 2, 3))
        return weightGradients, biasGradients, inputErrorTerms


    def _MaxPoolingDerivative(self, errorPatch, inputTensor, sizeOfGrid, strideLength):
        """
        Compute the gradient of the loss with respect to the input of the max pooling layer during backpropagation.

        Args:
            errorPatch (ndarray): Error gradient from the next layer.
            inputTensor (ndarray): Input to the max pooling layer during forward propagation.
            sizeOfGrid (int): Size of the pooling window.
            strideLength (int): Stride length used during pooling.

        Returns:
            inputGradient (ndarray): Gradient of the loss with respect to the inputTensor.
        """
        inputGradient = np.zeros_like(inputTensor, dtype=np.float64)
        outputHeight = (inputTensor.shape[1] - sizeOfGrid) // strideLength + 1
        outputWidth = (inputTensor.shape[2] - sizeOfGrid) // strideLength + 1
        for image in range(len(inputTensor)):  # the tensor is a 3D structure, so this iterates over its 2D slices (e.g. a layer or image)
            for x in range(outputHeight):
                for y in range(outputWidth):
                    indexX = x * strideLength
                    indexY = y * strideLength

                    gridOfValues = inputTensor[image, indexX: indexX + sizeOfGrid, indexY: indexY + sizeOfGrid]
                    indexMax = np.argmax(gridOfValues)
                    maxX, maxY = divmod(indexMax, sizeOfGrid)

                    # route the error only to the position that held the maximum in the window
                    inputGradient[image, indexX + maxX, indexY + maxY] += errorPatch[image, x, y]
        return inputGradient

    def _AveragePoolingDerivative(self, errorPatch, inputTensor, sizeOfGrid, strideLength):
        """
        Compute the gradient of the loss with respect to the input of the average pooling layer during backpropagation.

        Args:
            errorPatch (ndarray): Error gradient from the next layer.
            inputTensor (ndarray): Input to the average pooling layer during forward propagation.
            sizeOfGrid (int): Size of the pooling window.
            strideLength (int): Stride length used during pooling.

        Returns:
            inputGradient (ndarray): Gradient of the loss with respect to the inputTensor.
        """
        inputGradient = np.zeros_like(inputTensor, dtype=np.float32)
        outputHeight = (inputTensor.shape[1] - sizeOfGrid) // strideLength + 1
        outputWidth = (inputTensor.shape[2] - sizeOfGrid) // strideLength + 1
        avgMultiplier = 1 / (sizeOfGrid ** 2)
        for image in range(len(inputTensor)):  # the tensor is a 3D structure, so this iterates over its 2D slices (e.g. a layer or image)
            for x in range(outputHeight):
                for y in range(outputWidth):
                    indexX = x * strideLength
                    indexY = y * strideLength
                    # spread the error evenly over every position in the window
                    newValues = errorPatch[image, x, y] * avgMultiplier
                    inputGradient[image, indexX: indexX + sizeOfGrid, indexY: indexY + sizeOfGrid] += newValues
        return inputGradient

    def _GlobalAveragePoolingDerivative(self, inputTensor):
        """
        Compute the gradient of the loss with respect to the input of the global average pooling layer during backpropagation.

        Args:
            inputTensor (ndarray): Input to the global average pooling layer during forward propagation.

        Returns:
            inputGradient (ndarray): Gradient of the loss with respect to the inputTensor.
        """
        return np.ones_like(inputTensor) * (1 / (inputTensor.shape[1] * inputTensor.shape[2]))

    def _ActivationLayerDerivative(self, errorPatch, activationDerivative, inputTensor):
        """
        Compute the gradient of the loss with respect to the input of the activation layer during backpropagation.

        Args:
            errorPatch (ndarray): Error gradient from the next layer.
            activationDerivative (function): Derivative function of the activation function.
            inputTensor (ndarray): Input to the activation layer during forward propagation.

        Returns:
            inputGradient (ndarray): Gradient of the loss with respect to the inputTensor.
        """
        return errorPatch * activationDerivative(inputTensor)
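
A minimal sketch of calling _ConvolutionDerivative on toy data. It assumes the class can be used standalone and that kernalSize may be assigned directly; in quacknet that attribute is normally set by whichever layer owns this class, so the manual assignment here is only for illustration. Shapes follow the conventions in the docstring above.

# Hedged usage sketch for _ConvolutionDerivative (assumed standalone use).
import numpy as np

backprop = CNNbackpropagation()
backprop.kernalSize = 2  # assumed attribute, set manually for this sketch

inputTensor = np.arange(2 * 4 * 4, dtype=np.float64).reshape(2, 4, 4)  # 2 channels, 4x4
kernals = np.random.randn(3, 2, 2, 2)                                  # 3 kernels, 2 channels, 2x2
stride = 2
errorPatch = np.ones((3, 2, 2))  # same shape as the forward-pass output: 3 kernels, 2x2

weightGradients, biasGradients, inputErrorTerms = backprop._ConvolutionDerivative(
    errorPatch, kernals, inputTensor, stride
)
print(weightGradients.shape)   # (3, 2, 2, 2) - one gradient per kernel per input channel
print(biasGradients.shape)     # (3,)        - one bias gradient per kernel
print(inputErrorTerms.shape)   # (2, 4, 4)   - same shape as inputTensor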
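
A similar sketch for the pooling and activation derivatives on a single-channel 4x4 input with a 2x2 window and stride 2. This is a toy illustration of the methods in isolation, not how quacknet wires these calls internally; the ReLU derivative below is an assumed example activation.

# Hedged usage sketch for the pooling and activation derivatives.
import numpy as np

backprop = CNNbackpropagation()

inputTensor = np.array([[[ 1.0,  2.0,  3.0,  4.0],
                         [ 5.0,  6.0,  7.0,  8.0],
                         [ 9.0, 10.0, 11.0, 12.0],
                         [13.0, 14.0, 15.0, 16.0]]])
errorPatch = np.array([[[0.1, 0.2],
                        [0.3, 0.4]]])  # same shape as the 2x2 pooled output

maxGrad = backprop._MaxPoolingDerivative(errorPatch, inputTensor, sizeOfGrid=2, strideLength=2)
# Each error value is routed to the position of the maximum in its window,
# e.g. the 0.1 lands on the 6.0 at index (1, 1).

avgGrad = backprop._AveragePoolingDerivative(errorPatch, inputTensor, sizeOfGrid=2, strideLength=2)
# Each error value is spread evenly over its window, i.e. divided by 2 * 2 = 4.

def reluDerivative(x):
    # assumed example activation derivative: 1 where the input was positive, else 0
    return (x > 0).astype(np.float64)

actGrad = backprop._ActivationLayerDerivative(maxGrad, reluDerivative, inputTensor)
print(maxGrad)
print(avgGrad)
print(actGrad.shape)  # (1, 4, 4) - same shape as inputTensor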