quacknet.convulationalFeutures
import numpy as np
import math

'''
Convolutional neural network:
Kernels:
- a kernel is a cube of weights (e.g. 2 x 2 x 2)
- it moves across the image from left to right, with each step being a stride (e.g. 2)
- once it reaches the right edge it moves to the row underneath and starts again at the leftmost pixel of that row (so no bouncing back and forth)
- at each position the kernel takes the dot product of its weights and the pixel values underneath it, across all layers
- while the kernel is moving it may extend past the image, so padding is added
- the padding value can be either 1 or 0, or the out-of-bounds values can be ignored, which makes the output smaller

Activation:
- goes through all the data the last layer produced and applies either ReLU or leaky ReLU

Pooling:
- a grid (e.g. 2 x 2) that moves the same way as kernels
- the grid performs either max pooling or average pooling
- max pooling takes the highest value under the grid, whilst average pooling takes the mean

Neural Network:
- flattens the tensors and feeds them into a neural network
'''
class ConvulationalNetwork:
    def _padImage(self, inputTensor, kernalSize, strideLength, typeOfPadding): # pads each image in the batch
        """
        Pads each image in the input tensor.

        Args:
            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).
            kernalSize (int): The size of the convolution kernel (assumed to be square).
            strideLength (int): The stride length for convolution.
            typeOfPadding (int): The value used for padding the images.

        Returns:
            ndarray: A 3D array of padded images.
        """
        paddingTensor = []
        for image in inputTensor:
            paddingSize = math.ceil(((strideLength - 1) * len(image) - strideLength + kernalSize) / 2)
            padding = np.full((image.shape[0] + paddingSize * 2, image.shape[1] + paddingSize * 2), typeOfPadding) # creates a 2D array of the padded size, filled with the padding value
            padding[paddingSize: paddingSize + image.shape[0], paddingSize: paddingSize + image.shape[1]] = image
            paddingTensor.append(padding)
        return np.array(paddingTensor)

    def _kernalisation(self, inputTensor, kernalsWeights, kernalsBiases, sizeOfGrid = 2, usePadding = True, typeOfPadding = 0, strideLength = 2):
        """
        Performs the convolution operation on the input tensor.

        Args:
            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).
            kernalsWeights (ndarray): A 4D array containing the weights of the convolution kernels.
            kernalsBiases (ndarray): A 1D array containing the bias for each kernel.
            sizeOfGrid (int, optional): The size of the convolution grid. Defaults to 2.
            usePadding (bool, optional): Whether to pad the input images. Defaults to True.
            typeOfPadding (int, optional): The value used for padding. Defaults to 0.
            strideLength (int, optional): The stride length for convolution. Defaults to 2.

        Returns:
            ndarray: A 3D array of feature maps with shape (number of kernels, output height, output width).
        """
        tensorKernals = []
        if(usePadding == True):
            imageTensor = self._padImage(inputTensor, sizeOfGrid, strideLength, typeOfPadding)
        else:
            imageTensor = inputTensor
        outputHeight = (imageTensor.shape[1] - sizeOfGrid) // strideLength + 1
        outputWidth = (imageTensor.shape[2] - sizeOfGrid) // strideLength + 1
        for i in range(len(kernalsWeights)):
            output = np.zeros((outputHeight, outputWidth))
            kernal = kernalsWeights[i]
            biases = kernalsBiases[i]
            for x in range(outputHeight):
                indexX = x * strideLength
                for y in range(outputWidth):
                    indexY = y * strideLength
                    gridOfValues = imageTensor[:, indexX: indexX + sizeOfGrid, indexY: indexY + sizeOfGrid] # 3D window covering every layer of the input
                    dotProduct = np.sum(gridOfValues * kernal)
                    output[x, y] = dotProduct + biases

            tensorKernals.append(output)
        return np.stack(tensorKernals, axis = 0) # result has shape (numberOfKernals, outputHeight, outputWidth)

    def _activation(self, inputTensor):
        """
        Applies the Leaky ReLU activation function to the input tensor.

        Args:
            inputTensor (ndarray): A 3D array representing the input.

        Returns:
            ndarray: A tensor with the same shape as the input with Leaky ReLU applied to it.
        """
        alpha = 0.01
        return np.maximum(inputTensor, inputTensor * alpha)

    def _pooling(self, inputTensor, sizeOfGrid = 2, strideLength = 2, typeOfPooling = "max"):
        """
        Applies pooling (max or average) to reduce the size of the batch of inputs.

        Args:
            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).
            sizeOfGrid (int, optional): The size of the pooling grid. Defaults to 2.
            strideLength (int, optional): The stride length for pooling. Defaults to 2.
            typeOfPooling (str, optional): The type of pooling to apply ('max', 'average', 'global'). Defaults to "max".

        Returns:
            ndarray: A 3D array of feature maps with reduced height and width (or a 1D array of per-map averages when global pooling is used).
        """
        if(typeOfPooling.lower() == "global" or typeOfPooling.lower() == "gap"):
            return self._poolingGlobalAverage(inputTensor)
        tensorPools = []

        if(typeOfPooling.lower() == "max"):
            poolFunc = np.max
        else:
            poolFunc = np.mean

        for image in inputTensor: # the tensor is a 3D structure, so iterate over its 2D slices (e.g. one layer or image at a time)
            outputHeight = (image.shape[0] - sizeOfGrid) // strideLength + 1
            outputWidth = (image.shape[1] - sizeOfGrid) // strideLength + 1
            output = np.zeros((outputHeight, outputWidth))
            for x in range(outputHeight):
                for y in range(outputWidth):
                    indexX = x * strideLength
                    indexY = y * strideLength
                    gridOfValues = image[indexX: indexX + sizeOfGrid, indexY: indexY + sizeOfGrid]
                    output[x, y] = poolFunc(gridOfValues)
            tensorPools.append(output)
        return np.array(tensorPools)

    def _poolingGlobalAverage(self, inputTensor):
        """
        Performs global average pooling, reducing each feature map to a single value.

        Args:
            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).

        Returns:
            ndarray: A 1D array containing the global average of each feature map.
        """
        output = np.mean(inputTensor, axis = (1, 2))
        return output

    def _flatternTensor(self, inputTensor):
        """
        Flattens a tensor into a 1D array.

        Args:
            inputTensor (ndarray): A tensor of any shape.

        Returns:
            ndarray: A 1D array containing every element of the input tensor.
        """
        return np.array(inputTensor).reshape(-1)
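
A minimal usage sketch of the forward pass, assuming the module is importable as quacknet.convulationalFeutures. The input shape, the random kernel weights, and the zero biases are illustration-only assumptions; the underscore-prefixed methods are internal helpers, so the library's higher-level API (not shown here) may be the intended entry point.

import numpy as np
from quacknet.convulationalFeutures import ConvulationalNetwork

conv = ConvulationalNetwork()
images = np.random.rand(1, 4, 4)       # one 4 x 4 image, shape (number of images, height, width)
kernels = np.random.rand(2, 1, 2, 2)   # two kernels; the second axis matches the first axis of images
biases = np.zeros(2)                   # one bias per kernel

featureMaps = conv._kernalisation(images, kernels, biases, sizeOfGrid=2, usePadding=False, strideLength=2)
activated = conv._activation(featureMaps)                                    # leaky ReLU, alpha = 0.01
pooled = conv._pooling(activated, sizeOfGrid=2, strideLength=2, typeOfPooling="max")
flat = conv._flatternTensor(pooled)                                          # 1D input for the dense layers

print(featureMaps.shape, activated.shape, pooled.shape, flat.shape)
# expected: (2, 2, 2) (2, 2, 2) (2, 1, 1) (2,)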