quacknet.convulationalFeutures

  1import numpy as np
  2import math
  3
'''
Convolutional neural network:
Kernels:
-is a cube (e.g. 2 x 2 x 2) that contains weights
-it moves across the image starting from the left to the right, with each step being a stride (e.g. 2)
-once the cube reaches the very right it goes to the row underneath and starts at the very left pixel of that row (so no bouncing back and forth)
-the cube calculates the dot product of its weights and the pixel values in the image under the cube for all layers
-when the cube is moving it may not be over the image so you add padding
-the padding can be either 1 or 0, or you can ignore those values which means the image will get smaller

Activation:
-goes through all the data that the last layer made and uses either ReLU or leaky ReLU

Pooling:
-is a grid (e.g. 2 x 2) that moves the same as kernels
-however the grid can either be max pooling or average pooling
-max pooling gets the highest value in the grid of the image whilst average gets the average

Neural Network:
-flattens the tensors and inputs into a neural network
'''
 25
 26class ConvulationalNetwork:
 27    def _padImage(self, inputTensor, kernalSize, strideLength, typeOfPadding): #pads image
 28        """
 29        Pads each image in the input tensor.
 30
 31        Args:
 32            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).
 33            kernalSize (int): The size of the covolution kernel (assumed it is a square).
 34            strideLength (int): The stride length for convolution.
 35            typeOfPadding (int): The value used for padding the images.
 36        
 37        Returns:
 38            ndarray: A 3D array of padded images.
 39        """
 40        paddingTensor = []
 41        for image in inputTensor:
 42            paddingSize = math.ceil(((strideLength - 1) * len(image) - strideLength + kernalSize) / 2)
 43            padding = np.full((image.shape[0] + paddingSize * 2, image.shape[1] + paddingSize * 2), typeOfPadding) #creates an 2d numpy array of size paddingSize x paddingSize
 44            padding[paddingSize: paddingSize + image.shape[0], paddingSize: paddingSize + image.shape[1]] = image
 45            paddingTensor.append(padding)
 46        return np.array(paddingTensor)
 47
 48    def _kernalisation(self, inputTensor, kernalsWeights, kernalsBiases, sizeOfGrid = 2, usePadding = True, typeOfPadding= 0, strideLength = 2):
 49        """
 50        Performs the convolution operation on the input tensor.
 51
 52        Args:
 53            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).
 54            kernalsWeights (ndarray): A 4D array containing weights of the convolution kernels.
 55            kernalsWeights (ndarray): A 1D array containing biases for each kernels.
 56            sizeOfGrid (int, optional): The size of the covolution grid. Default is 2.
 57            usePadding (bool, optional): Whether to pad the input images. Default is True.
 58            typeOfPadding (int, optional): The value used for padding. Defaults to 0.
 59            strideLength (int): The stride length for convolution. Defaults to 2.
 60
 61        Returns:
 62            ndarray: A 3D array of feuture maps with shape.
 63        """
 64        tensorKernals = []
 65        if(usePadding == True):
 66            imageTensor = self._padImage(inputTensor, sizeOfGrid, strideLength, typeOfPadding)
 67        else:
 68            imageTensor = inputTensor
 69        outputHeight = (imageTensor.shape[1] - sizeOfGrid) // strideLength + 1
 70        outputWidth = (imageTensor.shape[2] - sizeOfGrid) // strideLength + 1
 71        for i in range(len(kernalsWeights)):
 72            output = np.zeros((outputHeight, outputWidth))
 73            kernal = kernalsWeights[i]
 74            biases = kernalsBiases[i]
 75            for x in range(outputHeight):
 76                indexX = x * strideLength
 77                for y in range(outputWidth):
 78                    indexY = y * strideLength
 79                    gridOfValues = imageTensor[:, indexX: indexX + sizeOfGrid, indexY: indexY + sizeOfGrid] # 2d grid
 80                    dotProduct = np.sum(gridOfValues * kernal) 
 81                    output[x, y] = dotProduct + biases
 82                    
 83            tensorKernals.append(output)
 84        return np.stack(tensorKernals, axis = 0) #tensorKernals = (outputHeight, outputWidth, numberOfKernals)
 85                    
 86    def _activation(self, inputTensor):
 87        """
 88        Applies the Leaky ReLU activation function to the input tensor.
 89
 90        Args:
 91            inputTensor (ndarray): A 3D array representing the input.
 92        
 93        Returns:
 94            ndarray: A tensor with the same shape as the input with Leaky ReLU applied to it.
 95        """
 96        alpha = 0.01
 97        return np.maximum(inputTensor, inputTensor * alpha)
 98
 99    def _pooling(self, inputTensor, sizeOfGrid = 2, strideLength = 2, typeOfPooling = "max"):
100        """
101        Applies pooling (max or average) to reduce the size of the batch of inputs.
102
103        Args:
104            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).
105            sizeOfGrid (int, optional): The size of the pooling grid. Default is 2.
106            strideLength (int, optional): The stride length for pooling. Defult is 2.
107            typeOfPadding (int, optional): The type of pooling to apply ('max', 'min', 'global'), Defaults to "max".
108
109        Returns:
110            ndarray: A 3D array of feuture maps with reduced shape.
111        """
112        if(typeOfPooling.lower()== "global" or typeOfPooling.lower() == "gap"):
113            return self._poolingGlobalAverage(inputTensor)
114        tensorPools = []
115
116        if(typeOfPooling.lower() == "max"):
117            poolFunc = np.max
118        else:
119            poolFunc = np.mean
120
121        for image in inputTensor: # tensor is a 3d structures, so it is turning it into a 2d array (eg. an layer or image)
122            outputHeight = (image.shape[0] - sizeOfGrid) // strideLength + 1
123            outputWidth = (image.shape[1] - sizeOfGrid) // strideLength + 1
124            output = np.zeros((outputHeight, outputWidth))
125            for x in range(outputHeight):
126                for y in range(outputWidth):
127                    indexX = x * strideLength
128                    indexY = y * strideLength
129                    gridOfValues = image[indexX: indexX + sizeOfGrid, indexY: indexY + sizeOfGrid]
130                    output[x, y] = poolFunc(gridOfValues)
131            tensorPools.append(output)
132        return np.array(tensorPools)
133    
134    def _poolingGlobalAverage(self, inputTensor):
135        """
136        Performs global average pooling, reducing each feuture map to a single value.
137
138        Args:
139            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).
140        
141        Returns:
142            ndarray: A 2D array containing global averages for each feuture map.
143        """
144        output = np.mean(inputTensor, axis = (1, 2))
145        return output
146
147    def _flatternTensor(self, inputTensor):
148        """
149        Flattens a tensor into a 1D array.
150
151        Args:
152            inputTensor (ndarray): A tensor of any shape.
153        
154        Returns:
155            ndarray: A 1D array containing every element of the input tensor.
156        """
157        return np.array(inputTensor).reshape(-1)
# NOTE(review): this definition duplicates the ConvulationalNetwork class that
# appears earlier in this file (the duplicate -- including a stray doubled
# `class` header -- looks like a paste/extraction artifact). At import time the
# later definition shadows the earlier one; consider removing one copy.
class ConvulationalNetwork:
    def _padImage(self, inputTensor, kernalSize, strideLength, typeOfPadding):
        """
        Pads each image in the input tensor with a constant border.

        The border width follows the "same"-convolution formula, rounded up,
        so a subsequent strided convolution keeps (roughly) the input size.

        Args:
            inputTensor (ndarray): A 3D array of images with shape (number of images, height, width).
            kernalSize (int): The size of the convolution kernel (assumed square).
            strideLength (int): The stride length for convolution.
            typeOfPadding (int): The constant value used to fill the padded border.

        Returns:
            ndarray: A 3D array of padded images.
        """
        paddedImages = []
        for image in inputTensor:
            # "Same" padding size, rounded up. Uses the image height for the
            # formula, so non-square images get the same border on both axes
            # -- assumes roughly square inputs; TODO confirm for non-square.
            paddingSize = math.ceil(((strideLength - 1) * len(image) - strideLength + kernalSize) / 2)
            # Pick a dtype that can hold both the image and the fill value.
            # (A bare np.full with an int fill would create an int array and
            # silently truncate float image data on assignment.)
            canvasDtype = np.result_type(image, typeOfPadding)
            padded = np.full(
                (image.shape[0] + paddingSize * 2, image.shape[1] + paddingSize * 2),
                typeOfPadding,
                dtype=canvasDtype,
            )
            padded[paddingSize: paddingSize + image.shape[0], paddingSize: paddingSize + image.shape[1]] = image
            paddedImages.append(padded)
        return np.array(paddedImages)

    def _kernalisation(self, inputTensor, kernalsWeights, kernalsBiases, sizeOfGrid = 2, usePadding = True, typeOfPadding= 0, strideLength = 2):
        """
        Performs the convolution operation on the input tensor.

        Args:
            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).
            kernalsWeights (ndarray): A 4D array of kernel weights with shape (number of kernels, channels, sizeOfGrid, sizeOfGrid).
            kernalsBiases (ndarray): A 1D array containing one bias per kernel.
            sizeOfGrid (int, optional): The size of the convolution window. Default is 2.
            usePadding (bool, optional): Whether to pad the input images. Default is True.
            typeOfPadding (int, optional): The value used for padding. Defaults to 0.
            strideLength (int, optional): The stride length for convolution. Defaults to 2.

        Returns:
            ndarray: A 3D array of feature maps with shape (number of kernels, output height, output width).
        """
        if usePadding:
            imageTensor = self._padImage(inputTensor, sizeOfGrid, strideLength, typeOfPadding)
        else:
            imageTensor = inputTensor
        outputHeight = (imageTensor.shape[1] - sizeOfGrid) // strideLength + 1
        outputWidth = (imageTensor.shape[2] - sizeOfGrid) // strideLength + 1
        featureMaps = []
        for kernal, bias in zip(kernalsWeights, kernalsBiases):
            output = np.zeros((outputHeight, outputWidth))
            for x in range(outputHeight):
                rowStart = x * strideLength
                for y in range(outputWidth):
                    colStart = y * strideLength
                    # Window spans all channels at once; the elementwise
                    # product summed over every axis is the convolution dot
                    # product for this output position.
                    window = imageTensor[:, rowStart: rowStart + sizeOfGrid, colStart: colStart + sizeOfGrid]
                    output[x, y] = np.sum(window * kernal) + bias
            featureMaps.append(output)
        return np.stack(featureMaps, axis = 0)  # shape: (number of kernels, outputHeight, outputWidth)

    def _activation(self, inputTensor):
        """
        Applies the Leaky ReLU activation function to the input tensor.

        Args:
            inputTensor (ndarray): A 3D array representing the input.

        Returns:
            ndarray: A tensor with the same shape as the input with Leaky ReLU applied to it.
        """
        alpha = 0.01  # slope for negative inputs
        # max(x, alpha*x) equals x for x >= 0 and alpha*x for x < 0.
        return np.maximum(inputTensor, inputTensor * alpha)

    def _pooling(self, inputTensor, sizeOfGrid = 2, strideLength = 2, typeOfPooling = "max"):
        """
        Applies pooling (max, average, or global average) to reduce the size of the batch of inputs.

        Args:
            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).
            sizeOfGrid (int, optional): The size of the pooling grid. Default is 2.
            strideLength (int, optional): The stride length for pooling. Default is 2.
            typeOfPooling (str, optional): 'max' for max pooling, 'global'/'gap' for
                global average pooling; any other value selects average pooling.
                Defaults to "max".

        Returns:
            ndarray: A 3D array of feature maps with reduced shape, or a 1D array
            for global average pooling.
        """
        mode = typeOfPooling.lower()
        if mode == "global" or mode == "gap":
            return self._poolingGlobalAverage(inputTensor)

        poolFunc = np.max if mode == "max" else np.mean

        pooledMaps = []
        for image in inputTensor:  # each 2D feature map is pooled independently
            outputHeight = (image.shape[0] - sizeOfGrid) // strideLength + 1
            outputWidth = (image.shape[1] - sizeOfGrid) // strideLength + 1
            output = np.zeros((outputHeight, outputWidth))
            for x in range(outputHeight):
                rowStart = x * strideLength
                for y in range(outputWidth):
                    colStart = y * strideLength
                    window = image[rowStart: rowStart + sizeOfGrid, colStart: colStart + sizeOfGrid]
                    output[x, y] = poolFunc(window)
            pooledMaps.append(output)
        return np.array(pooledMaps)

    def _poolingGlobalAverage(self, inputTensor):
        """
        Performs global average pooling, reducing each feature map to a single value.

        Args:
            inputTensor (ndarray): A 3D array representing the images with shape (number of images, height, width).

        Returns:
            ndarray: A 1D array containing the global average of each feature map.
        """
        return np.mean(inputTensor, axis = (1, 2))

    def _flatternTensor(self, inputTensor):
        """
        Flattens a tensor into a 1D array.

        Args:
            inputTensor (ndarray): A tensor of any shape.

        Returns:
            ndarray: A 1D array containing every element of the input tensor.
        """
        return np.array(inputTensor).reshape(-1)