# 6057CEM_10862829/mnist1.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelBinarizer

def introduce_dataset():
    '''Get info on the original datasets (train and test).'''
    # assign alphabet labels if needed
    train_df = pd.read_csv("/home/afifamushtaq/Desktop/Last_term/ANN/MNIST/sign_mnist_train/sign_mnist_train.csv")
    test_df = pd.read_csv("/home/afifamushtaq/Desktop/Last_term/ANN/MNIST/sign_mnist_test/sign_mnist_test.csv")
    print(f"Train df info is\n{train_df}")
    print(f"Test df info is\n{test_df}")
    join_dataset(train_df, test_df)

def join_dataset(train_df, test_df):
    '''Concatenate the datasets so they can be re-split later, and convert to an array.'''
    df = pd.concat([test_df, train_df])
    print(f"Full df info is\n{df}")
    df_data = np.array(df)          # convert the DataFrame to a NumPy array
    print(f"df data is:{df_data}")  # check what df_data holds
    m, n = df_data.shape            # get shape
    print(f"Size of data in the form (m,n) is: {m, n}")
    print(f"Size of m is: {m}")
    np.random.shuffle(df_data)      # shuffle the rows in place
    get_labels(df, df_data)

def get_labels(df, df_data):
    '''Get the unique label values (0-24, excluding 9) for the alphabet classes.'''
    labels = df['label'].values
    print(f"labels for alphabets are {labels}")
    unique_val = np.array(labels)
    array = np.unique(unique_val)
    print('The unique labels are', array)
    preprocessing_data(df, df_data, labels, array)
    # plot_figures(labels)

def plot_figures(labels):
    '''Visualise how many samples each class has: a fairly even dataset.'''
    plt.figure(figsize=(18, 8))
    sns.countplot(x=labels)
    plt.show()

def preprocessing_data(df, df_data, labels, array):
    '''Drop the label column so only pixel values remain.'''
    df = df.drop("label", axis='columns')
    print(f"Train df info is\n{df}")
    flattening_images(df, df_data, labels, array)

def flattening_images(df, df_data, labels, array):
    '''Reshape each row to 28x28, then flatten back into a row vector.'''
    images = df.values
    images = np.array([np.reshape(i, (28, 28)) for i in images])  # reshape all images
    images = np.array([i.flatten() for i in images])              # flatten and stack
    print(images)
    encode_labels(df_data, labels, images, array)
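# Note (an observation, not from the original file): the reshape followed by
# flatten above is a round trip, so `images` keeps its original
# (num_samples, 784) shape; the reshape mainly confirms that every row really
# holds 28 * 28 = 784 pixel values and would raise an error otherwise.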

def encode_labels(df_data, labels, images, array):
    '''Encode the labels, since they are categorical data.'''
    label_binarizer = LabelBinarizer()
    labels = label_binarizer.fit_transform(labels)
    print(f"The number of classes is {len(labels[0])}")
    print(f"Example of array of encoded labels\n{labels}")
    output_image(df_data, labels, images, array)
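# A minimal sketch (hypothetical values) of what LabelBinarizer produces:
# fit_transform([0, 2, 5]) one-hot encodes each label against the sorted
# unique classes, e.g.
#   [[1, 0, 0],
#    [0, 1, 0],
#    [0, 0, 1]]
# where each row is a sample and each column one of the observed classes.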

def output_image(df_data, labels, images, array):
    '''Visualise images after flattening and resizing (rows are indexed by the
    unique label values, so these are not necessarily one image per class).'''
    for i in array:
        # print(labels[i])
        plt.imshow(images[i].reshape(28, 28))
        # plt.show()  # visualise all images; uncomment during submission
    CNN_Manual(df_data, labels)

def CNN_Manual(df_data, labels):
    def split_dataset(df_data, labels):
        m, n = df_data.shape
        # arrange test data: first 1000 rows, transposed so columns are samples
        test_data = df_data[0:1000].T
        X_test = test_data[1:n]
        Y_test = test_data[0]
        X_test = X_test / 255  # scale pixel values to [0, 1]
        print(f"New Xtest is \n{X_test}")
        print(f"New Ytest is \n{Y_test}")
        print(f"Test data shape is {test_data.shape}")
        # arrange training data: the remaining rows, transposed the same way
        train_data = df_data[1000:m].T
        X_train = train_data[1:n]
        Y_train = train_data[0]
        X_train = X_train / 255
        print(f"New Xtrain is \n{X_train}")
        print(f"New Ytrain is \n{Y_train}")
        print(f"X train shape is \n{X_train[:, 0].shape}")
        print(f"Train data shape is {train_data.shape}")
        # note: m is the full row count (including the 1000 test rows), so the
        # 1/m scaling in backprop slightly underestimates the gradients
        W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 0.10, 500, m, X_test, Y_test)
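    # A note on the layout above (an observation, not part of the original
    # code): after the transpose, each *column* of X_train is one 784-pixel
    # image and row 0 of the transposed block holds the labels; e.g. for 100
    # samples, X_train has shape (784, 100) and Y_train has shape (100,).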

    def init_params():
        '''Initialise weights and biases uniformly in [-0.5, 0.5).'''
        W1 = np.random.rand(25, 784) - 0.5  # hidden layer: 25 units, 784 inputs
        b1 = np.random.rand(25, 1) - 0.5
        W2 = np.random.rand(25, 25) - 0.5   # output layer: 25 units (labels 0-24)
        b2 = np.random.rand(25, 1) - 0.5
        return W1, b1, W2, b2
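
    # A hedged alternative (a sketch; `init_params_he` is not part of the
    # original script and is never called): for ReLU layers, He initialisation
    # scales random normals by sqrt(2 / fan_in), which often trains more
    # stably than a uniform [-0.5, 0.5) draw.
    def init_params_he():
        W1 = np.random.randn(25, 784) * np.sqrt(2 / 784)
        b1 = np.zeros((25, 1))
        W2 = np.random.randn(25, 25) * np.sqrt(2 / 25)
        b2 = np.zeros((25, 1))
        return W1, b1, W2, b2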

    def ReLU(Z):
        return np.maximum(Z, 0)  # element-wise: Z where Z > 0, else 0

    def softmax(Z):
        # exp() every value, then divide each column by its sum, so every
        # column becomes a probability distribution over the classes
        A = np.exp(Z) / sum(np.exp(Z))
        return A
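
    # A numerically stable variant (a sketch; `softmax_stable` is not part of
    # the original script and is never called): subtracting the column-wise
    # max before exponentiating avoids overflow in np.exp for large logits
    # without changing the resulting probabilities.
    def softmax_stable(Z):
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        expZ = np.exp(shifted)
        return expZ / np.sum(expZ, axis=0, keepdims=True)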

    def forward_prop(W1, b1, W2, b2, X):
        Z1 = W1.dot(X) + b1   # (25, m): hidden pre-activation
        A1 = ReLU(Z1)
        Z2 = W2.dot(A1) + b2  # (25, m): output pre-activation
        A2 = softmax(Z2)      # class probabilities, one column per sample
        return Z1, A1, Z2, A2

    def ReLU_deriv(Z):
        return Z > 0  # boolean mask; acts as 1 where Z > 0, 0 elsewhere

    def one_hot(Y):
        one_hot_Y = np.zeros((Y.size, Y.max() + 1))
        one_hot_Y[np.arange(Y.size), Y] = 1
        one_hot_Y = one_hot_Y.T  # one column per sample, matching A2
        return one_hot_Y
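
    # A worked example with hypothetical values: one_hot(np.array([0, 2, 1]))
    # builds a (3, 3) matrix, sets one entry per row, then transposes, giving
    #   [[1, 0, 0],
    #    [0, 0, 1],
    #    [0, 1, 0]].T
    # i.e. column j is the one-hot encoding of sample j.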

    def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y, m):
        one_hot_Y = one_hot(Y)
        dZ2 = A2 - one_hot_Y  # gradient of the loss w.r.t. Z2
        dW2 = 1 / m * dZ2.dot(A1.T)
        db2 = 1 / m * np.sum(dZ2)
        dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
        dW1 = 1 / m * dZ1.dot(X.T)
        db1 = 1 / m * np.sum(dZ1)
        return dW1, db1, dW2, db2
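
    # Why dZ2 = A2 - one_hot_Y (background, not from the original comments):
    # for softmax outputs A2 combined with the cross-entropy loss, the
    # gradient of the loss with respect to the pre-activation simplifies to
    #   dL/dZ2 = A2 - Y_one_hot,
    # which is why no explicit softmax derivative appears above.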

    def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
        # standard gradient descent step: parameter -= learning rate * gradient
        W1 = W1 - alpha * dW1
        b1 = b1 - alpha * db1
        W2 = W2 - alpha * dW2
        b2 = b2 - alpha * db2
        return W1, b1, W2, b2

    def get_predictions(A2):
        return np.argmax(A2, 0)  # index of the largest probability per column

    def get_accuracy(predictions, Y):
        print(predictions, Y)
        return np.sum(predictions == Y) / Y.size
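
    # A quick example with made-up values:
    #   get_accuracy(np.array([3, 0, 7]), np.array([3, 1, 7]))
    # prints both arrays and returns 2/3, since two of three predictions match.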

    def gradient_descent(X, Y, alpha, iterations, m, X_test, Y_test):
        W1, b1, W2, b2 = init_params()
        for i in range(iterations):
            Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
            dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y, m)
            W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
            if i % 10 == 0:
                print("Iteration: ", i)  # report every 10th iteration
                predictions = get_predictions(A2)
                Accuracy = get_accuracy(predictions, Y)
                print(f"Training accuracy is : {Accuracy}")
        evaluate_model(X, Y, X_test, Y_test, W1, b1, W2, b2)
        # testing_on_test_set(X_test, Y_test, W1, b1, W2, b2)
        return W1, b1, W2, b2  # returned so split_dataset can unpack the trained params

    def make_predictions(X, W1, b1, W2, b2):
        _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
        predictions = get_predictions(A2)
        return predictions

    def testing_on_test_set(X_test, Y_test, W1, b1, W2, b2):
        test_predictions = make_predictions(X_test, W1, b1, W2, b2)
        Accuracy_test = get_accuracy(test_predictions, Y_test)
        print(f"Accuracy test is {Accuracy_test}")
        return Accuracy_test  # returned so callers can capture the score

    def test_prediction(X_train, Y_train, index, W1, b1, W2, b2):
        current_image = X_train[:, index, None]
        prediction = make_predictions(X_train[:, index, None], W1, b1, W2, b2)
        label = Y_train[index]
        print("Prediction: ", prediction)
        print("Label: ", label)
        current_image = current_image.reshape((28, 28)) * 255  # undo the /255 scaling
        plt.gray()
        plt.imshow(current_image, interpolation='nearest')
        plt.show()
        return prediction  # returned so test_examples can collect it

    def test_examples(X_train, Y_train, X_test, Y_test, W1, b1, W2, b2):
        pred1 = test_prediction(X_train, Y_train, 0, W1, b1, W2, b2)
        pred2 = test_prediction(X_train, Y_train, 1, W1, b1, W2, b2)
        pred3 = test_prediction(X_train, Y_train, 2, W1, b1, W2, b2)
        pred4 = test_prediction(X_train, Y_train, 3, W1, b1, W2, b2)
        return pred1, pred2, pred3, pred4

    def evaluate_model(X_train, Y_train, X_test, Y_test, W1, b1, W2, b2):
        '''Show a few example predictions, then report test-set accuracy.'''
        pred1, pred2, pred3, pred4 = test_examples(X_train, Y_train, X_test, Y_test, W1, b1, W2, b2)
        Accuracy = testing_on_test_set(X_test, Y_test, W1, b1, W2, b2)

    split_dataset(df_data, labels)

if __name__ == "__main__":
    introduce_dataset()