Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Deep-neural-network-for-plant-classification/Machine_learning.ipynb
Go to file
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
239 lines (239 sloc)
8.1 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Machine learning.ipynb", | |
"provenance": [], | |
"collapsed_sections": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "LfOoBXKNUFNa" | |
}, | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from sklearn.preprocessing import LabelEncoder\n", | |
"from sklearn.preprocessing import StandardScaler\n", | |
"from sklearn.model_selection import StratifiedShuffleSplit\n", | |
"from keras.models import Sequential\n", | |
"from keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout\n", | |
"from keras.optimizers import SGD\n", | |
"from keras.utils import np_utils\n", | |
"import matplotlib.pyplot as plt\n", | |
"import numpy\n", | |
"from sklearn.metrics import accuracy_score , precision_score , recall_score , classification_report ,plot_confusion_matrix , confusion_matrix , f1_score\n", | |
"import itertools\n", | |
"from sklearn.metrics import accuracy_score, log_loss\n", | |
"from sklearn.neighbors import KNeighborsClassifier\n", | |
"from sklearn.svm import SVC, LinearSVC, NuSVC\n", | |
"from sklearn.tree import DecisionTreeClassifier\n", | |
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier\n", | |
"from sklearn.naive_bayes import GaussianNB\n", | |
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", | |
"from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis" | |
], | |
"execution_count": 20, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "5QUIcDzGURC8", | |
"outputId": "a5145c53-1160-4cbe-d952-6c647a41d018" | |
}, | |
"source": [ | |
"from google.colab import drive\n", | |
"drive.mount('/content/drive')" | |
], | |
"execution_count": 21, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "x-t-FpEcUUWt" | |
}, | |
"source": [ | |
"train = pd.read_csv('/content/drive/MyDrive/train.csv')\n", | |
"test = pd.read_csv('/content/drive/MyDrive/test.csv')" | |
], | |
"execution_count": 22, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "V_Pan7WvUbq3" | |
}, | |
"source": [ | |
"def encode(train, test):\n", | |
" le = LabelEncoder().fit(train.species) \n", | |
" labels = le.transform(train.species) \n", | |
" classes = list(le.classes_) \n", | |
" test_ids = test.id \n", | |
" train = train.drop(['species', 'id'], axis=1) \n", | |
" test = test.drop(['id'], axis=1)\n", | |
" \n", | |
" return train, labels, test, test_ids, classes\n", | |
"\n", | |
"train, labels, test, test_ids, classes = encode(train, test)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "APW7dR10U3jB" | |
}, | |
"source": [ | |
"sss = StratifiedShuffleSplit(test_size=0.2, random_state=23)\n", | |
"\n", | |
"for train_index, test_index in sss.split(train, labels):\n", | |
" X_train, X_test = train.values[train_index], train.values[test_index]\n", | |
" y_train, y_test = labels[train_index], labels[test_index]" | |
], | |
"execution_count": 24, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "vzgy2beNVv0E", | |
"outputId": "e1faef05-c7f0-4806-cde1-81d9f201740f" | |
}, | |
"source": [ | |
"classifiers = [\n", | |
" KNeighborsClassifier(3),\n", | |
" SVC(kernel=\"rbf\", C=0.025, probability=True),\n", | |
" NuSVC(probability=True),\n", | |
" DecisionTreeClassifier(),\n", | |
" RandomForestClassifier(),\n", | |
" AdaBoostClassifier(),\n", | |
" GradientBoostingClassifier(),\n", | |
" GaussianNB(),\n", | |
" LinearDiscriminantAnalysis(),\n", | |
" QuadraticDiscriminantAnalysis()]\n", | |
"\n", | |
"\n", | |
"log_cols=[\"Classifier\", \"Accuracy\", \"Log Loss\"]\n", | |
"log = pd.DataFrame(columns=log_cols)\n", | |
"\n", | |
"for clf in classifiers:\n", | |
" clf.fit(X_train, y_train)\n", | |
" name = clf.__class__.__name__\n", | |
" \n", | |
" print(\"=\"*30)\n", | |
" print(name)\n", | |
" \n", | |
" print('****Results****')\n", | |
" train_predictions = clf.predict(X_test)\n", | |
" acc = accuracy_score(y_test, train_predictions)\n", | |
" print(\"Accuracy: {:.4%}\".format(acc))\n", | |
" \n", | |
" train_predictions = clf.predict_proba(X_test)\n", | |
" ll = log_loss(y_test, train_predictions)\n", | |
" print(\"Log Loss: {}\".format(ll))\n", | |
" \n", | |
" log_entry = pd.DataFrame([[name, acc*100, ll]], columns=log_cols)\n", | |
" log = log.append(log_entry)\n", | |
" \n", | |
"print(\"=\"*30)" | |
], | |
"execution_count": 25, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"==============================\n", | |
"KNeighborsClassifier\n", | |
"****Results****\n", | |
"Accuracy: 88.8889%\n", | |
"Log Loss: 1.5755075129933762\n", | |
"==============================\n", | |
"SVC\n", | |
"****Results****\n", | |
"Accuracy: 85.8586%\n", | |
"Log Loss: 4.689036417340807\n", | |
"==============================\n", | |
"NuSVC\n", | |
"****Results****\n", | |
"Accuracy: 92.9293%\n", | |
"Log Loss: 2.3713118438270464\n", | |
"==============================\n", | |
"DecisionTreeClassifier\n", | |
"****Results****\n", | |
"Accuracy: 60.6061%\n", | |
"Log Loss: 13.606184640419459\n", | |
"==============================\n", | |
"RandomForestClassifier\n", | |
"****Results****\n", | |
"Accuracy: 97.4747%\n", | |
"Log Loss: 0.758327109174288\n", | |
"==============================\n", | |
"AdaBoostClassifier\n", | |
"****Results****\n", | |
"Accuracy: 4.5455%\n", | |
"Log Loss: 4.200034383188025\n", | |
"==============================\n", | |
"GradientBoostingClassifier\n", | |
"****Results****\n", | |
"Accuracy: 58.0808%\n", | |
"Log Loss: 2.6239614477406663\n", | |
"==============================\n", | |
"GaussianNB\n", | |
"****Results****\n", | |
"Accuracy: 57.0707%\n", | |
"Log Loss: 14.827252492813216\n", | |
"==============================\n", | |
"LinearDiscriminantAnalysis\n", | |
"****Results****\n", | |
"Accuracy: 97.9798%\n", | |
"Log Loss: 0.22993448213653384\n", | |
"==============================\n", | |
"QuadraticDiscriminantAnalysis\n", | |
"****Results****\n", | |
"Accuracy: 1.5152%\n", | |
"Log Loss: 34.01546160104849\n", | |
"==============================\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/sklearn/discriminant_analysis.py:691: UserWarning: Variables are collinear\n", | |
" warnings.warn(\"Variables are collinear\")\n" | |
], | |
"name": "stderr" | |
} | |
] | |
} | |
] | |
} |