Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Machine learning.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "LfOoBXKNUFNa"
},
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.model_selection import StratifiedShuffleSplit\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout\n",
"from keras.optimizers import SGD\n",
"from keras.utils import np_utils\n",
"import matplotlib.pyplot as plt\n",
"import numpy\n",
"from sklearn.metrics import accuracy_score , precision_score , recall_score , classification_report ,plot_confusion_matrix , confusion_matrix , f1_score\n",
"import itertools\n",
"from sklearn.metrics import accuracy_score, log_loss\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.svm import SVC, LinearSVC, NuSVC\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis"
],
"execution_count": 20,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5QUIcDzGURC8",
"outputId": "a5145c53-1160-4cbe-d952-6c647a41d018"
},
"source": [
"from google.colab import drive\n",
"\n",
"# Mount Google Drive into the Colab filesystem; the CSV inputs are read from it.\n",
"mount_point = '/content/drive'\n",
"drive.mount(mount_point)"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "x-t-FpEcUUWt"
},
"source": [
"# Folder on the mounted Drive that holds the input CSVs.\n",
"# Change this single constant to point the notebook at a different data location.\n",
"DATA_DIR = '/content/drive/MyDrive'\n",
"\n",
"train = pd.read_csv(f'{DATA_DIR}/train.csv')\n",
"test = pd.read_csv(f'{DATA_DIR}/test.csv')"
],
"execution_count": 22,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "V_Pan7WvUbq3"
},
"source": [
"def encode(train, test):\n",
"    \"\"\"Split the raw frames into features, encoded labels and test ids.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    train : DataFrame with 'species' and 'id' columns plus the feature columns.\n",
"    test : DataFrame with an 'id' column plus the feature columns.\n",
"\n",
"    Returns\n",
"    -------\n",
"    train : `train` without the 'species' and 'id' columns.\n",
"    labels : `train.species` transformed by a LabelEncoder fitted on that same column.\n",
"    test : `test` without the 'id' column.\n",
"    test_ids : the 'id' column of `test`.\n",
"    classes : list of the encoder's classes_ (position i is the name behind label i).\n",
"    \"\"\"\n",
"    le = LabelEncoder().fit(train.species)\n",
"    labels = le.transform(train.species)\n",
"    classes = list(le.classes_)\n",
"    test_ids = test.id\n",
"\n",
"    # drop() returns new frames, so the caller's originals are not modified here.\n",
"    train = train.drop(['species', 'id'], axis=1)\n",
"    test = test.drop(['id'], axis=1)\n",
"\n",
"    return train, labels, test, test_ids, classes\n",
"\n",
"\n",
"# `train` and `test` are rebound to the stripped frames below, so running this cell a\n",
"# second time would fail on the missing 'species' column. Only encode while the raw\n",
"# column is still present, which makes the cell safe to re-run.\n",
"if 'species' in train.columns:\n",
"    train, labels, test, test_ids, classes = encode(train, test)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "APW7dR10U3jB"
},
"source": [
"# One stratified 80/20 hold-out split of the labelled data.\n",
"# n_splits=1 because only a single split is used afterwards; with the default of 10,\n",
"# nine of the generated splits would be computed and then overwritten.\n",
"sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=23)\n",
"train_index, test_index = next(sss.split(train, labels))\n",
"\n",
"features = train.values  # materialise the feature matrix once\n",
"X_train, X_test = features[train_index], features[test_index]\n",
"y_train, y_test = labels[train_index], labels[test_index]"
],
"execution_count": 24,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vzgy2beNVv0E",
"outputId": "e1faef05-c7f0-4806-cde1-81d9f201740f"
},
"source": [
"# Candidate models, each fitted on the training split and scored on the hold-out split.\n",
"classifiers = [\n",
"    KNeighborsClassifier(3),\n",
"    SVC(kernel=\"rbf\", C=0.025, probability=True),\n",
"    NuSVC(probability=True),\n",
"    DecisionTreeClassifier(),\n",
"    RandomForestClassifier(),\n",
"    AdaBoostClassifier(),\n",
"    GradientBoostingClassifier(),\n",
"    GaussianNB(),\n",
"    LinearDiscriminantAnalysis(),\n",
"    QuadraticDiscriminantAnalysis()]\n",
"\n",
"\n",
"log_cols=[\"Classifier\", \"Accuracy\", \"Log Loss\"]\n",
"log_rows = []  # one [name, accuracy in percent, log loss] row per classifier\n",
"\n",
"for clf in classifiers:\n",
"    clf.fit(X_train, y_train)\n",
"    name = clf.__class__.__name__\n",
"\n",
"    print(\"=\"*30)\n",
"    print(name)\n",
"\n",
"    print('****Results****')\n",
"    # These are predictions on the hold-out split, not on the training data.\n",
"    test_predictions = clf.predict(X_test)\n",
"    acc = accuracy_score(y_test, test_predictions)\n",
"    print(\"Accuracy: {:.4%}\".format(acc))\n",
"\n",
"    test_probabilities = clf.predict_proba(X_test)\n",
"    # Pass the fitted class list explicitly so the probability columns stay aligned\n",
"    # with the labels even if a class is absent from y_test.\n",
"    ll = log_loss(y_test, test_probabilities, labels=clf.classes_)\n",
"    print(\"Log Loss: {}\".format(ll))\n",
"\n",
"    log_rows.append([name, acc*100, ll])\n",
"\n",
"print(\"=\"*30)\n",
"\n",
"# Build the summary frame once from the collected rows: DataFrame.append was removed in\n",
"# pandas 2.0, and growing a frame row by row copies it on every iteration.\n",
"log = pd.DataFrame(log_rows, columns=log_cols)"
],
"execution_count": 25,
"outputs": [
{
"output_type": "stream",
"text": [
"==============================\n",
"KNeighborsClassifier\n",
"****Results****\n",
"Accuracy: 88.8889%\n",
"Log Loss: 1.5755075129933762\n",
"==============================\n",
"SVC\n",
"****Results****\n",
"Accuracy: 85.8586%\n",
"Log Loss: 4.689036417340807\n",
"==============================\n",
"NuSVC\n",
"****Results****\n",
"Accuracy: 92.9293%\n",
"Log Loss: 2.3713118438270464\n",
"==============================\n",
"DecisionTreeClassifier\n",
"****Results****\n",
"Accuracy: 60.6061%\n",
"Log Loss: 13.606184640419459\n",
"==============================\n",
"RandomForestClassifier\n",
"****Results****\n",
"Accuracy: 97.4747%\n",
"Log Loss: 0.758327109174288\n",
"==============================\n",
"AdaBoostClassifier\n",
"****Results****\n",
"Accuracy: 4.5455%\n",
"Log Loss: 4.200034383188025\n",
"==============================\n",
"GradientBoostingClassifier\n",
"****Results****\n",
"Accuracy: 58.0808%\n",
"Log Loss: 2.6239614477406663\n",
"==============================\n",
"GaussianNB\n",
"****Results****\n",
"Accuracy: 57.0707%\n",
"Log Loss: 14.827252492813216\n",
"==============================\n",
"LinearDiscriminantAnalysis\n",
"****Results****\n",
"Accuracy: 97.9798%\n",
"Log Loss: 0.22993448213653384\n",
"==============================\n",
"QuadraticDiscriminantAnalysis\n",
"****Results****\n",
"Accuracy: 1.5152%\n",
"Log Loss: 34.01546160104849\n",
"==============================\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/sklearn/discriminant_analysis.py:691: UserWarning: Variables are collinear\n",
" warnings.warn(\"Variables are collinear\")\n"
],
"name": "stderr"
}
]
}
]
}