Skip to content
Permalink
d1bf7f4f4f
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
1342 lines (1341 sloc) 224 KB
{
"cells": [
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"#import libraries\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>diagnosis</th>\n",
" <th>radius_mean</th>\n",
" <th>texture_mean</th>\n",
" <th>perimeter_mean</th>\n",
" <th>area_mean</th>\n",
" <th>smoothness_mean</th>\n",
" <th>compactness_mean</th>\n",
" <th>concavity_mean</th>\n",
" <th>concave points_mean</th>\n",
" <th>...</th>\n",
" <th>texture_worst</th>\n",
" <th>perimeter_worst</th>\n",
" <th>area_worst</th>\n",
" <th>smoothness_worst</th>\n",
" <th>compactness_worst</th>\n",
" <th>concavity_worst</th>\n",
" <th>concave points_worst</th>\n",
" <th>symmetry_worst</th>\n",
" <th>fractal_dimension_worst</th>\n",
" <th>Unnamed: 32</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>842302</td>\n",
" <td>M</td>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>...</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>842517</td>\n",
" <td>M</td>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>...</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>84300903</td>\n",
" <td>M</td>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>...</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>84348301</td>\n",
" <td>M</td>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>...</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>84358402</td>\n",
" <td>M</td>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>...</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 33 columns</p>\n",
"</div>"
],
"text/plain": [
" id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
"0 842302 M 17.99 10.38 122.80 1001.0 \n",
"1 842517 M 20.57 17.77 132.90 1326.0 \n",
"2 84300903 M 19.69 21.25 130.00 1203.0 \n",
"3 84348301 M 11.42 20.38 77.58 386.1 \n",
"4 84358402 M 20.29 14.34 135.10 1297.0 \n",
"\n",
" smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
"0 0.11840 0.27760 0.3001 0.14710 \n",
"1 0.08474 0.07864 0.0869 0.07017 \n",
"2 0.10960 0.15990 0.1974 0.12790 \n",
"3 0.14250 0.28390 0.2414 0.10520 \n",
"4 0.10030 0.13280 0.1980 0.10430 \n",
"\n",
" ... texture_worst perimeter_worst area_worst smoothness_worst \\\n",
"0 ... 17.33 184.60 2019.0 0.1622 \n",
"1 ... 23.41 158.80 1956.0 0.1238 \n",
"2 ... 25.53 152.50 1709.0 0.1444 \n",
"3 ... 26.50 98.87 567.7 0.2098 \n",
"4 ... 16.67 152.20 1575.0 0.1374 \n",
"\n",
" compactness_worst concavity_worst concave points_worst symmetry_worst \\\n",
"0 0.6656 0.7119 0.2654 0.4601 \n",
"1 0.1866 0.2416 0.1860 0.2750 \n",
"2 0.4245 0.4504 0.2430 0.3613 \n",
"3 0.8663 0.6869 0.2575 0.6638 \n",
"4 0.2050 0.4000 0.1625 0.2364 \n",
"\n",
" fractal_dimension_worst Unnamed: 32 \n",
"0 0.11890 NaN \n",
"1 0.08902 NaN \n",
"2 0.08758 NaN \n",
"3 0.17300 NaN \n",
"4 0.07678 NaN \n",
"\n",
"[5 rows x 33 columns]"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Load the dataset; 'data.csv' is a relative path, so it is resolved against the working directory\n",
"df = pd.read_csv('data.csv')\n",
"#Show the first five rows as a quick preview\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id 0\n",
"diagnosis 0\n",
"radius_mean 0\n",
"texture_mean 0\n",
"perimeter_mean 0\n",
"area_mean 0\n",
"smoothness_mean 0\n",
"compactness_mean 0\n",
"concavity_mean 0\n",
"concave points_mean 0\n",
"symmetry_mean 0\n",
"fractal_dimension_mean 0\n",
"radius_se 0\n",
"texture_se 0\n",
"perimeter_se 0\n",
"area_se 0\n",
"smoothness_se 0\n",
"compactness_se 0\n",
"concavity_se 0\n",
"concave points_se 0\n",
"symmetry_se 0\n",
"fractal_dimension_se 0\n",
"radius_worst 0\n",
"texture_worst 0\n",
"perimeter_worst 0\n",
"area_worst 0\n",
"smoothness_worst 0\n",
"compactness_worst 0\n",
"concavity_worst 0\n",
"concave points_worst 0\n",
"symmetry_worst 0\n",
"fractal_dimension_worst 0\n",
"Unnamed: 32 569\n",
"dtype: int64"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Count the missing values in every column (isna performs the same check as isnull)\n",
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>diagnosis</th>\n",
" <th>radius_mean</th>\n",
" <th>texture_mean</th>\n",
" <th>perimeter_mean</th>\n",
" <th>area_mean</th>\n",
" <th>smoothness_mean</th>\n",
" <th>compactness_mean</th>\n",
" <th>concavity_mean</th>\n",
" <th>concave points_mean</th>\n",
" <th>symmetry_mean</th>\n",
" <th>...</th>\n",
" <th>radius_worst</th>\n",
" <th>texture_worst</th>\n",
" <th>perimeter_worst</th>\n",
" <th>area_worst</th>\n",
" <th>smoothness_worst</th>\n",
" <th>compactness_worst</th>\n",
" <th>concavity_worst</th>\n",
" <th>concave points_worst</th>\n",
" <th>symmetry_worst</th>\n",
" <th>fractal_dimension_worst</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M</td>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>...</td>\n",
" <td>25.38</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M</td>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>...</td>\n",
" <td>24.99</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M</td>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>...</td>\n",
" <td>23.57</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M</td>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>...</td>\n",
" <td>14.91</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M</td>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>...</td>\n",
" <td>22.54</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
"0 M 17.99 10.38 122.80 1001.0 \n",
"1 M 20.57 17.77 132.90 1326.0 \n",
"2 M 19.69 21.25 130.00 1203.0 \n",
"3 M 11.42 20.38 77.58 386.1 \n",
"4 M 20.29 14.34 135.10 1297.0 \n",
"\n",
" smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
"0 0.11840 0.27760 0.3001 0.14710 \n",
"1 0.08474 0.07864 0.0869 0.07017 \n",
"2 0.10960 0.15990 0.1974 0.12790 \n",
"3 0.14250 0.28390 0.2414 0.10520 \n",
"4 0.10030 0.13280 0.1980 0.10430 \n",
"\n",
" symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n",
"0 0.2419 ... 25.38 17.33 184.60 \n",
"1 0.1812 ... 24.99 23.41 158.80 \n",
"2 0.2069 ... 23.57 25.53 152.50 \n",
"3 0.2597 ... 14.91 26.50 98.87 \n",
"4 0.1809 ... 22.54 16.67 152.20 \n",
"\n",
" area_worst smoothness_worst compactness_worst concavity_worst \\\n",
"0 2019.0 0.1622 0.6656 0.7119 \n",
"1 1956.0 0.1238 0.1866 0.2416 \n",
"2 1709.0 0.1444 0.4245 0.4504 \n",
"3 567.7 0.2098 0.8663 0.6869 \n",
"4 1575.0 0.1374 0.2050 0.4000 \n",
"\n",
" concave points_worst symmetry_worst fractal_dimension_worst \n",
"0 0.2654 0.4601 0.11890 \n",
"1 0.1860 0.2750 0.08902 \n",
"2 0.2430 0.3613 0.08758 \n",
"3 0.2575 0.6638 0.17300 \n",
"4 0.1625 0.2364 0.07678 \n",
"\n",
"[5 rows x 31 columns]"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Drop 'Unnamed: 32' (it contains only missing values) and 'id' (an identifier, not a feature).\n",
"#errors='ignore' keeps this cell safe to re-run: df is reassigned here, so on a second run\n",
"#the two columns are already gone and a plain drop would raise a KeyError.\n",
"df = df.drop(columns=['Unnamed: 32', 'id'], errors='ignore')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"B 357\n",
"M 212\n",
"Name: diagnosis, dtype: int64"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Class balance of the target: the diagnosis column is the dependent variable\n",
"diagnosis_counts = df['diagnosis'].value_counts()\n",
"diagnosis_counts"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>diagnosis</th>\n",
" <th>radius_mean</th>\n",
" <th>texture_mean</th>\n",
" <th>perimeter_mean</th>\n",
" <th>area_mean</th>\n",
" <th>smoothness_mean</th>\n",
" <th>compactness_mean</th>\n",
" <th>concavity_mean</th>\n",
" <th>concave points_mean</th>\n",
" <th>symmetry_mean</th>\n",
" <th>...</th>\n",
" <th>radius_worst</th>\n",
" <th>texture_worst</th>\n",
" <th>perimeter_worst</th>\n",
" <th>area_worst</th>\n",
" <th>smoothness_worst</th>\n",
" <th>compactness_worst</th>\n",
" <th>concavity_worst</th>\n",
" <th>concave points_worst</th>\n",
" <th>symmetry_worst</th>\n",
" <th>fractal_dimension_worst</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>...</td>\n",
" <td>25.38</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>...</td>\n",
" <td>24.99</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>...</td>\n",
" <td>23.57</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>...</td>\n",
" <td>14.91</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>...</td>\n",
" <td>22.54</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 31 columns</p>\n",
"</div>"
],
"text/plain": [
" diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n",
"0 1 17.99 10.38 122.80 1001.0 \n",
"1 1 20.57 17.77 132.90 1326.0 \n",
"2 1 19.69 21.25 130.00 1203.0 \n",
"3 1 11.42 20.38 77.58 386.1 \n",
"4 1 20.29 14.34 135.10 1297.0 \n",
"\n",
" smoothness_mean compactness_mean concavity_mean concave points_mean \\\n",
"0 0.11840 0.27760 0.3001 0.14710 \n",
"1 0.08474 0.07864 0.0869 0.07017 \n",
"2 0.10960 0.15990 0.1974 0.12790 \n",
"3 0.14250 0.28390 0.2414 0.10520 \n",
"4 0.10030 0.13280 0.1980 0.10430 \n",
"\n",
" symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n",
"0 0.2419 ... 25.38 17.33 184.60 \n",
"1 0.1812 ... 24.99 23.41 158.80 \n",
"2 0.2069 ... 23.57 25.53 152.50 \n",
"3 0.2597 ... 14.91 26.50 98.87 \n",
"4 0.1809 ... 22.54 16.67 152.20 \n",
"\n",
" area_worst smoothness_worst compactness_worst concavity_worst \\\n",
"0 2019.0 0.1622 0.6656 0.7119 \n",
"1 1956.0 0.1238 0.1866 0.2416 \n",
"2 1709.0 0.1444 0.4245 0.4504 \n",
"3 567.7 0.2098 0.8663 0.6869 \n",
"4 1575.0 0.1374 0.2050 0.4000 \n",
"\n",
" concave points_worst symmetry_worst fractal_dimension_worst \n",
"0 0.2654 0.4601 0.11890 \n",
"1 0.1860 0.2750 0.08902 \n",
"2 0.2430 0.3613 0.08758 \n",
"3 0.2575 0.6638 0.17300 \n",
"4 0.1625 0.2364 0.07678 \n",
"\n",
"[5 rows x 31 columns]"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Encode the diagnosis labels (M = malignant, B = benign) as integers.\n",
"#LabelEncoder numbers the sorted classes, so on the raw labels B becomes 0 and M becomes 1.\n",
"from sklearn import preprocessing\n",
"le = preprocessing.LabelEncoder()\n",
"le.fit(df['diagnosis'])\n",
"df['diagnosis'] = le.transform(df['diagnosis'])\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x201da4cd0d0>"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1440x1440 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#Correlation matrix between diagnosis and the *_mean features, to spot the features most strongly related to the target.\n",
"#Only the mean columns are used (not the _se / _worst variants) as they give a general idea of each feature's impact.\n",
"#Columns are picked by name instead of by position so the plot does not depend on the column order\n",
"#or on whether the 'id' column has already been dropped.\n",
"mean_cols = ['diagnosis'] + [c for c in df.columns if c.endswith('_mean')]\n",
"plt.figure(figsize = (20, 20))\n",
"sns.heatmap(df[mean_cols].corr(),annot=True,cmap='coolwarm')\n",
"plt.title('Correlation between diagnosis and the mean features')\n",
"#plt.show() renders the figure without echoing the Axes repr as cell output\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(569, 30)\n",
"(569,)\n"
]
}
],
"source": [
"#Separate the target (Y = diagnosis, the dependent variable) from the predictors (X = every other column) before applying any model.\n",
"from sklearn.model_selection import train_test_split\n",
"Y = df['diagnosis']\n",
"X = df.drop(columns=['diagnosis'])\n",
"for part in (X, Y):\n",
"    print(part.shape)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"#Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable\n",
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, Y_train, Y_test = train_test_split(\n",
"    X, Y,\n",
"    test_size=0.3,\n",
"    random_state=42,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(398, 30)\n",
"(398,)\n",
"(171, 30)\n",
"(171,)\n"
]
}
],
"source": [
"#Dimensions of each split, printed in the order X_train, Y_train, X_test, Y_test\n",
"for split in (X_train, Y_train, X_test, Y_test):\n",
"    print(split.shape)"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"#Standardise the feature values before applying the model.\n",
"#The scaler is fitted on the training set only and then applied to both the training and the test set.\n",
"from sklearn.preprocessing import StandardScaler\n",
"sscaling = StandardScaler()\n",
"sscaling.fit(X_train)\n",
"X_train = sscaling.transform(X_train)\n",
"X_test = sscaling.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"# Applying the models and evaluation\n",
"\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.svm import SVC\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.metrics import roc_auc_score\n",
"from scipy.stats import loguniform\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import RepeatedStratifiedKFold\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn import metrics"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SVC Accuracy: 0.9766081871345029\n",
"ROC Score: 0.974867724867725\n",
" precision recall f1-score support\n",
"\n",
" 0 0.98 0.98 0.98 108\n",
" 1 0.97 0.97 0.97 63\n",
"\n",
" accuracy 0.98 171\n",
" macro avg 0.97 0.97 0.97 171\n",
"weighted avg 0.98 0.98 0.98 171\n",
"\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQzElEQVR4nO3dfZBddX3H8fd3NyGEJ82DeTBQg0rApFpQZKg6RY0tVAUyjrToiBHj7B9ifcIHgjhoB4S2VmHqw7hKJKCgkVITtFKYUEypCERgjJIGEkOSDUsWkyAPiXnY++0fe51ZINm9udnsb+/J+zXzm7v33Ms538yEz37zO79zTmQmkqTh11a6AEk6WBnAklSIASxJhRjAklSIASxJhYw60AfYsuIul1noBc66uLd0CRqB7rrltNjfffx09PENZ847dq3a7+PtDztgSSrkgHfAkjScYnTRpnafGMCSKqV9bHvpEhpmAEuqlLZRdsCSVIRTEJJUiB2wJBViByxJhdgBS1Ih7Ye0zuUNrVOpJDUg2qLhMei+IhZERE9E/KbftvERcXtEPFJ/Hdfvs/kRsToiVkXE6YPt3wCWVCnR3tbwaMC1wBnP23YRsDQzjwOW1t8TETOBc4FZ9f/mGxEx4KJkA1hSpbS1R8NjMJm5DNjyvM1nAwvrPy8E5vTb/oPM3JGZa4HVwCkD1rovfzBJGun2ZQoiIjoiYnm/0dHAISZnZjdA/XVSffs0YEO/73XVt+2VJ+EkVcq+nITLzE6gc4gOvaeWesA7sxnAkiol2g74P+w3RcTUzOyOiKlAT317F3BMv+8dDTw20I6cgpBUKUO5CmIvlgBz6z/PBRb3235uRIyJiGOB44B7B9qRHbCkSmnk5FqjIuJG4M3AxIjoAi4FrgQWRcQ8YD1wDkBm/jYiFgEPAbuBCzJzwCcPGMCSKmU/OtsXyMz37OWj2Xv5/uXA5Y3u3wCWVCnDMAc8ZAxgSZXSPtoAlqQihnIK4kAzgCVVilMQklSIHbAkFWIAS1IhbaN8KrIkFTGUF2IcaAawpEpxCkKSCnEVhCQVYgcsSYUYwJJUiKsgJKkQ54AlqZRwCkKSinAOWJIKcQpCkgqxA5akQlwFIUmF2AFLUinOAUtSGeEyNEkqw1UQklRIeBJOksrwJJwkFRLhFIQklWEHLElleBJOkgpxDliSCol2V0FIUhlOQUhSGV4JJwCefnYbV3zzWtas30hE8LkPf4CezVu5ZtESHt3YzTVXXMKrXjm9dJkqZNLEMVzyiRMYP240mbDk1m5+dMvG0mW1PjtgAXx1wY2ceuKf86VPfZhdu3bzx507OfLww7ji0xfwT9+6rnR5Kqy3N/nagjU8vOYZxo5tZ8FXX8t9D27l0Q3bSpfW0obyJFxEfAL4EJDACuB84DDgh8B04FHg7zJzazP7HzSAI+IE4GxgWr2Ix4AlmbmymQMeLJ7dtp0HVz7M5z/yQQBGjx7F6NGjOPLwwwpXppFi89adbN66E4Dt23t5dMM2Jk4YYwDvryG6ECMipgEfBWZm5vaIWAScC8wElmbmlRFxEXAR8NlmjjFgpRHxWeAHQAD3AvfVf76xfmDtxcZNT/Dio47ksq8v4P2f+gJf+ua1bP/jjtJlaYSaMmkMM15xBA+teqp0KS0v2tsbHg0YBYyNiFH0db6P0deQLqx/vhCY02ytg/2qmAe8PjOvzMzv1ceVwCn1z/YoIjoiYnlELF9405Jma2tpvb01Hv7dOt71N2/hui9/gbFjDuG6//jP0mVpBBp7aBuXz5/F1d9ew7btvaXLaX1t0fDon1X10fGn3WTmRuDLwHqgG/hDZt4GTM7M7vp3uoFJzZY62BREDXgpsO5526fWP9ujzOwEOgG2rLgrmy2ulU2aMI6XTBjHrBkvB+Atp57M9T82gPVc7e3BZfNncdudPSy7+/ely6mEfbkSrn9WvWA/EePo63aPBZ
4EfhQR7xuKGv9ksAD+OLA0Ih4BNtS3/RnwSuAjQ1lI1UwY9yImTxjPuo2P87JpU1i+YiXTj35p6bI0wsz/6AzWbdjGDxd3lS6lOoZuGdrbgLWZ+UTfbuNm4A3ApoiYmpndETEV6Gn2AAMGcGbeGhEz6JtymEbf/G8XcF9m+m+lQXxy3nv5wtWd7Nrdy7TJE/ncBR/kznvu5yvX3MCTTz3NhVdczYzpx3DV5z9ZulQV8JqZR3HGW6eweu0zfPfq1wHwrevW8stfbSlcWYsbumVo64FTI+IwYDswG1gOPAvMBa6svy5u9gCReWBnCA7WKQgN7KyL/f2tF7rrltP2u33dfv1lDWfO2PMuGfB4EfFF4O+B3cAD9C1JOwJYRN9swHrgnMxs6rem64AlVcsQ3g84My8FLn3e5h30dcP7zQCWVC3eDU2SyvCJGJJUih2wJBViByxJhXhDdkkqxA5YkgpxDliSCrEDlqRCfCSRJBXiI4kkqZA2V0FIUhl2wJJUiHPAklSIqyAkqRA7YEkqI70UWZIKcQpCkgoxgCWpjHQOWJIKsQOWpELsgCWpDFdBSFIpTkFIUhlpAEtSIc4BS1IZdsCSVIr3A5akMrwQQ5JKcQpCkspI7IAlqQhPwklSKS0UwK1TqSQ1oNbW3vAYTES8OCJuioj/i4iVEfGXETE+Im6PiEfqr+OardUAllQtEY2PwV0N3JqZJwB/AawELgKWZuZxwNL6+6YYwJIqJaOt4TGQiDgK+CvgGoDM3JmZTwJnAwvrX1sIzGm2VgNYUqUk0fAYxMuBJ4DvRsQDEfGdiDgcmJyZ3QD110nN1moAS6qUfemAI6IjIpb3Gx39djUKeC3wzcw8CXiW/Zhu2BNXQUiqln24Ei4zO4HOvXzcBXRl5j319zfRF8CbImJqZnZHxFSgp9lS7YAlVUot2hseA8nMx4ENEXF8fdNs4CFgCTC3vm0usLjZWu2AJVXKEF+I8Q/A9yPiEOB3wPn0Na6LImIesB44p9mdG8CSKmUoL0XOzAeBk/fw0eyh2L8BLKlSvBRZkgrxdpSSVMhgJ9dGEgNYUqV4O0pJKsQ5YEkqxA5YkgqxA+7nrIt7D/Qh1ILm39ox+Jd0EFq133uwA5akQmotdIcFA1hSpaQBLEllOAUhSYUYwJJUiAEsSYUYwJJUSC09CSdJRdgBS1IhBrAkFZJpAEtSETU7YEkqw5NwklSIc8CSVIhzwJJUiB2wJBViByxJhdRKF7APDGBJleIqCEkqxCkISSrEk3CSVEgtS1fQOANYUqXYAUtSIc4BS1IhvQawJJXRSh1w6yyYk6QGZDY+GhER7RHxQET8pP5+fETcHhGP1F/HNVurASypUpJoeDToY8DKfu8vApZm5nHA0vr7phjAkiqllo2PwUTE0cA7gO/023w2sLD+80JgTrO1GsCSKqVWi4ZHRHRExPJ+o+N5u7sK+AzPvcXE5MzsBqi/Tmq2Vk/CSaqUfXkkUWZ2Ap17+iwi3gn0ZOavIuLNQ1PdcxnAkiql0ZNrDXgjcFZEvB04FDgqIr4HbIqIqZnZHRFTgZ5mD+AUhKRKyYyGx8D7yfmZeXRmTgfOBe7IzPcBS4C59a/NBRY3W6sdsKRKGYZ7QVwJLIqIecB64Jxmd2QAS6qUIZyC6LfPvBO4s/7zZmD2UOzXAJZUKV6KLEmFHIgO+EAxgCVVigEsSYXUnIKQpDLsgCWpkN4Wei69ASypUlrpfsAGsKRKcQpCkgrxqciSVIgdsCQVYgBLUiGugpCkQmoGsCSV4RSEJBViAOs5Jk0cwyWfOIHx40aTCUtu7eZHt2wsXZaGyWu+/SUmvf3N7OzZzLKTzgRg9LgXcdINX+Wwl01j27qN3P+ej7P7yacAOPLVx/Pqb3yRUUceQWaN/z313dR27Cz5R2gprbQMzUcSDYPe3uRrC9bwvg8vp+NTD/Cud7yU6cccVrosDZOuhTdz7zs/9Jxtr/hMB5vvuJs7Z57O5jvu5p
Wf6XsYb7S3c+LCf2HFBZey7MR38svZ76e2a3eJsltWZjY8SjOAh8HmrTt5eM0zAGzf3sujG7YxccKYwlVpuGy5azm7tvzhOdsmnzmbrut/DEDX9T9m8llvA2DiX7+Rp1es4ulfrwJg15YnW+us0gjQ29v4KM0AHmZTJo1hxiuO4KFVT5UuRQWNmTyBHY8/AcCOx59gzKTxABw+41gyk1N++h3edO/NvPzCDw20G+1BZuOjtKYDOCLOH+CzjohYHhHLH193S7OHqJyxh7Zx+fxZXP3tNWzbPgJ+/WrEaWtvZ/wbXscD7/80vzjtvUyZ8zYmvOXU0mW1lFo2Pkrbnw74i3v7IDM7M/PkzDx5ysvO3I9DVEd7e3DZ/FncdmcPy+7+felyVNiOTZsZM+UlAIyZ8hJ29GwBYPvGx9n8P/eya/NWatv/SM/PlvGik2aVLLXlVKYDjohf72WsACYPU42VMP+jM1i3YRs/XNxVuhSNAJt+cgdHnzcHgKPPm8OmW5YC8MRtd3HUq4+nbeyhRHs7E/7q9TyzcnXJUltO1rLhUdpgy9AmA6cDW5+3PYBfHJCKKug1M4/ijLdOYfXaZ/ju1a8D4FvXreWXv9pSuDINhxOv/1cmnHYKh0wcx1vX/pxH/vHfWPPPnbz2xqs45vx3s31DN/ef+zEAdj/5FGuvupY33X0TZNJz6zJ6fvbzwn+C1lKlS5F/AhyRmQ8+/4OIuPOAVFRBv37oKd50pv8THawePO/CPW6/5/QP7HH7xhuWsPGGJQewomqrjYDOtlEDBnBmzhvgs/cOfTmStH9Gwtxuo7wSTlKlGMCSVEithRLYAJZUKVmhk3CS1FJ6e+2AJamIkXCTnUYZwJIqpYVWoRnAkqplJFzh1igDWFKltNAMhLejlFQttVo2PAYSEcdExH9HxMqI+G1EfKy+fXxE3B4Rj9RfxzVbqwEsqVJqvdnwGMRu4MLMfBVwKnBBRMwELgKWZuZxwNL6+6YYwJIqpZbZ8BhIZnZn5v31n58GVgLTgLOBhfWvLQTmNFurASypUvblmXD9Hx5RHx172mdETAdOAu4BJmdmd/1Y3cCkZmv1JJykStmXu6FlZifQOdB3IuII4N+Bj2fmUxGxfwX2YwcsqVKG8okYETGavvD9fmbeXN+8KSKm1j+fCvQ0W6sBLKlSentrDY+BRF+rew2wMjO/0u+jJcDc+s9zgcXN1uoUhKRKGcILMd4InAesiIg/PZTiYuBKYFFEzAPWA+c0ewADWFKlDFUAZ+Zd9D1+bU9mD8UxDGBJldJCVyIbwJKqxXtBSFIh3o5SkgoZbHXDSGIAS6oUpyAkqRADWJIK8anIklSIHbAkFeIqCEkqpHe3qyAkqQg7YEkqJGt2wJJUxL7ckL00A1hSpTgFIUmF1DwJJ0ll1NIAlqQivBBDkgoxgCWpEE/CSVIhNdcBS1IZtd7e0iU0zACWVCnOAUtSIQawJBXiOmBJKsQOWJIK8W5oklSIqyAkqRBvRylJhTgFIUmFeBJOkgpJl6FJUhm13a1zEi5a6c5BrS4iOjKzs3QdGln8e3HwaitdwEGmo3QBGpH8e3GQMoAlqRADWJIKMYCHl/N82hP/XhykPAknSYXYAUtSIQawJBViAA+TiDgjIlZFxOqIuKh0PSovIhZERE9E/KZ0LSrDAB4GEdEOfB34W2Am8J6ImFm2Ko0A1wJnlC5C5RjAw+MUYHVm/i4zdwI/AM4uXJMKy8xlwJbSdagcA3h4TAM29HvfVd8m6SBmAA+P2MM21/9JBzkDeHh0Acf0e3808FihWiSNEAbw8LgPOC4ijo2IQ4BzgSWFa5JUmAE8DDJzN/AR4L+AlcCizPxt2apUWkTcCNwNHB8RXRExr3RNGl5eiixJhdgBS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1Ih/w8+votFqJpcvAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#SVC with default hyperparameters: fit on the training set, evaluate on the held-out test set\n",
"\n",
"svc = SVC()\n",
"svc.fit(X_train,Y_train)\n",
"Y_prediction_svc = svc.predict(X_test)\n",
"\n",
"print('SVC Accuracy: {}'.format(accuracy_score(Y_test, Y_prediction_svc)))\n",
"#ROC AUC is a ranking metric, so it is computed from the continuous decision scores;\n",
"#passing the hard 0/1 predictions would reduce the curve to a single threshold.\n",
"print('ROC Score: {}'.format(roc_auc_score(Y_test, svc.decision_function(X_test))))\n",
"\n",
"#SVC Confusion Matrix (rows = actual class, columns = predicted class).\n",
"#The matrix is built in the order [1, 0], so the same order is passed as tick labels;\n",
"#otherwise the heatmap would label the positions 0, 1 and the ticks would name the wrong class.\n",
"svc_cm_labels = [1,0]\n",
"svccfm = confusion_matrix(Y_test,Y_prediction_svc,labels=svc_cm_labels)\n",
"svc_cm_ax = sns.heatmap(svccfm, annot=True,fmt='g', cmap='coolwarm', xticklabels=svc_cm_labels, yticklabels=svc_cm_labels)\n",
"svc_cm_ax.set(xlabel='Predicted diagnosis', ylabel='Actual diagnosis')\n",
"\n",
"#Printing classification report\n",
"print(classification_report(Y_test, Y_prediction_svc))"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hyperparameters to use:\n",
" {'C': 0.1, 'kernel': 'linear'}\n",
"Best Score: \n",
" 0.9748417721518987\n",
"Tuned SVC Accuracy: 0.9824561403508771\n",
"ROC Score: 0.9794973544973545\n",
" precision recall f1-score support\n",
"\n",
" 0 0.98 0.99 0.99 108\n",
" 1 0.98 0.97 0.98 63\n",
"\n",
" accuracy 0.98 171\n",
" macro avg 0.98 0.98 0.98 171\n",
"weighted avg 0.98 0.98 0.98 171\n",
"\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQMUlEQVR4nO3dfZBddX3H8fd3Nw88KJJEEpYEJWpA8KGgAfGhisYZqIgJ00ahlUYaZ/8BRQcKwdaidUSGqsWxZaYroBlEIFKUAB3bTChmGBGIiEWIEAQSNlkSJoSHQgjJ3m//yMVucLN7c3Ozv71n36+Z39y959w958tM5rNffud3zo3MRJI08jpKFyBJY5UBLEmFGMCSVIgBLEmFGMCSVMi4vX2CTb/9hcss9EdO/XKtdAkahVb85AOxp8e4dfwRDWfOydse2uPz7Qk7YEkqZK93wJI0kmJ80aZ2txjAkiqlc9/O0iU0zACWVCkd4+yAJakIpyAkqRA7YEkqxA5YkgqxA5akQjontM/tDQawpEqJDjtgSSoiOtunA26fSiWpAR2d0fAYTkRcFREbI+K3A7ZNjohlEbG6/jppwL4LI+KRiHgoIk4cttam/yslaRSKjmh4NOAHwEmv2rYIWJ6Zs4Dl9fdExFHAacDb6r9zeUQMeVueASypUjondDQ8hpOZK4CnX7V5LrC4/vNiYN6A7ddl5tbMfAx4BDhuqOMbwJIqJTo6Gh8R3RGxcsDobuAU0zKzD6D+OrW+fTrwxIDP9da37ZIX4SRVyu6sgsjMHqCnVace7BRD/YIBLKlSGrm4toc2RERXZvZFRBewsb69Fzh0wOdmAOuHOpBTEJIqpcUX4QazFFhQ/3kBcNOA7adFxMSImAnMAu4e6kB2wJIqJTpa11dGxLXACcDrI6IXuAi4BFgSEQuBtcB8gMx8ICKWAA8C24GzMrN/qOMbwJIqpXN86wI4M0/fxa45u/j814GvN3p8A1hSpXgrsiQV0sopiL3NAJZUKXbAklSIASxJhXSM81uRJamIEbgRo2UMYEmV4hSEJBXiKghJKsQOWJIKMYAlqRBXQUhSIc4BS1Ip4RSEJBXhHLAkFeIUhCQVYgcsSYW4CkKSCrEDlqRSnAOWpDLCZWiSVIarICSpkPAinCSV4UU4SSokwikISSrDDliSyvAinCQV4hywJBUSna6CkKQynIKQpDK8E04APP/Ci3zj8u/z6NpeIoIvnfU3PLVpM1de/1MeX9fHFZd8mSPfMrN0mSpk6pQJfOmcw5kyaQK1WnLzsg3ccMv60mW1PztgAVx21TUcf8zbufhvz2Lbtu289PLLvHb//bj4/LO59N8Wly5PhfXXkst/8BgPP/oC++7TyRXfOpp77tvMmt4tpUtra5W6CBcRbwXmAtOBBNYDSzNz1V6ura298OIW7nvwYf7+7M8CMH78OMaPH8dr99+vcGUaLTZt3samzdsA2PJSP2t6X+SgKRMN4D3VwhsxIuKLwGfZkX33A2cC+wHXA4cBjwOfzMzNzRx/yEoj4gLgOiCAu4F76j9fGxGLmjnhWLFuw1MceMBr+fq/XMmC8y7iG5dfxZaXtpYuS6PUwQdNZNbM/Xnw4edLl9L2orOz4THkcSKmA58HZmfm24FO4DRgEbA8M2cBy+vvmzLcn4qFwLGZeUlm/rA+LgGOq+/bVeHdEbEyIlYu/vFNzdbW1vr7+3n40TWceuKHWfzNr7LPxIlc/ZNbS5elUWjffTr42gVH8t2rHuPFLf2ly2l/HdH4GN44YN+IGMeOznc9O2YEXplDXAzMa7rUYfbXgEMG2d5V3zeozOzJzNmZOXvB/LnN1tbWpk6ZzEFTJvG2w98MwIffeywPPbqmcFUabTo7g6+dfyTLVmxkxS83lS6nEqKjo/ExoFmsj+5XjpOZ64BvAmuBPuDZzPwvYFpm9tU/0wdMbbbW4eaAvwAsj4jVwBP1bW
8A3gKc3exJx4Ipk17HtNdPZs26Pt44vYuV9z/IzBmD/S3TWHbBWbNY0/siS5a6+qFldmMZWmb2AD2DHyYmsaPbnQk8A/w4Ij7dihJfMWQAZ+bPIuJwdkw5TGfH/G8vcE9m+v9Kw/jiwk/z1e/0sG3bdg6ZdhB/d/ZCfn7Xr/j2FdfwzHPPc97FlzHrsEO57B/OK12qCnjHkQdw0oen8vvHX+DKbx8NwPd+uIZf3tvU9Ry9onXL0D4KPJaZTwFExI3A+4ANEdGVmX0R0QVsbPYEw66CyMwa8MtmTzCWHT7zDVx16UU7bfvQe97Nh97z7kIVaTS5f9VzfPDUO0qXUTktvBV5LXB8ROwHbAHmACuBF4AFwCX116YvdLkOWFK1tGgZWmbeFRE3APcC24Ffs2O64jXAkohYyI6Qnt/sOQxgSdXSwhsxMvMi4KJXbd7Kjm54jxnAkirFb8SQpFKqdCuyJLUVO2BJKsQHsktSIXbAklSIc8CSVIgdsCQV4lcSSVIhfiWRJBXS4SoISSrDDliSCnEOWJIKcRWEJBViByxJZaS3IktSIU5BSFIhBrAklZHOAUtSIXbAklSIHbAkleEqCEkqxSkISSojDWBJKsQ5YEkqww5YkkrxecCSVIY3YkhSKU5BSFIZiR2wJBXhRThJKsUAlqQyam20CqJ9/lRIUiMiGh/DHioOjIgbIuJ3EbEqIt4bEZMjYllErK6/Tmq2VANYUqVkdDQ8GvAd4GeZ+VbgT4BVwCJgeWbOApbX3zfFAJZUKUk0PIYSEQcAHwSuBMjMlzPzGWAusLj+scXAvGZrNYAlVcrudMAR0R0RKweM7gGHehPwFPD9iPh1RFwREfsD0zKzD6D+OrXZWr0IJ6laduNOuMzsAXp2sXsc8C7gc5l5V0R8hz2YbhiMHbCkSqlFZ8NjGL1Ab2beVX9/AzsCeUNEdAHUXzc2W6sBLKlSWnURLjOfBJ6IiCPqm+YADwJLgQX1bQuAm5qt1SkISZXS4luRPwdcExETgEeBM9nRuC6JiIXAWmB+swc3gCVVSitvRc7M+4DZg+ya04rjG8CSKsXHUUpSIQ1cXBs1DGBJleLjKCWpEB9HKUmF2AFLUiF2wAPMvXDb3j6F2tCFP+se/kMagx7a4yPYAUtSIbU2usHXAJZUKWkAS1IZTkFIUiEGsCQVYgBLUiEGsCQVUksvwklSEXbAklSIASxJhWQawJJURM0OWJLK8CKcJBXiHLAkFeIcsCQVYgcsSYXYAUtSIbXSBewGA1hSpbgKQpIKcQpCkgrxIpwkFVLL0hU0zgCWVCl2wJJUiHPAklRIvwEsSWW0UwfcPgvmJKkBmY2PRkREZ0T8OiJuqb+fHBHLImJ1/XVSs7UawJIqJYmGR4POAVYNeL8IWJ6Zs4Dl9fdNMYAlVUotGx/DiYgZwMnAFQM2zwUW139eDMxrtlYDWFKl1GrR8IiI7ohYOWB0v+pwlwHns/MjJqZlZh9A/XVqs7V6EU5SpezOVxJlZg/QM9i+iPg4sDEzfxURJ7Smup0ZwJIqpdGLaw14P/CJiPgYsA9wQET8ENgQEV2Z2RcRXcDGZk/gFISkSsmMhsfQx8kLM3NGZh4GnAbclpmfBpYCC+ofWwDc1GytdsCSKmUEngVxCbAkIhYCa4H5zR7IAJZUKS2cghhwzLwduL3+8yZgTiuOawBLqhRvRZakQvZGB7y3GMCSKsUAlqRCak5BSFIZdsCSVEh/G30vvQEsqVLa6XnABrCkSnEKQpIK8VuRJakQO2BJKsQAlqRCXAUhSYXUDGBJKsMpCEkqxADWTi78/OG879gpbH52G3999srS5WiEvfN7FzP1Yyfw8sZNrDjmFADGT3odx/zon9nvjdN5cc067j39C2x/5jkOOf0U3nTuwj/87gHvOII7jjuV537zu1Llt512WobmVxKNgP9YvoFzv3J/6TJUSO/iG7n745/dadubz+9m0213cvtRJ7Lptjt5y/k7vox3/b
U3c8fsedwxex6/+cz5bHl8neG7mzKz4VGaATwCfvPAszz3/LbSZaiQp+9Yybann91p27RT5tB79U8B6L36p0z7xEf/6PcO+dTJrL/+lhGpsUr6+xsfpRnAUgETp01h65NPAbD1yaeYOHXyH32ma/7HWHf9rSNdWtvLbHyU1nQAR8SZQ+zrjoiVEbHyyTU3N3sKacw68Lh30r9lC//7wOrSpbSdWjY+StuTDviru9qRmT2ZOTszZx/8xlP24BRSNW3dsImJBx8EwMSDD2Lrxqd32t/1yZNZf53dbzPaqQMechVERPzPrnYB01pfjjQ2bLjlNmacMY/f/9P3mHHGPDbcvPz/d0bQ9ecncedH/qpcgW0sd6u1LfvoyuGWoU0DTgQ2v2p7AL/YKxVV0FfOO5Kj3/E6DjxgPDd+/3iu/NHj3LrsydJlaYQcffW3mPKh45jw+kl85LGfs/ofv8vvL+3hXddexqFn/gVbnujj3tPO+cPnJ//psby07km2PNZbsOr2VaVbkW8BXpOZ9716R0TcvlcqqqCvfHNV6RJU0H1nnDvo9rtO/Myg259ecTe/+MCn9mJF1VYbDZO7DRoygDNz4RD7/rL15UjSnhkNc7uN8k44SZViAEtSIbU2SmADWFKlZIUuwklSW+nvtwOWpCJGw0N2GmUAS6qUNlqFZgBLqpbduxOuLJ+GJqlSWvUsiIg4NCL+OyJWRcQDEXFOffvkiFgWEavrr5OardUAllQptVo2PIaxHTg3M48EjgfOioijgEXA8sycBSyvv2+KUxCSKqXWolUQmdkH9NV/fj4iVgHTgbnACfWPLQZuBy5o5hx2wJIqpZbZ8Bj47PL66B7smBFxGHAMcBcwrR7Or4T01GZrtQOWVCm7swwtM3uAnqE+ExGvAf4d+EJmPhfRukdYGsCSKqWVT0OLiPHsCN9rMvPG+uYNEdGVmX0R0QVsbPb4TkFIqpQWroII4EpgVWZ+e8CupcCC+s8LgJuardUOWFKl9LfuiezvB84A7o+IV56J/iXgEmBJRCwE1gLzmz2BASypUlp1I0Zm3sGuv7NoTivOYQBLqpR2uhPOAJZUKW2UvwawpGqxA5akQnwcpSQV0sJVEHudASypUpyCkKRCDGBJKsRvRZakQuyAJakQV0FIUiH9210FIUlF2AFLUiFZswOWpCJa+UD2vc0AllQpTkFIUiE1L8JJUhm1NIAlqQhvxJCkQgxgSSrEi3CSVEjNdcCSVEatv790CQ0zgCVVinPAklSIASxJhbgOWJIKsQOWpEJ8GpokFeIqCEkqxMdRSlIhTkFIUiFehJOkQtJlaJJURm17+1yEi3Z6clC7i4juzOwpXYdGF/9djF0dpQsYY7pLF6BRyX8XY5QBLEmFGMCSVIgBPLKc59Ng/HcxRnkRTpIKsQOWpEIMYEkqxAAeIRFxUkQ8FBGPRMSi0vWovIi4KiI2RsRvS9eiMgzgERARncC/An8GHAWcHhFHla1Ko8APgJNKF6FyDOCRcRzwSGY+mpkvA9cBcwvXpMIycwXwdOk6VI4BPDKmA08MeN9b3yZpDDOAR0YMss31f9IYZwCPjF7g0AHvZwDrC9UiaZQwgEfGPcCsiJgZEROA04ClhWuSVJgBPAIycztwNvCfwCpgSWY+ULYqlRYR1wJ3AkdERG9ELCxdk0aWtyJLUiF2wJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUyP8BvPPcGKDxLH8AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#SVC with hyperparameters\n",
"\n",
"svc2 = SVC()\n",
"parameters = {'kernel': ['poly', 'sigmoid','linear','rbf'],'C': [50, 10, 1.0, 0.1, 0.01] }\n",
"svc2_grid = GridSearchCV(svc2, param_grid=parameters)\n",
"svc2_grid.fit(X_train,Y_train)\n",
"print(\"Hyperparameters to use:\\n\",svc2_grid.best_params_)\n",
"print(\"Best Score: \\n\",svc2_grid.best_score_)\n",
"svc2_pred = svc2_grid.predict(X_test)\n",
"\n",
"\n",
"print('Tuned SVC Accuracy: {}'.format(accuracy_score(Y_test, svc2_pred)))\n",
"print('ROC Score: {}'.format(roc_auc_score(Y_test, svc2_pred)))\n",
"\n",
"#Tuned SVC Confusion Matrix\n",
"svc2cfm = confusion_matrix(Y_test,svc2_pred,labels=[1,0])\n",
"sns.heatmap(svc2cfm, annot=True,fmt='g', cmap='coolwarm')\n",
"\n",
"#Printing new classification report\n",
"print(classification_report(Y_test, svc2_pred))"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Logistic Regression Accuracy: 0.9824561403508771\n",
"ROC Score: 0.9828042328042329\n",
" precision recall f1-score support\n",
"\n",
" 0 0.99 0.98 0.99 108\n",
" 1 0.97 0.98 0.98 63\n",
"\n",
" accuracy 0.98 171\n",
" macro avg 0.98 0.98 0.98 171\n",
"weighted avg 0.98 0.98 0.98 171\n",
"\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQoklEQVR4nO3df7DVdZ3H8ef7Xn742xAFb2BJG5qYm7Zmlk1a1GhlSru5Q21IDu2dbcxsx1awyX67625t2TZbLWV507QYcwK1LAYjljKN1BWNCBPFq8h1UdSUFO557x+cdq4Il8PhcD/3fHk+Zr5zzvl+D9/v+zJ3Xrz5fD/f7zcyE0nS0OsoXYAk7akMYEkqxACWpEIMYEkqxACWpEJG7O4DbLhzsdMs9AKnXxylS9AwtPT6k3f5F+PGkUc2nDnv2LSy6C+iHbAkFbLbO2BJGkoxsn3+d2UAS6qUzr07S5fQMANYUqV0jLADlqQiHIKQpELsgCWpEDtgSSrEDliSCukc1T6XN7RPpZLUgOiIhpcd7iviWxHRFxF3D1h3UEQsjIhV9dcxA7ZdFBH3RsTKiDh1R/s3gCVVSnR2NLw04ArgtK3WzQEWZeZkYFH9MxExBZgOHF3/M1+NiEEnJRvAkiqlozMaXnYkM5cAj221+kygp/6+B5g2YP33MvPZzFwN3AucMGitO/ODSdJwtzNDEBHRHRHLBizdDRxifGauBai/jquvnwA8OOB7vfV12+VJOEmVsjMn4TJzLjC3RYfeVks96J3ZDGBJlRIdu/0/9usioisz10ZEF9BXX98LHDbgexOBhwfbkUMQkiqllbMgtmMBMLP+fiYwf8D66RExOiImAZOB2wbbkR2wpEpp5ORaoyLiGuAU4OCI6AU+CVwKzIuIWcAa4CyAzLwnIuYBvwU2A+dmZv9g+zeAJVXKLnS2L5CZ79nOpqnb+f4lwCWN7t8AllQpQzAG3DIGsKRK6RxpAEtSEa0cgtjdDGBJleIQhCQVYgcsSYUYwJJUSMcIn4osSUW08kKM3c0AllQpDkFIUiHOgpCkQuyAJakQA1iSCnEWhCQV4hiwJJUSDkFIUhGOAUtSIQ5BSFIhdsCSVIizICSpEDtgSSrFMWBJKiOchiZJZTgLQpIKCU/CSVIZnoSTpEIiHIKQpDLsgCWpDE/CSVIhjgFLUiHR6SwISSrDIQhJKsMr4QTAU08/wyX/dSX3PfgQQfDxD57Nz267g6W/uYuRI0YwYfwhXPzBmey/7z6lS1UBF334CF7/mrE8/sQmzv7QstLlVEcLO+CI+EfgA0ACy4FzgH2A7wOHA/cDf5uZjzez//bp1dvQF6/4Pq971dHM+9JnuOrzF3P4hC5OOGYKV3/hk3z385/gJV3j6Pnhj0uXqUJ+tGgdF3xqeekyKic6ouFl0P1ETAA+DByfma8EOoHpwBxgUWZOBhbVPzdlhwEcEa+IiNkR8R8R8eX6+6OaPeCe4o/PbOSOFas4480nATByxAj233cfTnzVFEbUTxK8cvLL6Fu/oWSZKuh/7nmCJ5/aVLqM6omOxpcdGwHsHREj2NL5PgycCfTUt/cA05otddAKImI28D0ggNuAX9ffXxMRTaf+nuDhvv9lzAH789mv9TBj9ue45OvfYeOfnn3ed67/2S943XFHF6pQqqbo7Gx8ieiOiGUDlu4/7yczHwK+AKwB1gJPZOZPgfGZubb+nbXAuGZr3dE/AbOA12TmpZl5VX25FDihvm3bfwEDfqgrfnB9s7W1tf7+flauXsNfv/VkrvzXj7PXXqPpmX/T/2//9nU/orOzk9Pe8NqCVUoV1BENL5k5NzOPH7DM/fNuImIMW7rdScCLgX0j4n0tLXUH22v1A2+tq75tmwb+UO//m3fuSn1ta9zYMYwbO4ZXTp4EwJtf+2pWrl4DwI0/v4Wlt9/FZ86b1VZnbKV2EB0dDS878BZgdWY+mpmbgO
uA1wPrIqILoP7a12ytO5oF8RFgUUSsAh6sr3sJ8HLgQ80edE8w9kUHMm7sGB54+BFe+uJDWXb375g0sYtb7ryb78z/CV//1AXsNXpU6TKl6mldU7MGODEi9gE2AlOBZcDTwEzg0vrr/GYPMGgAZ+ZNEXEEW4YcJrBl/LcX+HVm9jd70D3FR8+Zzie+cjmbN/fz4nEHc/EHZ3LOx/6F5zZv5rzPXQZsORE35+//rnClKuFTHz2KY485kBcdMJLrvn0il199PzcufKR0We2vRdPQMvPWiLgWuB3YDNwBzAX2A+ZFxCy2hPRZzR4jMrMVtW7XhjsX794DqC2dfrFDL3qhpdefvMu/GBuv/FzDmbP3jI8X/UX0QgxJ1eL9gCWpEO+GJkll+EQMSSrFDliSCrEDlqRCvCG7JBViByxJhTgGLEmF2AFLUiFtdIMrA1hStfhQTkkqpMNZEJJUhh2wJBXiGLAkFeIsCEkqxA5YkspIL0WWpEIcgpCkQgxgSSojHQOWpELsgCWpEDtgSSrDWRCSVIpDEJJURhrAklSIY8CSVIYdsCSV4v2AJakML8SQpFIcgpCkMhI7YEkqop1OwrVPpZLUiOhofNnRriJeFBHXRsTvImJFRLwuIg6KiIURsar+OqbZUg1gSZVS6+hseGnAl4GbMvMVwKuAFcAcYFFmTgYW1T83xQCWVC0RjS+D7iYOAN4IXA6Qmc9l5gbgTKCn/rUeYFqzpRrAkiolo6PhZQdeBjwKfDsi7oiIb0bEvsD4zFwLUH8d12ytBrCkSkmi4SUiuiNi2YCle8CuRgCvBr6WmccBT7MLww3b4iwISZWyM7MgMnMuMHc7m3uB3sy8tf75WrYE8LqI6MrMtRHRBfQ1W6sdsKRqadEYcGY+AjwYEUfWV00FfgssAGbW180E5jdbqh2wpEqpRUvvBXEe8N2IGAXcB5zDlsZ1XkTMAtYAZzW7cwNYUqW08kKMzLwTOH4bm6a2Yv8GsKRK8VJkSSqknS5FNoAlVYq3o5SkQlp8Em63MoAlVYpjwJJUiGPAklSIHbAkFWIHPMAZnzbj9UIX3TSrdAkallbu8h7sgCWpkFob3eLGAJZUKWkAS1IZDkFIUiEGsCQVYgBLUiEGsCQVUktPwklSEXbAklSIASxJhWQawJJURM0OWJLK8CScJBXiGLAkFeIYsCQVYgcsSYXYAUtSIbXSBewEA1hSpTgLQpIKcQhCkgrxJJwkFVLL0hU0zgCWVCl2wJJUiGPAklRIfxsFcPvM15CkBmRGw0sjIqIzIu6IiBvqnw+KiIURsar+OqbZWg1gSZWS2fjSoPOBFQM+zwEWZeZkYFH9c1MMYEmVkkTDy45ExETgHcA3B6w+E+ipv+8BpjVbqwEsqVJq2fgSEd0RsWzA0r3V7i4DLuT5VziPz8y1APXXcc3W6kk4SZVSqzV+Ei4z5wJzt7UtIk4H+jLzNxFxSmuqez4DWFKltPCRRCcBZ0TE24G9gAMi4ipgXUR0ZebaiOgC+po9gEMQkiqlVSfhMvOizJyYmYcD04GbM/N9wAJgZv1rM4H5zdZqByypUobgQoxLgXkRMQtYA5zV7I4MYEmVsjvuBZGZi4HF9ffrgamt2K8BLKlSdmJ+b3EGsKRKaadLkQ1gSZViByxJhRjAklRIzSEISSrDDliSCulvo+fSG8CSKsUnYkhSIQ5BSFIhPhVZkgqxA5akQgxgSSrEWRCSVEjNAJakMhyCkKRCDGA9z7ixo/jY+UcwdswoarXk+oXruPaGh0uXpSHyl9/4Z8a9/RSe61vPkuPeCcDIMQdy3NVfYp+XTuCZBx7i9vd8hM0bngRg/2OO5JivfpoR++9HZo1fnPhuas8+V/JHaCvtNA3NZ8INgf5a8tUrVjPjvNv5h9l38a63dfHSiXuXLktDpLfnOm47/QPPW/cXF3az/uZbWDzlVNbffAsvv3DL09Cjs5Njez7P8nM/yZJjT+dXU8+mtmlzibLbVmY2vJ
RmAA+B9Y9v4vf3PQ3Axj/180DvMxwydnThqjRUHlu6jE2PPfG8dePfOZXeK38IQO+VP2T8GW8B4OC3nsRTy1fy1F0rAdj02Ib2Oqs0DPT3N76UZgAPsUMPGc3kSfvy298/VboUFTR6/FiefeRRAJ595FFGjzsIgH2PmERmcsKN3+QNt13Hyy74wGC70Ta06qnIQ6HpAI6IcwbZ1h0RyyJi2dr7FzR7iMrZe68OPjv7KL7yrdU8s3EY/POrYaejs5ODXv9X3HH2P/HLk9/LodPewtg3nVi6rLZSy8aX0nalA/709jZk5tzMPD4zj+86/IxdOER1dHYGn73wKBYu6WPJr9aXLkeFPbtuPaMPPQSA0YcewrN9jwGw8aFHWP/ft7Fp/ePUNv6Jvh8v4cDjji5ZatupTAccEXdtZ1kOjB+iGith9rmTeaD3GeYtcPaDYN0NNzNxxjQAJs6YxrrrFwHw6E+XcsAxR9Kx915EZydj3/ga/rji3pKltp2sZcNLaTuahjYeOBV4fKv1Afxyt1RUQcccdQCnvWkcf7j/aS7/4rEAfOOqB/jV7Vv/taqKjr3y3xl78gmMOngMb179c1Z95iv84d/m8uprLuOwc97NxgfXcvv08wHYvOFJVl92BW+45VrIpO+mJfT9+OeFf4L2UqVLkW8A9svMO7feEBGLd0tFFbR8xZO88V1LS5ehQu6cccE219966vu3uf6hqxfw0NWeO2lWbRh0to0aNIAzc9Yg297b+nIkadcMh7HdRnklnKRKMYAlqZBaGyWwASypUrJCJ+Ekqa3099sBS1IRw+EmO40ygCVVShvNQvNmPJKqpVVXwkXEYRHxs4hYERH3RMT59fUHRcTCiFhVfx3TbK0GsKRKaeG9IDYDF2TmUcCJwLkRMQWYAyzKzMnAovrnpjgEIalSWnUlXGauBdbW3z8VESuACcCZwCn1r/UAi4HZzRzDAJZUKbWdmAUREd1A94BVczNz7ja+dzhwHHArML4ezmTm2ogY12ytBrCkStmZCzHqYfuCwB0oIvYDfgB8JDOfjIhdK3AAA1hSpbRyGlpEjGRL+H43M6+rr14XEV317rcL6Gt2/56Ek1QptVo2vAwmtrS6lwMrMvOLAzYtAGbW388E5jdbqx2wpEppYQN8EjADWB4Rf74l78eAS4F5ETELWAOc1ewBDGBJldLfojuyZ+ZStjx8YlumtuIYBrCkShkOjxpqlAEsqVIMYEkqpI3y1wCWVC12wJJUiLejlKRCWjULYigYwJIqxSEISSrEAJakQnwqsiQVYgcsSYU4C0KSCunf7CwISSrCDliSCsmaHbAkFdGqh3IOBQNYUqU4BCFJhdQ8CSdJZdTSAJakIrwQQ5IKMYAlqRBPwklSITXnAUtSGbX+/tIlNMwAllQpjgFLUiEGsCQV4jxgSSrEDliSCvFuaJJUiLMgJKkQb0cpSYU4BCFJhXgSTpIKSaehSVIZtc3tcxIu2unOQe0uIrozc27pOjS8+Hux5+ooXcAeprt0ARqW/L3YQxnAklSIASxJhRjAQ8txPm2Lvxd7KE/CSVIhdsCSVIgBLEmFGMBDJCJOi4iVEXFvRMwpXY/Ki4hvRURfRNxduhaVYQAPgYjoBP4TeBswBXhPREwpW5WGgSuA00oXoXIM4KFxAnBvZt6Xmc8B3wPOLFyTCsvMJcBjpetQOQbw0JgAPDjgc299naQ9mAE8NGIb65z/J+3hDOCh0QscNuDzRODhQrVIGiYM4KHxa2ByREyKiFHAdGBB4ZokFWYAD4HM3Ax8CPgJsAKYl5n3lK1KpUXENcAtwJER0RsRs0rXpKHlpciSVIgdsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEGsCQV8n/T9gWFXvc9CQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#Logistic Regression\n",
"\n",
"lr = LogisticRegression()\n",
"lr.fit(X_train,Y_train)\n",
"Y_prediction_lr= lr.predict(X_test)\n",
"\n",
"print('Logistic Regression Accuracy: {}'.format(accuracy_score(Y_test, Y_prediction_lr)))\n",
"print('ROC Score: {}'.format(roc_auc_score(Y_test, Y_prediction_lr)))\n",
"\n",
"#LR Confusion Matrix\n",
"lrcfm = confusion_matrix(Y_test,Y_prediction_lr,labels=[1,0])\n",
"sns.heatmap(lrcfm, annot=True,fmt='g', cmap='coolwarm')\n",
"\n",
"#Printing classification report\n",
"print(classification_report(Y_test, Y_prediction_lr))"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hyperparameters to use: {'C': 0.39989280818902445, 'penalty': 'l2', 'solver': 'liblinear'}\n",
"Best Score: 0.9751075949367088\n",
"Tuned LR Accuracy: 0.9941520467836257\n",
"ROC Score: 0.9920634920634921\n"
]
}
],
"source": [
"#Logistic Regression + Random search\n",
"\n",
"lr2 = LogisticRegression()\n",
"cv = RepeatedStratifiedKFold(n_splits=5)\n",
"lrhp = {'penalty': ['none', 'l1', 'l2', 'elasticnet'],'solver':['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], 'C': loguniform(1e-5, 100)}\n",
"randomsearch = RandomizedSearchCV(lr2, lrhp, n_iter=500, cv=cv, n_jobs=-1, scoring='accuracy')\n",
"lr2_random = randomsearch.fit(X_train, Y_train)\n",
"print('Hyperparameters to use: %s' % lr2_random.best_params_)\n",
"print('Best Score: %s' % lr2_random.best_score_)\n",
"\n",
"#Tuned prediction variable\n",
"lr2_pred = lr2_random.predict(X_test)\n",
"print('Tuned LR Accuracy: {}'.format(accuracy_score(Y_test, lr2_pred)))\n",
"print('ROC Score: {}'.format(roc_auc_score(Y_test, lr2_pred)))\n"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tuned Logistic Regression Accuracy: 0.9941520467836257\n",
"ROC Score: 0.9920634920634921\n",
" precision recall f1-score support\n",
"\n",
" 0 0.99 1.00 1.00 108\n",
" 1 1.00 0.98 0.99 63\n",
"\n",
" accuracy 0.99 171\n",
" macro avg 1.00 0.99 0.99 171\n",
"weighted avg 0.99 0.99 0.99 171\n",
"\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAARPElEQVR4nO3de5RdVX3A8e9vZpIQpEASkxASnjWgoNJoBHxRNHRBfZAsF6xCC40QnVULNlQtRq1FWh/Utqht1a5RgSgWFo0IESyKAQpYXuGhPCIGYZEMhCQSFOWRZOb++kcudIiTyc3Nndm5J9/PWnvde/Y5c86PlVm/+bHP3udEZiJJGnkdpQOQpJ2VCViSCjEBS1IhJmBJKsQELEmFdA33Bdb99CanWeh3zDknSoegHdCN333Ldv9iXD3q4IZzzjs3Plj0F9EKWJIKGfYKWJJGUoxqn/+7MgFLqpTOsZ2lQ2iYCVhSpXR0WQFLUhEOQUhSIVbAklSIFbAkFWIFLEmFdI5un+UNJmBJlRIdVsCSVER0WgFLUhEdne1TAbfPnwpJakB0RMNtq+eKuCAi1kTEfQP6xkfEtRGxvP45bsC+j0XEQxHxYEQcu7Xzm4AlVUrn6I6GWwMuAo7brG8BsCQzpwNL6ttExCHAScCh9Z/5SkQMuS7aBCypUqKjo+G2NZl5I7Bus+7ZwML694XAnAH9l2bm+sx8BHgIOHyo85uAJVXKtgxBRER3RCwd0LobuMTkzFwFUP+cVO+fCqwccFxvvW+LvAknqVK25SZcZvYAPS269GAXHvLh8CZgSZUyAvOAV0fElMxcFRFTgDX1/l5gnwHHTQMeH+pEDkFIqpRWjgFvwWJgbv37XODKAf0nRcSYiDgAmA7cPtSJrIAlVUrnqNbVlRFxCXA08PKI6AXOAc4DLouIecAK4ESAzLw/Ii4DHgD6gDMys3+o85uAJVVKK4cgMvPkLeyatYXjPwN8ptHzm4AlVcp2DC2MOBOwpErxYTySVIgJWJIK6ejyrciSVEQ7PQ3NBCypUhyCkKRCnAUhSYVYAUtSISZgSSrEWRCSVIhjwJJUSjgEIUlFOAYsSYU4BCFJhVgBS1IhzoKQpEKsgCWpFMeAJamMcBqaJJXhLAhJKiS8CSdJZXgTTpIKiXAIQpLKsAKWpDK8CSdJhTgGLEmFRKezICSpDIcgJKkMV8IJgN888yyf++pCfrHyMSLgEx84jRtuu4ub7/wJo7o6mTp5En97xmn83st2LR2qCvjomdN508xxPPXrjbx3/t2lw6kOK2ABfOHCSzhyxqF89iMfYOPGPp7fsIHDnz+ED/zZe+jq7OTLFy/im9/9PmecckLpUFXANdet5rvff5yPzz+odCiVUqmbcBHxSmA2MBVI4HFgcWYuG+bY2tozzz7HPQ8s55NnnA7AqFFdjBrVxRGHHfriMYdOP5Drb72zVIgq7CcPPM1eE8eUDqN62mghxpCRRsRHgUuBAG4H7qh/vyQiFgx/eO3rsdVr2XP33fj0ly/kz//mXD771Yt47vn1Lznmqutv5o0zXl0oQqmaorOz4bbVc0X8dUTcHxH3RcQlEbFLRIyPiGsjYnn9c1yzsW7tT8U84A2ZeV5mXlxv5wGH1/dtKejuiFgaEUsXLlrcbGxtrb9W4+ePrOA9xx7NN//pHMaOGcM3r/jvF/df9J2r6Ozo5Ni3HlkwSqmCOqLxNoSImAr8FTAzM18NdAInAQuAJZk5HVhS324u1K3srwF7D9I/pb5vUJnZk5kzM3Pm3BOObza2tjZp/DgmThjHodMPBOBtb3w9P3/4UQCuvuHH/PjOn3Lu/Pe11R1bqR1ER0fDrQFdwNiI6AJ2ZdMQ7GxgYX3/QmBOs7FubQz4LGBJRCwHVtb79gVeAZzZ7EV3BhPG7cHkCeN59LEn2G/qXiy9dxn7T9ubW+6+j4uvuIavnHs2u4xx/E9quW0oai
KiG+ge0NWTmT0AmflYRPwzsAJ4DvhhZv4wIiZn5qr6MasiYlKzoQ6ZgDPzmog4iE1DDlPZNP7bC9yRmf3NXnRn8aHTT+ZT//o1Nvb1MXXyRD7xl6dx+oJPs7Gvj/n/cD4Ahx50IB/tPrVwpCrh7z50MDMO3YM9du9i0dfewIWXruDqJatLh9X+tmEaWj3Z9gy2rz62Oxs4APgV8F8RcUorQnzBVmdBZGYNuLWVF91ZHHTAvlz4j598Sd+if/9coWi0o/n78x8sHUIltXAp8jHAI5m5FiAiLgfeBKyOiCn16ncKsKbZC7TPfA1JakR0NN6GtgI4MiJ2jU03a2YBy4DFwNz6MXOBK5sN1YUYkqqlRQsxMvO2iFgE3AX0AXezabhiN+CyiJjHpiR9YrPXMAFLqpRWvhEjM88Bztmsez2bquHtZgKWVC1VWoosSW2ljZYim4AlVYsPZJekQqyAJakQx4AlqRArYEkqpI0ecGUCllQtvpJIkgrpcBaEJJVhBSxJhTgGLEmFOAtCkgqxApakMtKlyJJUiEMQklSICViSykjHgCWpECtgSSrECliSynAWhCSV4hCEJJWRJmBJKsQxYEkqwwpYkkrxecCSVIYLMSSpFIcgJKmMxApYkorwJpwklWIClqQyam00C6J9/lRIUiMiGm9bPVXsGRGLIuJnEbEsIt4YEeMj4tqIWF7/HNdsqCZgSZWS0dFwa8CXgGsy85XAYcAyYAGwJDOnA0vq200xAUuqlCQabkOJiN2Bo4BvAGTmhsz8FTAbWFg/bCEwp9lYTcCSKmVbKuCI6I6IpQNa94BTHQisBS6MiLsj4usR8TJgcmauAqh/Tmo2Vm/CSaqWbVgJl5k9QM8WdncBrwM+mJm3RcSX2I7hhsFYAUuqlFp0Nty2ohfozczb6tuL2JSQV0fEFID655pmYzUBS6qUVt2Ey8wngJURcXC9axbwALAYmFvvmwtc2WysDkFIqpQWL0X+IPDtiBgNPAycxqbC9bKImAesAE5s9uQmYEmV0sqlyJl5DzBzkF2zWnF+E7CkSvFxlJJUSAM313YYJmBJleLjKCWpEB9HKUmFWAFLUiFWwAMc/4nacF9Cbehj13Rv/SDthB7c7jNYAUtSIbU2WuBrApZUKWkClqQyHIKQpEJMwJJUiAlYkgoxAUtSIbX0JpwkFWEFLEmFmIAlqZBME7AkFVGzApakMrwJJ0mFOAYsSYU4BixJhVgBS1IhVsCSVEg7vQLCBCypUpwFIUmFOAQhSYV4E06SCqll6QgaZwKWVClWwJJUiGPAklRIfxsl4PaZryFJDciMhlsjIqIzIu6OiKvq2+Mj4tqIWF7/HNdsrCZgSZWS2Xhr0Hxg2YDtBcCSzJwOLKlvN8UELKlSkmi4bU1ETAPeCXx9QPdsYGH9+0JgTrOxmoAlVUotG28R0R0RSwe07s1O90XgbF66wnlyZq4CqH9OajZWb8JJqpRarfGbcJnZA/QMti8i3gWsycw7I+Lo1kT3UiZgSZXSwlcSvRk4PiLeAewC7B4RFwOrI2JKZq6KiCnAmmYv4BCEpEpp1U24zPxYZk7LzP2Bk4DrMvMUYDEwt37YXODKZmO1ApZUKSOwEOM84LKImAesAE5s9kQmYEmVMhzPgsjMG4Ab6t+fBGa14rwmYEmVsg3ze4szAUuqlHZaimwCllQpVsCSVIgJWJIKqTkEIUllWAFLUiH9bfReehOwpErxjRiSVIhDEJJUiG9FlqRCrIAlqRATsCQV4iwISSqkZgKWpDIcgpCkQkzA+h1HvG4c89//Cjo6gquuXcXFi1aWDkkj5LVf+yyT3nE0G9Y8yY0z3g3AqHF7MOM/v8Cu+03l2Ucf466Tz6LvV08TXV28tufT7D7jEDo6u+i9+Ap+8flB3xmpLWinaWi+E24EdHTAh/5iOh/51L2ccsYdHHPUJPbfZ9fSYWmE9C68nNvf9b6X9P3+2d08ed0t3HDIsTx53S284uxNb0Ofcs
JxdIwezU0zjuemI97Dvu//E8buN7VE2G0rMxtupZmAR8Crpu9O76rneHz18/T1JT+6cQ1vOWJC6bA0QtbdvJSN6379kr7J755F77euAKD3W1cw+fhjNu3IpPNlY4nOTjrH7kJtw0b6nv7tSIfc1vr7G2+lmYBHwMQJo1nzy/Uvbq99cj0TJ4wpGJFKGzN5AuufWAvA+ifWMmbSeABWfecH9D/zHLNW3szbH76eh79wARuf+vVQp9JmWvVW5JHQdAKOiNOG2NcdEUsjYukTj36v2UtURgzybJAd4R9fO549D38tWauxZN+3cv30WRx41umMPWBa6bDaSi0bb6VtTwV87pZ2ZGZPZs7MzJl77ffu7bhENaz55QYmvfz/K96JE8bwy3Xrh/gJVd361U8yZq+JAIzZayLr16wDYO+T3sXaH9xE9vWxYe06nrrlLvZ8/WtKhtp2KlMBR8RPt9DuBSaPUIxt72fLn2afvccyZfIudHUFxxw1iR/f/mTpsFTQ6quuY9qpcwCYduocVn9vCQDPrVjFhLcdAUDnrmPZ8/DD+O2DDxeLsx1lLRtupW1tGtpk4Fjgqc36A/jfYYmogvprcP5/PMT5576Gjo7g6h89wSMrni0dlkbIH3zrX5jwh4cz+uXjePsj/8Pyv/83fvH5Hl53yRfZ57QTeG7lKu46aT4Aj3712xz29c9x1D1XQQS9Cy/nN/c+WPi/oL1UaSnyVcBumXnP5jsi4oZhiaiibr1zHbfeua50GCrgnlM/PGj/bce+93f6+p95lrtOnj/MEVVbbQeobBs1ZALOzHlD7PvT1ocjSdtnRxjbbZQr4SRViglYkgqptVEGNgFLqpSs0E04SWor/f1WwJJUxI7wkJ1G+SwISZXSqqXIEbFPRFwfEcsi4v6ImF/vHx8R10bE8vrnuGZjNQFLqpQWroTrAz6cma8CjgTOiIhDgAXAksycDiypbzfFBCypUlr1LIjMXJWZd9W//wZYBkwFZgML64ctBOY0G6tjwJIqZThWwkXE/sAM4DZgcmaugk1JOiImNXteE7CkSqltwyyIiOgGugd09WRmz2bH7AZ8BzgrM5+OwZ4v2yQTsKRK2ZaFGPVku8WX7kXEKDYl329n5uX17tURMaVe/U4B1jQbq2PAkiqlVe+Ei02l7jeAZZl5/oBdi4G59e9zgSubjdUKWFKltHAM+M3AqcC9EfHCEyE/DpwHXBYR84AVwInNXsAELKlSWrUOIzNvZtOzzwczqxXXMAFLqpT+NnoiuwlYUqXsCK8aapQJWFKlmIAlqZA2yr8mYEnVYgUsSYW00+MoTcCSKsVZEJJUiEMQklSICViSCvGtyJJUiBWwJBXiLAhJKqS/z1kQklSEFbAkFZI1K2BJKmI4Xso5XEzAkirFIQhJKqTmTThJKqOWJmBJKsKFGJJUiAlYkgrxJpwkFVJzHrAklVHr7y8dQsNMwJIqxTFgSSrEBCxJhTgPWJIKsQKWpEJ8GpokFeIsCEkqxMdRSlIh7TQE0VE6AElqpaxlw21rIuK4iHgwIh6KiAWtjtUKWFKlZIumoUVEJ/Bl4I+AXuCOiFicmQ+05AKYgCVVTK2vZTfhDgceysyHASLiUmA20LIEHO305KB2FxHdmdlTOg7tWPy9KCciuoHuAV09L/xbRMQJwHGZ+b769qnAEZl5Zquu7xjwyOre+iHaCfl7UUhm9mTmzAFt4B/CGOxHWnl9E7AkDa4X2GfA9jTg8VZewAQsSYO7A5geEQdExGjgJGBxKy/gTbiR5TifBuPvxQ4oM/si4kzgB0AncEFm3t/Ka3gTTpIKcQhCkgoxAUtSISbgETLcSxrVfiLigohYExH3lY5FZZiAR8CAJY1/DBwCnBwRh5SNSjuAi4DjSgehckzAI+PFJY2ZuQF4YUmjdmKZeSOwrnQcKscEPDKmAisHbPfW+yTtxEzAI2PYlzRKaj8m4JEx7EsaJbUfE/DIGPYljZLajwl4BGRmH/DCks
ZlwGWtXtKo9hMRlwC3AAdHRG9EzCsdk0aWS5ElqRArYEkqxAQsSYWYgCWpEBOwJBViApakQkzAklSICViSCvk/t1N2+Gk+gkMAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#Testing best parameters with new LR model to validate the accuracy score.\n",
"\n",
"lr3 = LogisticRegression( C = 0.08653165624875402, penalty = 'l2', solver = 'liblinear')\n",
"lr3.fit(X_train,Y_train)\n",
"Y_prediction_lr3= lr3.predict(X_test)\n",
"print('Tuned Logistic Regression Accuracy: {}'.format(accuracy_score(Y_test, Y_prediction_lr3)))\n",
"print('ROC Score: {}'.format(roc_auc_score(Y_test, Y_prediction_lr3)))\n",
"\n",
"#LR Confusion Matrix\n",
"lrcfm = confusion_matrix(Y_test,Y_prediction_lr3,labels=[1,0])\n",
"sns.heatmap(lrcfm, annot=True,fmt='g', cmap='coolwarm')\n",
"\n",
"#Printing new classification report\n",
"print(classification_report(Y_test, lr2_pred))"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KNN Accuracy: 0.9707602339181286\n",
"ROC Score: 0.966931216931217\n",
" precision recall f1-score support\n",
"\n",
" 0 0.97 0.98 0.98 108\n",
" 1 0.97 0.95 0.96 63\n",
"\n",
" accuracy 0.97 171\n",
" macro avg 0.97 0.97 0.97 171\n",
"weighted avg 0.97 0.97 0.97 171\n",
"\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAARIElEQVR4nO3de5BcZZnH8e8zkwuXGMiFXCAo6AYQFiRyERUFDVuggmRVVrCALBV2/lAUXF0kFiWFCxa7q7uwitYGBSMoiIoQsGClgjGoSIzAgpANd0LCkAkJ94SQTD/7x7RbA4aZTmcy7/TJ91P1Vk+fbs55pir88uQ97zknMhNJ0uBrK12AJG2rDGBJKsQAlqRCDGBJKsQAlqRChm3tA6y6/06XWegvfPzcDaVL0BC08OeHx5bu4xfD9244cz6yYekWH29L2AFLUiFbvQOWpMEUw4s2tZvFAJZUKe3bt5cuoWEGsKRKaRtmByxJRTgFIUmF2AFLUiF2wJJUiB2wJBXSPqJ1Lm9onUolqQHRFg2PfvcVcXlEdEXEn3ptGxsRt0bEQ/XXMb0+mx0RD0fE0og4ur/9G8CSKiXa2xoeDfg+cMzrtp0DzM/MqcD8+nsiYl/gRGC/+n/z7Yjoc1GyASypUtrao+HRn8xcCKx53ebjgbn1n+cCM3ptvyYz12fmY8DDwKF91ro5v5gkDXWbMwURER0RsbjX6GjgEBMzsxOg/jqhvn034Mle31te3/aGPAknqVI25yRcZs4B5gzQoTfVUvd5ZzYDWFKlRNtW/4f9yoiYnJmdETEZ6KpvXw7s3ut7U4Cn+tqRUxCSKmUgV0G8gXnAzPrPM4Ebem0/MSJGRsSewFRgUV87sgOWVCmNnFxrVERcDRwJjI+I5cB5wEXAtRExC1gGnACQmfdHxLXAA8BG4DOZ2d3X/g1gSZWyBZ3tX8jMk97go+lv8P0LgQsb3b8BLKlSBmEOeMAYwJIqpX24ASxJRQzkFMTWZgBLqhSnICSpEDtgSSrEAJakQtqG+VRkSSpiIC/E2NoMYEmV4hSEJBXiKghJKsQOWJIKMYAlqRBXQUhSIc4BS1Ip4RSEJBXhHLAkFeIUhCQVYgcsSYW4CkKSCrEDlqRSnAOWpDLCZWiSVIarICSpkPAknCSV4Uk4SSokwikISSrDDliSyvAknCQV4hywJBUS7a6CkKQynIKQpDK8Ek4AvPjyy/zLpZfz6JPLCWD2Gafz5l0n85VvXMrTq55h0i7j+eoXz2D0qB1Ll6oCRgwPvnnhAQwf1kZ7Oyy4YzVXXLOsdFmtzw5YAJd87yreNW1/Ljj7s2zYsJFXXl3PlT+7kYMO2JdTPnYcV153I1dddxOfPvWTpUtVAa9uSM76yn2se6VGe3tw6dcO4M67nuWBB18sXVpLG8iTcBHxeeB0IIH7gNOAHYAfA3sAjwN/l5nPNrP/fv+qiIh9IuJLEfGfEXFJ/ee3N3OwbcnLa9fxPw8s5dijjgBg+PBhvGnHHbl90V186Mj3AfChI9/H7Yv+WLJMFbbulRoAw9qDYe1BZhauqAKirfHR124idgM+BxycmX8NtAMnAucA8zNzKjC//r4pfXbAEfEl4CTgGmBRffMU4OqIuCYzL2r2wFX31Moudh49mq996zIefnwZe791T86cdTLPPvcC48fuDMD4sTvz7PMvFK5UJbW1wWVfP5DdJm3P9Td3suShl0qX1PIGeBXEMGD7iNhAT+f7FDAbOLL++VxgAfClZnbeXwc8CzgkMy/KzKvq4yLg0PpnmxQRHRGxOCIW/+An1zdTV8vr7u7mwUcfZ8bR07niGxew3XYjueq6G0uXpSGmVoNZ/3gPnzh9EftMHcWeb96hdEmtry0aHr2zqj46/rybzFwBfB1YBnQCz2fmL4GJmdlZ/04nMKHpUvv5vAbsuontk+ufbVJmzsnMgzPz4FNPmNFsbS1tl3Fj2WXcWP
bb620AfODdh/Dgo08wZufRPLPmOQCeWfMcY3YaXbJMDREvre3mnj89z7umjSldSsuLtraGR++sqo85/7+fiDHA8cCe9OTgjhFx8kDW2l8AnwXMj4ibI2JOfdxCz7zHmQNZSNWMG7MzE8aPZdmKTgAW33s/e+y+K4cfMo2bF9wOwM0Lbud9h76zZJkqaKfRwxi1Q88/l0eMaOOgd+zMEyvWFq6qAiIaH307CngsM1dl5gbgOuA9wMqImNxzqJgMdDVbap9zwJl5S0TsRc+Uw25AAMuBP2Rmd7MH3VZ8/vRTOP/i77BxYze7TtyF2Wf8A5k1vvL1S/nF/IVMHD+Of/7iGaXLVCHjxozgy5/bi/a2INrgV799hjsWN3UyXb0N3DK0ZcBhEbEDsA6YDiwGXgZmAhfVX29o9gCxtc+6rrr/Tk/r6i98/NwNpUvQELTw54dv8RqydVde0HDmbH/KuX0eLyLOBz4JbATupmdJ2ijgWuDN9IT0CZm5pplaXQcsqVoG8H7AmXkecN7rNq+npxveYgawpGrxbmiSVIZPxJCkUuyAJakQO2BJKsQbsktSIXbAklSIc8CSVIgdsCQV4iOJJKkQH0kkSYW0uQpCksqwA5akQpwDlqRCXAUhSYXYAUtSGemlyJJUiFMQklSIASxJZaRzwJJUiB2wJBViByxJZbgKQpJKcQpCkspIA1iSCnEOWJLKsAOWpFK8H7AkleGFGJJUilMQklRGYgcsSUV4Ek6SSmmhAG6dSiWpAbW29oZHfyJi54j4aUT8b0QsiYh3R8TYiLg1Ih6qv45ptlYDWFK1RDQ++ncJcEtm7gO8A1gCnAPMz8ypwPz6+6YYwJIqJaOt4dGXiBgNvB/4HkBmvpqZzwHHA3PrX5sLzGi2VgNYUqUk0fDox1uBVcAVEXF3RHw3InYEJmZmJ0D9dUKztRrAkiplczrgiOiIiMW9RkevXQ0D3gl8JzOnAS+zBdMNm+IqCEnVshlXwmXmHGDOG3y8HFiemXfW3/+UngBeGRGTM7MzIiYDXc2WagcsqVJq0d7w6EtmPg08GRF71zdNBx4A5gEz69tmAjc0W6sdsKRKGeALMT4L/DAiRgCPAqfR07heGxGzgGXACc3u3ACWVCkDeSlyZt4DHLyJj6YPxP4NYEmV4qXIklSIt6OUpEL6O7k2lBjAkirF21FKUiHOAUtSIXbAklSIHXAvf3vOK1v7EGpBs2/p6P9L2gYt3eI92AFLUiG1FrrDggEsqVLSAJakMpyCkKRCDGBJKsQAlqRCDGBJKqSWnoSTpCLsgCWpEANYkgrJNIAlqYiaHbAkleFJOEkqxDlgSSrEOWBJKsQOWJIKsQOWpEJqpQvYDAawpEpxFYQkFeIUhCQV4kk4SSqklqUraJwBLKlS7IAlqRDngCWpkG4DWJLKaKUOuHUWzElSAzIbH42IiPaIuDsibqq/HxsRt0bEQ/XXMc3WagBLqpQkGh4NOhNY0uv9OcD8zJwKzK+/b4oBLKlSatn46E9ETAE+Any31+bjgbn1n+cCM5qt1QCWVCm1WjQ8IqIjIhb3Gh2v293FwNm89hYTEzOzE6D+OqHZWj0JJ6lSNueRRJk5B5izqc8i4ligKzP/GBFHDkx1r2UAS6qURk+uNeC9wEcj4sPAdsDoiLgKWBkRkzOzMyImA13NHsApCEmVkhkNj773k7Mzc0pm7gGcCNyWmScD84CZ9a/NBG5otlY7YEmVMgj3grgIuDYiZgHLgBOa3ZEBLKlSBnAKotc+cwGwoP7zamD6QOzXAJZUKV6KLEmFbI0OeGsxgCVVigEsSYXUnIKQpDLsgCWpkO4Wei69ASypUlrpfsAGsKRKcQpCkgrxqciSVIgdsCQVYgBLUiGugpCkQmoGsCSV4RSEJBViAOs1Jowfybmf34exY4aTCfNu6eQnN64oXZYGyQGXfY0JHz6SV7tWs3DacQAMH7MT0370H+zwlt1Y+8QK7jrpLDY+9wIAb9p/b/
b/9vkMe9MoMmv89rBPUFv/aslfoaW00jI0H0k0CLq7k29d/ggnf3oxHV+8m499ZFf22H2H0mVpkCyfex2Ljj39NdvednYHq2+7gwX7Hs3q2+7gr87ueRhvtLdz4Nx/477PnMfCA4/l99NPpbZhY4myW1ZmNjxKM4AHwepnX+XBR14CYN26bh5/ci3jx40sXJUGy5rfLGbDmudfs23icdNZfuX1ACy/8nomfvQoAMb/zXt58b6lvHjvUgA2rHmutc4qDQHd3Y2P0gzgQTZpwkj2etsoHlj6QulSVNDIieNY//QqANY/vYqRE8YCsONee5KZHPqL73L4out46xdO72s32oTMxkdpTQdwRJzWx2cdEbE4IhY//cSNzR6icrbfro0LZ+/HJZc9wtp1Q+CvXw05be3tjH3PQdx96j/xuyM+xaQZRzHuA4eVLqul1LLxUdqWdMDnv9EHmTknMw/OzIMnveW4LThEdbS3BxfM3o9fLuhi4R3PlC5Hha1fuZqRk3YBYOSkXVjftQaAdSueZvXti9iw+llq616h6+aF7DRtv5KltpzKdMARce8bjPuAiYNUYyXM/txePPHkWn58w/LSpWgIWHnTbUw5ZQYAU06Zwcob5wOw6pe/YfT+e9O2/XZEezvj3n8ILy15uGSpLSdr2fAorb9laBOBo4FnX7c9gN9tlYoq6IB9R3PMByfx8GMvccUlBwHwXz94jN//cU3hyjQYDrzyG4w74lBGjB/DBx/7NQ999Zs88q9zeOfVF7P7aZ9g3ZOd3HXimQBsfO4FHrv4+xx+x08hk65bFtJ1868L/watpUqXIt8EjMrMe17/QUQs2CoVVdC9D7zA4cf5P9G26p5TvrDJ7Xce/feb3L7iR/NY8aN5W7GiaqsNgc62UX0GcGbO6uOzTw18OZK0ZYbC3G6jvBJOUqUYwJJUSK2FEtgAllQpWaGTcJLUUrq77YAlqYihcJOdRhnAkiqlhVahGcCSqmUoXOHWKANYUqW00AyEt6OUVC21WjY8+hIRu0fEryJiSUTcHxFn1rePjYhbI+Kh+uuYZms1gCVVSq07Gx792Ah8ITPfDhwGfCYi9gXOAeZn5lRgfv19UwxgSZVSy2x49CUzOzPzrvrPLwJLgN2A44G59a/NBWY0W6sBLKlSNueZcL0fHlEfHZvaZ0TsAUwD7gQmZmZn/VidwIRma/UknKRK2Zy7oWXmHGBOX9+JiFHAz4CzMvOFiNiyAnuxA5ZUKQP5RIyIGE5P+P4wM6+rb14ZEZPrn08Gupqt1QCWVCnd3bWGR1+ip9X9HrAkM/+910fzgJn1n2cCNzRbq1MQkiplAC/EeC9wCnBfRPz5oRRfBi4Cro2IWcAy4IRmD2AAS6qUgQrgzPwNPY9f25TpA3EMA1hSpbTQlcgGsKRq8V4QklSIt6OUpEL6W90wlBjAkirFKQhJKsQAlqRCfCqyJBViByxJhbgKQpIK6d7oKghJKsIOWJIKyZodsCQVsTk3ZC/NAJZUKU5BSFIhNU/CSVIZtTSAJakIL8SQpEIMYEkqxJNwklRIzXXAklRGrbu7dAkNM4AlVYpzwJJUiAEsSYW4DliSCrEDlqRCvBuaJBXiKghJKsTbUUpSIU5BSFIhnoSTpELSZWiSVEZtY+uchItWunNQq4uIjsycU7oODS3+udh2tZUuYBvTUboADUn+udhGGcCSVIgBLEmFGMCDy3k+bYp/LrZRnoSTpELsgCWpEANYkgoxgAdJRBwTEUsj4uGIOKd0PSovIi6PiK6I+FPpWlSGATwIIqIduBT4ELAvcFJE7Fu2Kg0B3weOKV2EyjGAB8ehwMOZ+WhmvgpcAxxfuCYVlpkLgTWl61A5BvDg2A14stf75fVtkrZhBvDgiE1sc/2ftI0zgAfHcmD3Xu+nAE8VqkXSEGEAD44/AFMjYs+IGAGcCMwrXJOkwgzgQZCZG4EzgP8GlgDXZub9ZatSaRFxNXAHsHdELI+IWaVr0uDyUm
RJKsQOWJIKMYAlqRADWJIKMYAlqRADWJIKMYAlqRADWJIK+T8ZuqG+hELPYgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#KNN default accuracy with K value for highest accuracy score\n",
"\n",
"knn2 = KNeighborsClassifier(n_neighbors=9)\n",
"knn2.fit(X_train,Y_train)\n",
"Y_prediction_knn = knn2.predict(X_test)\n",
"\n",
"print('KNN Accuracy: {}'.format(accuracy_score(Y_test, Y_prediction_knn)))\n",
"print('ROC Score: {}'.format(roc_auc_score(Y_test, Y_prediction_knn)))\n",
"\n",
"#KNN Confusion Matrix\n",
"knncfm = confusion_matrix(Y_test,Y_prediction_knn,labels=[1,0])\n",
"sns.heatmap(knncfm, annot=True,fmt='g', cmap='coolwarm')\n",
"\n",
"#Printing classification report\n",
"print(classification_report(Y_test, Y_prediction_knn))"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Accuracy Score')"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#KNN \n",
"#Testing other number of neighbors\n",
"\n",
"scores = []\n",
"\n",
"#loop appends the accuracy score gained after testing each number of k from 1,31\n",
"for i in range(1,31):\n",
" knn = KNeighborsClassifier(n_neighbors=i)\n",
" knn.fit(X_train, Y_train)\n",
" y_pred = knn.predict(X_test)\n",
" scores.append(accuracy_score(Y_test, y_pred))\n",
"\n",
"\n",
"\n",
"#plotting a graph of the accuracy scores at each number of K\n",
"plt.plot([i for i in range(1, 31)], scores)\n",
"plt.xlabel('Neighbours (K)')\n",
"plt.ylabel('Accuracy Score')"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.9051079024996118, 0.9085700978108988, 0.9191429902189101, 0.9226828132277596, 0.9279459711224964, 0.9209129017233348, 0.9261760596180716, 0.9261915851575843, 0.9314702685918336, 0.9261915851575843, 0.9297003570874087, 0.9314702685918336, 0.9332401800962584, 0.935010091600683, 0.9297314081664337, 0.9297469337059463, 0.9297469337059463, 0.9315013196708586, 0.9297314081664337, 0.9244837758112094, 0.929762459245459, 0.9227138643067846, 0.9227293898462972, 0.9192050923769601, 0.9209594783418723, 0.9174351808725353, 0.9192050923769601, 0.915680794907623, 0.9174351808725353, 0.915680794907623]\n"
]
},
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Cross_val score')"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#KNN cross validation\n",
"\n",
"lrscores = []\n",
"\n",
"for i in range(1,31):\n",
" knn3 = KNeighborsClassifier(n_neighbors=i)\n",
" scores2 = cross_val_score(knn3,X,Y,cv=5,scoring='accuracy')\n",
" lrscores.append(scores2.mean())\n",
" \n",
"print(lrscores)\n",
"\n",
"plt.plot([i for i in range(1, 31)], lrscores)\n",
"plt.xlabel('Neighbours (K)')\n",
"plt.ylabel('Cross_val score')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}