From 8328ef52eee0bdcd559e88c66d2d5ab6cc2fad5b Mon Sep 17 00:00:00 2001 From: Salih Ahmed Date: Fri, 22 Oct 2021 16:13:16 +0100 Subject: [PATCH] Meh --- .../exercise1a-checkpoint.ipynb | 1101 +++++++++++++++++ .../exercise3a-checkpoint.ipynb | 911 ++++++++++++++ .../exercise3b-checkpoint.ipynb | 924 ++++++++++++++ .../exercise4a-checkpoint.ipynb | 202 +++ Week5/exercise1a.ipynb | 288 +++++ Week5/exercise1b.ipynb | 44 +- Week5/exercise2.ipynb | 209 +++- Week5/exercise3a.ipynb | 233 ++++ Week5/exercise3b.ipynb | 198 +++ Week5/exercise4a.ipynb | 212 ++++ .../exercise1a-checkpoint.ipynb | 837 +++++++++++++ .../exercise1b-checkpoint.ipynb | 633 ++++++++++ .../exercise2-checkpoint.ipynb | 566 +++++++++ Week6/exercise1a.ipynb | 837 +++++++++++++ Week6/exercise1b.ipynb | 563 +++++++++ Week6/exercise2.ipynb | 668 ++++++++++ 16 files changed, 8395 insertions(+), 31 deletions(-) create mode 100644 Week5/.ipynb_checkpoints/exercise1a-checkpoint.ipynb create mode 100644 Week5/.ipynb_checkpoints/exercise3a-checkpoint.ipynb create mode 100644 Week5/.ipynb_checkpoints/exercise3b-checkpoint.ipynb create mode 100644 Week5/.ipynb_checkpoints/exercise4a-checkpoint.ipynb create mode 100644 Week5/exercise1a.ipynb create mode 100644 Week5/exercise3a.ipynb create mode 100644 Week5/exercise3b.ipynb create mode 100644 Week5/exercise4a.ipynb create mode 100644 Week6/.ipynb_checkpoints/exercise1a-checkpoint.ipynb create mode 100644 Week6/.ipynb_checkpoints/exercise1b-checkpoint.ipynb create mode 100644 Week6/.ipynb_checkpoints/exercise2-checkpoint.ipynb create mode 100644 Week6/exercise1a.ipynb create mode 100644 Week6/exercise1b.ipynb create mode 100644 Week6/exercise2.ipynb diff --git a/Week5/.ipynb_checkpoints/exercise1a-checkpoint.ipynb b/Week5/.ipynb_checkpoints/exercise1a-checkpoint.ipynb new file mode 100644 index 0000000..7f7e079 --- /dev/null +++ b/Week5/.ipynb_checkpoints/exercise1a-checkpoint.ipynb @@ -0,0 +1,1101 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing linear regression using relevant independant variable(s) from a wide-ranging data set to calculate a single dependant\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.linear_model import LinearRegression # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"headbrain.csv\") # import dataset (already contains headers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 237 
entries, 0 to 236\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 Gender 237 non-null int64\n", + " 1 Age Range 237 non-null int64\n", + " 2 Head Size(cm^3) 237 non-null int64\n", + " 3 Brain Weight(grams) 237 non-null int64\n", + "dtypes: int64(4)\n", + "memory usage: 7.5 KB\n" + ] + } + ], + "source": [ + "data.info() # show basic stats" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Gender 0\n", + "Age Range 0\n", + "Head Size(cm^3) 0\n", + "Brain Weight(grams) 0\n", + "dtype: int64" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.isnull().sum() # no null values" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.duplicated().any() # no duplicated data\n", + "# no further preprocessing needed" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " Gender Age Range Head Size(cm^3) Brain Weight(grams)\n", + "0 1 1 4512 1530\n", + "1 1 1 3738 1297\n", + "2 1 1 4261 1335\n", + "3 1 1 3777 1282\n", + "4 1 1 4177 1590" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head() # show first couple values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Variable extraction " + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1530 1297 1335 1282 1590 1300 1400 1255 1355 1375 1340 1380 1355 1522\n", + " 1208 1405 1358 1292 1340 1400 1357 1287 1275 1270 1635 1505 1490 1485\n", + " 1310 1420 1318 1432 1364 1405 1432 1207 1375 1350 1236 1250 1350 1320\n", + " 1525 1570 1340 1422 1506 1215 1311 1300 1224 1350 1335 1390 1400 1225\n", + " 1310 1560 1330 1222 1415 1175 1330 1485 1470 1135 1310 1154 1510 1415\n", + " 1468 1390 1380 1432 1240 1195 1225 1188 1252 1315 1245 1430 1279 1245\n", + " 1309 1412 1120 1220 1280 1440 1370 1192 1230 1346 1290 1165 1240 1132\n", + " 1242 1270 1218 1430 1588 1320 1290 1260 1425 1226 1360 1620 1310 1250\n", + " 1295 1290 1290 1275 1250 1270 1362 1300 1173 1256 1440 1180 1306 1350\n", + " 1125 1165 1312 1300 1270 1335 1450 1310 1027 1235 1260 1165 1080 1127\n", + " 1270 1252 1200 1290 1334 1380 1140 1243 1340 1168 1322 1249 1321 1192\n", + " 1373 1170 1265 1235 1302 1241 1078 1520 1460 1075 1280 1180 1250 1190\n", + " 1374 1306 1202 1240 1316 1280 1350 1180 1210 1127 1324 1210 1290 1100\n", + " 1280 1175 1160 1205 1163 1022 1243 1350 1237 1204 1090 1355 1250 1076\n", + " 1120 1220 1240 1220 1095 1235 1105 1405 1150 1305 1220 1296 1175 955\n", + " 1070 1320 1060 1130 1250 1225 1180 1178 1142 1130 1185 1012 1280 1103\n", + " 1408 1300 1246 1380 1350 1060 1350 1220 1110 1215 1104 1170 1120]\n" + ] + } + ], + "source": [ + "# We want to use the correlation of COMBINED yet only RELEVANT variables to determine our model\n", + "# therefore, we essentially split the data into our single dependancy...\n", + "y = data.iloc[:,-1].values # dependant\n", + "print(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Brain Weight(grams) Gender Age Range Head Size(cm^3)\n", + "Brain Weight(grams) 1.000000 -0.465266 -0.169438 0.799570\n", + "Gender -0.465266 1.000000 -0.088652 -0.514050\n", + "Age Range -0.169438 -0.088652 1.000000 -0.105428\n", + "Head Size(cm^3) 0.799570 -0.514050 -0.105428 1.000000\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcMAAAFaCAYAAABi7zZVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABCzElEQVR4nO3dd5xU5dnG8d8liiJIR6oKKkZRFKQoVmygWMASI/a8djQxJvbYjbEksSWKYknsxC4qBtRosAuKimDBLgKCIKCIIOz9/nHOwrAssDizewbm+vqZD3POeeace8bdved5zlMUEZiZmZWy1bIOwMzMLGtOhmZmVvKcDM3MrOQ5GZqZWclzMjQzs5LnZGhmZiXPydDMzIqGpNslTZH07lKOS9L1kj6S9I6krQtxXSdDMzMrJv8C9lzG8b2A9unjeGBgIS7qZGhmZkUjIkYA05dRpC9wZyReBRpKapnvdZ0MzcxsZdIa+DJne0K6Ly+r53sCK04/ffOJ59lLNdlg96xDKBp7N90y6xCKxu239Mo6hKJRp9cA5fP6Ffl7U7vZRieQNG+WGxQRg/K5fiE4GZqZWX4W/FTlomniyyf5fQWsl7PdJt2XFzeTmplZfsrKqv7I3xDgyLRX6bbAzIiYlO9JXTM0M7O8RBQkyQEg6T6gJ9BU0gTgQmCN5DpxEzAU6AN8BPwA/LoQ13UyNDOz/BSmxgdARPRfzvEATi7YBVNOhmZmlp8C1gyz4mRoZmb5KVuQdQR5czI0M7P8LJifdQR5czI0M7O8FLIDTVacDM3MLD8F7ECTFSdDMzPLj2uGZmZW8tyBxszMSp470JiZWclzM6mZmZU8d6AxM7NSF+F7hmZmVurcTGpmZiXPzaRmZlbyVmBx32LlZGhmZvlxM6mZmZU8N5OamVnJK4WaoaQFwBhAwALglIh4eUUuImkocGhEzKhC2YbAx0DTiAhJPYCXgfUiYoKkBsCn6fEl/g9IagVcHxEHLec630dEvUr29wM+jIhxOfuuBR6OiBHLi786SeoI/CEijs4yjuU5789XM+Kl12ncqCGP3n1T1uHUiKv+cgG9evfkhzk/ctIJZ/D2W2OXWnbw/YNo2249tu22FwD/vON62m+yIQANGtRn5sxZ7NBjnxqJuzocddGxdNqlC/PmzGXg6dfz2bufLFHm4DMOY6cDdqFug7r8usOihc2POP//6NCjIwBr1qlN/SYNOXbLw2os9ury0rjPuOqh/1FWFuzfY3P+r1e3xY5Pmj6L8+9+mu/mzKWsrIzf7rc9O27eLqNof4YSqRnOiYhOAJJ6A5cDO+cWkLR6RCx1Pp6I6FPVgCJihqRJwGbAOGA7YHT67/3AtsDrlSXC9PUTgWUmwuXoBzyRXhtJTYBtI+J3VT3B8j6PnysixkhqI2n9iPii0OcvlH599uDQA/fj3Ev/mnUoNaJX755stHFbOm25K926deKaay9l154HVFp23/16M3v2D4vt+/VRv134/LLLz2XWzO+qNd7q1GmXLrRo15LTdj6JjTtvwjF/OpHz+525RLk3nxnJ8DuGcs3zNy62/65Lb1/4vPfRe9N2ZUoIS7GgrIzLH3iem07en+YN63HYXwazc8cN2ahlk4Vlbhk2kl6d23Pwjlvy8aRpnHLTYzx18Ur03leBZLjaCpavD3wLIKmnpBckDWFR4nhU0huSxko6vvxFkj6T1FRSW0nvSbolLTNcUp1KrvMySfIj/feaCtsvSaol6S+SRkp6R9IJ6bXaSno3fb62pPsljZP0iKTXJHXNiesySW9LelVSc0nbAfsBf5H0lqSNgAOB/+S8po+k99P3eb2kJ9L9F0m6S9JLwF1pHC9IejN9bJfzuf1P0mOSPpF0haTDJL0uaUx6TST9UtK7aXy5NdLHgUNW8P9bjeraqSMN6q+TdRg1ps/eu3PfvY8AMHLkWzRoUJ/mLZotUa5u3bU55TfHcNWV/1jqufY/oA8PPvB4tcVa3brs0Z0XHnoegI9Gf8ja9evScN1GS5T7aPSHzJjy7TLPtd1+O/LyYy9UR5g16t3Pv2a9pg1o07QBa6xei95dNuH5MYvXliWY/eM8AL7/cR7NGizRaFXUYsFPVX4Uq6okwzppYngfuBW4NOfY1sCpEbFJuv1/EdEF6Ar8Nq1VVdQeuCEiNgdmkCSbil5iUfLbEHggPSfp/peBY4CZEdEN6AYcJ6niV6kBwLcR0QE4H+iSc6wu8GpEbAWMAI5Lm3+HAGdERKeI+BjYHngDQNJawM3AXun7rPgXrwOwe0T0B6YAe0TE1sCvgOtzym0FnEhS+z0C2CQiupN8vr9Jy1wA9E7j2y/ntaOAHSv5zCwjrVq1YMKESQu3v5o4mVYtWyxR7rwLfs/fr7+VOT/MqfQ8223fjSlTpvHxx59VV6jVrnGLxkyb+M3C7emTp9G4eeMVPk/T1s1ott66vPvymEKGl4kpM76nRaNFXw6bN6zHlBnfL1bmxL225cmR79Pr/Ns4ZeBjnH3QzhVPU9yirOqPKpC0p6QPJH0k6exKjq8v6TlJo9PKUJVbH5emKslwTpoYNgX2BO6UpPTY6xHxaU7Z30p6G3gVWI8k8VX0aUS8lT5/A2hbSZmXge3S5PZZRPwISFI9koT2GtALOFLSW+l2k0qutwMwGCAi3gXeyTk2j6Q5dFlxALQEpqbPNwU+yXnP91UoOyQiyv/SrQHcImkMSTLvkFNuZERMioi5JPdHh6f7x+TE8RLwL0nHAbVyXjsFaLWUWK1IddxyM9q1W58nHh++1DIH/XI/HnxgSA1GVbx67LsDrw99hVgFmt+q4j9vfMB+23Rg+KXH8I+T+nLeXcMpK4usw6q6srKqP5ZDUi3gBmAvkr+b/SV1qFDsPOD+iOhM0lJ2I3laoWbSiHgFaMqiGtHs8mOSegK7Az3S2sxoYK1KTjM35/kCKrlvGRHjgYbAvsAr6e43gF+TJMfvSTr0/CZN1J0iol1ELP0vzZJ+iojyn7ZK40jNWcr7qMzsnOenAV+T1AK7ArVzjuV+BmU522XlcUTEiST/w9cD3sipZa+VxrQEScdLGiVp1K13VszTVkjHHX8EL77yBC++8gSTJ0+hTZuWC4+1btWCiZMmL1a+e/et6bx1R8aMG8GwZ+5n443b8eRT9y48XqtWLfbr25uHH3yyxt5Doexx5F5cPvQaLh96DTOmfEuTVk0XHmvcognTv56+wufcbr8deWlIpv3VCmbdhvWY/O2i+8Bfz/iedRsu3gz6yCtj6bV18l1+q3YtmfvTfGbMrrwFoSgVtmbYHfgoIj6JiHkkFZq+Fa9IctsOoAEwMd+3sELJUNKmJLWUaZUcbkDSJPlDWm7bPGN7FTiVRcnwFeB3JDUmgGHASZLWSGPbRFLdCud4CTg4Pd4B6FiF634H5N7weg/YOH3+AbChpLbp9q+WcZ4GwKS0o88RLF67Wy5JG0XEaxFxAUnNdL300CbAu5W9JiIGRUTXiOh67JH9KytiBXLLoLvYocc+7NBjH558/Gn6H7o/AN26dWLWrO/4evLUxcrfdus9/G
LjHnTssBO9dz+Yjz76lL33OnTh8V123Z4PP/iYiRMXT6Irg6fvfIpz+pzGOX1OY9Tw19jxwJ4AbNx5E374bvZy7w1W1Gqj1tStX4/xb3xQDdHWvM3Xb84XU2fw1Tcz+Wn+Aoa98SE7d9xwsTItG63Dax98CcAnk6cz76cFNKpXWXeKIrUCNcPcL+3p4/gKZ2sNfJmzPSHdl+si4HBJE4ChLLq99LNVpTdpnbQpEpLa2FERsWBRS+lC/wFOlPQeSdJ4Nc/YXgL6kNwjgyQZbkjShArJ/bW2wJtps+1Ukp6guW4E7pA0DngfGAvMXM51B5M0b/6WpFfqk8AJwK0RMUfSAOA/kmYDI5dxnhuBhyQdSfLZzF5G2cr8RVJ7ks/8WeDtdP8uaUxF64wLr2Dk6HeYMWMWu/U7nAHHHMGB+/bOOqxqM2zYc/Tq3ZO3xzzHD3N+ZMAJi3pPvvjKE1UaJnHgQfus1B1nyo3+7xt02qUL1464iblz5nLz6YtulV8+9BrO6XMaAIeecxTb9d2R2nXW5B+v3spzg5/hoWsHA9Bj3x15+fGVv+NMudVrrcbZv+zJSTc+SlkEfbftwMYtm3Djk6/QYf3m9Oy4Ib/ff0cuue9Z7nluNAguPnwPKvkbW7xWYHHfiBgEDMrziv2Bf0XE35QMv7tL0hZLG2VQFVrUUrjqSdue14iIH9Nems8Av0ir3itynheBfdJhH/Ui4vs0Ad8AjI+IawoffaVxrAn8D9hheUM3fvrmk1X3f+wKarLB7lmHUDT2brpl1iEUjdtv6ZV1CEWjTq8BeWXeOU9eW+W/N3X2/t0yr5Umt4sione6fQ5ARFyeU2YssGdEfJluf0IyBG7KzwgfWPGhFSubtYEX0049jwADVjQRpv4ArJ8+Py6tKY8laQq9uRCBVtH6wNnVMYbRzOxnK+w9w5FAe0ntJNUm6SBTsWfZF8BuAJI2I+lLMZU8rNLTsUXEdywakpHPeV7LeX4NybjHGpd2LBqfxbXNzJaqgL1+I2K+pFNI+oXUAm6PiLGSLgFGRcQQkgrKLZJOI+lMc3Tk2cy5SidDMzOrAQWemzQihpJ0jMndd0HO83EkY8ALxsnQzMzyswqMB3UyNDOz/KxAb9Ji5WRoZmb5cc3QzMxK3iowRM/J0MzM8uOaoZmZlTwnQzMzK3kFHlqRBSdDMzPLz4IFWUeQNydDMzPLj5tJzcys5DkZmplZyfM9QzMzK3VR5nGGZmZW6jwdm5mZlTzXDM3MrOS5A42ZmZU8J0MzMyt5nqjbzMxKnmuGZmZW8jwdm5mZlTz3JrVi1WSD3bMOoWhM+/yZrEMoGn/oek7WIRSNdfa5LOsQisb8eQPyen0UuJlU0p7AdUAt4NaIuKKSMgcDFwEBvB0Rh+ZzTSdDMzPLTwFrhpJqATcAewATgJGShkTEuJwy7YFzgO0j4ltJ6+Z73dXyPYGZmZW4KKv6Y/m6Ax9FxCcRMQ8YDPStUOY44IaI+BYgIqbk+xacDM3MLD/zF1T5Iel4SaNyHsdXOFtr4Muc7QnpvlybAJtIeknSq2mzal7cTGpmZvlZgWbSiBgEDMrziqsD7YGeQBtghKSOETHj557QNUMzM8tPYZtJvwLWy9luk+7LNQEYEhE/RcSnwIckyfFnczI0M7P8lEXVH8s3EmgvqZ2k2sAhwJAKZR4lqRUiqSlJs+kn+bwFN5OamVleCjm0IiLmSzoFGEYytOL2iBgr6RJgVEQMSY/1kjQOWACcERHT8rmuk6GZmeWnwIPuI2IoMLTCvgtyngfw+/RREE6GZmaWH0/HZmZmJc/TsZmZWakLJ0MzMyt5ToZmZlbyvJ6hmZmVPNcMzcys1MUC1wzNzKzUuWZoZmYlz8nQzMxKnYdWmJmZORmamVmpi/lOhmZmVupcMzQzs5K38o+scDJcEZKaA9cA2wLfAvOAqyLikTzP2xM4PSL2yTfGmnTVXy6gV++e/DDnR0464QzefmvsUssOvn8Qbdutx7bd9gLgn3dcT/tNNgSgQYP6zJw5ix16rFRvv0rO+/PVjHjpdRo3asijd9+UdTjV5sALj6bDLp2ZN2cu95w+kAljP12izHpbtOOwvw5gjbVqM+650Tx08b8AaN1hA3512XGsvuYalM1fwP3n38YXb39Mnfp1OfQvJ9J0/ebMn/sT9555E5M+/LKG31l+rrn6Evbac1d+mDOHY445jdFvvbtEmWeffoAWLZszZ86PAOzVpz9Tp05jxx224W9/u5gtO27GoYcP4OGHn6zp8KtsVehA45Xuq0iSSFZXHhERG0ZEF5IVmNtkEEvmX2J69e7JRhu3pdOWu3LqKedyzbWXLrXsvvv1ZvbsHxbb9+ujfssOPfZhhx77MOSx//D4Y8OqO+RM9OuzBzdd/aesw6hWHXp2olm7Flza81T+fe4tHHzZMZWWO/hPxzL4nEFc2vNUmrVrwWY9OwHQ9+zDeOq6B7mqz1kMvfp++p5zGAC9Tu7HV+M+58q9zuSuP9zAARceVVNvqSD22nNX2m/cjk077MBJJ53FDf+4fKlljzzyFLp260XXbr2YOjVZo/aLL7/imGNP477Bj9ZQxHkoW4FHkXIyrLpdgXkRsfDrfUR8HhF/l1RL0l8kjZT0jqQTIKnxSXpe0oOS3pd0T5pUkbRnuu9N4IDyc0qqK+l2Sa9LGi2pb7r/aElDJP0XeLZG33kl+uy9O/fdm1SIR458iwYN6tO8RbMlytWtuzan/OYYrrryH0s91/4H9OHBBx6vtliz1LVTRxrUXyfrMKpVx17deP3hEQB8Nno8ddapS/1mDRcrU79ZQ9Zapw6fjR4PwOsPj2DLXt0ACGCtenUAWKv+2sz8+lsAWrRvw4cvJzWpKR9PpEmbZqzTtEENvKPC2Hff3tx1z4MAvPb6mzRo2IAWLdat8us//3wCY8a8R9lKMO9nlEWVH8XKybDqNgfeXMqxY4CZEdEN6AYcJ6ldeqwz8DugA7AhsL2ktYBbgH2BLkCLnHP9EfhvRHQHdgH+Iqluemxr4KCI2Llg7+pnatWqBRMmTFq4/dXEybRq2WKJcudd8Hv+fv2tzPlhTqXn2W77bkyZMo2PP/6sukK1atageSNmTJy2cHvG5Gk0aNF48TItGjNj0vRFZSZNp0HzRgA8fPEd9D3ncC5++Qb6nXsEj191HwBfvfc5W+3ZHYD1t9qIRq2b0bDCeYtZ61YtmPDlxIXbX02YROtWS/6OANx669WMGjmcP577uxqKrrBiftUfxcrJ8GeSdIOktyWNBHoBR0p6C3gNaAK0T4u+HhETIqIMeAtoC2wKfBoR4yMigLtzTt0LODs91/PAWsD66bGnI2I6SyHpeEmjJI2aN39WYd5oHjpuuRnt2q3PE48PX2qZg365Hw8+MKQGo7Jis8Phe/DIpXdw4XYn88ild3DolScC8MzAx6hTvy5nDr2SnY/akwljP1spakkr6oijfkPnrXen5y77s8P23Tn88IOyDmnFrQLNpJnfe1qJjAUOLN+IiJMlNQVGAV8Av4mIxW58pR1j5ubsWsDyP3MBB
0bEBxXOtQ0we1kvjIhBwCCA+nU3LHh7xHHHH8FRv/4VAG++8Q5t2rRceKx1qxZMnDR5sfLdu29N5607MmbcCFZfvRbNmjXhyafuZe+9DgWgVq1a7Ne3Nzttv1+hQ7VqtuMRvejRfzcAvnj7Yxq2arLwWMMWTZg5efHvbDMnT6dhy0W1uoYtGy9sDu1+4M4LO9OMfvJV+l9xAgA/fj+He88YuPA1F774d6Z9MaVa3k+hnHTiURxzTHLPc9Sot2izXquFx1q3aclXEycv8ZqJ6b7vv5/NfYMfpVvXTtx994M1E3CBRIGTnKQ9geuAWsCtEXHFUsodCDwIdIuIUflc0zXDqvsvsJakk3L2rZ3+Oww4SdIaAJI2yWnarMz7QFtJG6Xb/XOODQN+k3NvsXNBoi+AWwbdtbDTy5OPP03/Q/cHoFu3Tsya9R1fT566WPnbbr2HX2zcg44ddqL37gfz0UefLkyEALvsuj0ffvDxwj8GtvJ44a7hXNXnLK7qcxbvDB9J9wN2AqBt5/b8+N0PzJo6Y7Hys6bO4Mfv5tC2c9Jg0v2AnRgzfCQAM6d8y8bbdgBgk+22YOpnyc9DnfprU2uNWgD0OGRXPn7tfX78vvLm9mIx8KY7FnaEGTJkGEccltTytum+NbNmzmLy5MWTea1atWjSJGkuXn311dl7790ZO/aDJc5b9ApYM5RUC7gB2Ivk9lJ/SR0qKbcOcCpJa1zeXDOsoogISf2AaySdCUwlqamdBTxA0vz5ZprEpgL9lnGuHyUdDzwp6QfgBaC8l8WlwLXAO5JWAz4Fim7MwbBhz9Grd0/eHvMcP8z5kQEnnLnw2IuvPFGlYRIHHrTPKttxptwZF17ByNHvMGPGLHbrdzgDjjmCA/ftnXVYBTXuudFsvktnLvjfdcybM497cmpzZw69kqv6nAXA/effxmF/HUDttdZg3PNvMe75twAYfPbNHHjh0ay2ei1+mjuPwecMAqD5xq05/K8DiIDJ4ydw75kr19CUoU89y5577soH773ED3PmcOyxv194bNTI4XTt1os116zN0CfvZY01VqdWrVo8++wL3HrbPQB07bIVDz5wG40aNWCfvffgwgv+wFadds3q7SxTgWuG3YGPIuITAEmDgb7AuArlLgWuBM4oxEWV3LKyVU11NJOurKZ9/kzWIRSNP3Q9J+sQisaNE1/MOoSiMX/eV8rn9VN227nKf2/WffZ/y7yWpIOAPSPi2HT7CGCbiDglp8zWwB8j4kBJz5OM086rmdQ1QzMzy0ssqHouTVvFjs/ZNSjt71DV168GXA0cXeWLVoGToZmZ5WVFmklzO/otxVfAejnbbdJ95dYBtgCeT7tWtACGSNovn9qhk6GZmeUlyvJqZa1oJNA+Hav9FclMXwt73kXETKBp+babSc3MrCgUsgNNRMyXdApJz/pawO0RMVbSJcCoiKiWgclOhmZmlpeIgtYMiYihwNAK+y5YStmehbimk6GZmeWlbH5hk2EWnAzNzCwvq8IIPSdDMzPLS4E70GTCydDMzPLiZGhmZiXPzaRmZlbyXDM0M7OSV7YC07EVKydDMzPLS1mBxxlmwcnQzMzyUuhB91lwMjQzs7z4nqGZmZU89yY1M7OS55qhmZmVvAVlq2UdQt6cDM3MLC9uJjUzs5LnoRVmZlbyPLTCzMxKnptJrWjt3XTLrEMoGn/oek7WIRSNv426POsQisb8rmdnHcIqwx1ozMys5PmeoZmZlbxVoJWUlb9ua2ZmmSoLVflRFZL2lPSBpI8kLdGeLen3ksZJekfSs5I2yPc9OBmamVleIlTlx/JIqgXcAOwFdAD6S+pQodhooGtEbAk8CFyV73twMjQzs7yUrcCjCroDH0XEJxExDxgM9M0tEBHPRcQP6earQJt834PvGZqZWV4WFLYDTWvgy5ztCcA2yyh/DPBUvhd1MjQzs7yUUfVkKOl44PicXYMiYtDPua6kw4GuwM4/5/W5nAzNzCwvsQLJME18y0p+XwHr5Wy3SfctRtLuwB+BnSNibpUDWArfMzQzs7wU+J7hSKC9pHaSagOHAENyC0jqDNwM7BcRUwrxHlwzNDOzvKxIzXC554qYL+kUYBhQC7g9IsZKugQYFRFDgL8A9YAHJAF8ERH75XNdJ0MzM8vL/AKfLyKGAkMr7Lsg5/nuBb6kk6GZmeWnkDXDrDgZmplZXspW/lzoZGhmZvlZkaEVxcrJ0MzM8rIqTNTtZGhmZnmp4pCJouZkaGZmeVkgN5OamVmJc83QzMxKnnuTmplZyXNvUjMzK3nuTWpmZiXPzaQrGUn9gEeAzSLi/QKd8yLgOGAqUBu4NCLuK8S5i91RFx1Lp126MG/OXAaefj2fvfvJEmUOPuMwdjpgF+o2qMuvO/RfuP+I8/+PDj06ArBmndrUb9KQY7c8rMZiL4QDLzyaDrt0Zt6cudxz+kAmjP10iTLrbdGOw/46gDXWqs2450bz0MX/AqB1hw341WXHsfqaa1A2fwH3n38bX7z9MXXq1+XQv5xI0/WbM3/uT9x75k1M+vDLJc67Mjrvz1cz4qXXadyoIY/efVPW4dSIX174azbfpTM/zZnLnaffyJeV/Izsd/ohbHPATtRpUI/fb37kwv0bd9+Mgy44itabbsDtv7mW0U+9VpOhr5AFWQdQAKW2hFN/4MX030K6JiI6AX2BmyWtUeDzF51Ou3ShRbuWnLbzSdxyzo0c86cTKy335jMjOa/vGUvsv+vS2zmnz2mc0+c0ht0xlJHDXqnukAuqQ89ONGvXgkt7nsq/z72Fgy87ptJyB//pWAafM4hLe55Ks3Yt2KxnJwD6nn0YT133IFf1OYuhV99P33OSLwK9Tu7HV+M+58q9zuSuP9zAARceVVNvqdr167MHN139p6zDqDGb9+zMuu1acFHP33LPuYM45LJjKy33zrNvcGXfc5fYP33iN9x1+o2MeuzF6g41b2Wq+qNYlUwylFQP2AE4hmR9rPL9q0m6UdL7kp6WNFTSQemxLpL+J+kNScMktVzWNSJiPPAD0Ch9/UBJoySNlXRxzjU/k3SxpDcljZG0abq/WRrDWEm3SvpcUtP02OGSXpf0lqSbJdUq8Ee0Qrrs0Z0XHnoegI9Gf8ja9evScN1GS5T7aPSHzJjy7TLPtd1+O/LyYy9UR5jVpmOvbrz+8AgAPhs9njrr1KV+s4aLlanfrCFrrVOHz0aPB+D1h0ewZa9uQHKPZa16dQBYq/7azPw6+YxatG/Dhy+/C8CUjyfSpE0z1mnaoAbeUfXr2qkjDeqvk3UYNWbLXl15LednZO1KfkbKj82aOmOJ/dMnTOWr97+gLIr/jlyB1zPMRMkkQ5Ja238i4kNgmqQu6f4DgLZAB+AIoAdAWrv7O3BQRHQBbgcuW9YFJG0NjM9ZbPKPEdEV2BLYWdKWOcW/iYitgYHA6em+C4H/RsTmwIPA+ul5NwN+BWyf1kAXAJm2KTZu0ZhpE79ZuD198jQaN2+8wudp2roZzdZbl3dfHlPI8Kpdg+aNmDFx2sLtGZOn0aDF4u+/QYvGzJg0
fVGZSdNp0Dz5wvDwxXfQ95zDufjlG+h37hE8flXSsv7Ve5+z1Z7dAVh/q41o1LoZDVus+Odq2WvYvDHf5vyOfDt52ir7/9LJcOXSHxicPh/MoqbSHYAHIqIsIiYDz6X7fwFsATwt6S3gPKDNUs59mqSxwGssnjAPlvQmMBrYnCThlns4/fcNkmRcHstggIj4D1BepdoN6AKMTGPZDdiwKm+62PXYdwdeH/oKUVbMvyaFt8Phe/DIpXdw4XYn88ild3DolUkz8zMDH6NO/bqcOfRKdj5qTyaM/YyyEvtsbOUTqvqjWJVEBxpJjYFdgY6SgmT15JC05M2snJcBYyOiRxUucU1E/FXSfsBtkjYCWpLU+LpFxLeS/gWslfOauem/C1j+/wcBd0TEOcssJB0PHA/QtfFWbFyvbRVCr7o9jtyLXQ/pBcAn74ynSaumC481btGE6V9PX9pLl2q7/Xbk9vNvLliM1WnHI3rRo/9uAHzx9sc0bNVk4bGGLZowc/Li73/m5Ok0bLmoJtCwZeOFzaHdD9x5YWea0U++Sv8rTgDgx+/ncO8ZAxe+5sIX/860L6ZgK4edjujN9unPyOdvf0yjVk2BDwBo1KIJMyav+O/IyqDQi/tmoVRqhgcBd0XEBhHRNiLWAz4FdgReAg5M7x02B3qmr/kAaCZpYbOppM2XdZGIGAKMAo4C6gOzgZnpefeqQpwvAQen1+tFeu8ReBY4SNK66bHGkjao5PqDIqJrRHQtdCIEePrOpxZ2ehk1/DV2PLAnABt33oQfvpu93HuDFbXaqDV169dj/BsfFDzW6vDCXcO5qs9ZXNXnLN4ZPpLuB+wEQNvO7fnxux+WuO8za+oMfvxuDm07tweg+wE7MWb4SABmTvmWjbdNGgo22W4Lpn42GYA69dem1hrJ7eAeh+zKx6+9z4/fz6mJt2cFMOKuYVze50wu73Mm7wx/nW1yfkbmVPIzsqqIFXgUq5KoGZI0iV5ZYd9D6f6TSZodxwFfAm8CMyNiXtqR5npJDUg+q2uBscu51iXAvcBmJM2j76fnfakKcV4M3CfpCOAVYDLwXUR8I+k8YLik1YCf0rg/r8I5q8Xo/75Bp126cO2Im5g7Zy43n379wmOXD72Gc/qcBsCh5xzFdn13pHadNfnHq7fy3OBneOjapLW6x7478vLjK1fHmXLjnhvN5rt05oL/Xce8OfO4J6c2d+bQK7mqz1kA3H/+bRz21wHUXmsNxj3/FuOefwuAwWffzIEXHs1qq9fip7nzGHzOIACab9yaw/86gAiYPH4C95656gxBOOPCKxg5+h1mzJjFbv0OZ8AxR3Dgvr2zDqvavPvcaDbfZWsu/t/1zJszj7vOuHHhsXOGXsXlfc4EYP+zD6Nr3x2oXac2l70ykJf//V+evPYBNthyI46/+XTWblCXjrt1Ye/TDuZPvf6Q1dtZpmLuJVpVipWgp1J1k1QvIr6X1AR4naSjyuQM4lgTWBAR89Ma6cC0w8wK679BP/+PTTXTmlmHUDT+NuryrEMoGqd2PTvrEIrGjZ/dn1c6u2b9w6v89+a0L+4uytRZKjXD5XlCUkMWDZqv8USYWh+4P639zSMZzG9mVtQK3cVL0p7AdST9O26NiCsqHF8TuJOkY+E04FcR8Vk+13QyBCKiZ9YxwMJxip2zjsPMbEUUshkqHUN9A7AHMIGkF/2QiBiXU+wY4NuI2FjSISS3wX6Vz3VLpQONmZlVk/mq+qMKugMfRcQnETGPZLhZ3wpl+gJ3pM8fBHaT8lth2MnQzMzysiK9SSUdn87MVf44vsLpWpN0Oiw3Id1XaZmImA/MBJqQBzeTmplZXspWoKE0IgYBg6ovmp/HNUMzM8tLgadj+wpYL2e7Tbqv0jKSVgcakHSk+dmcDM3MLC8FHnQ/EmgvqZ2k2iQLKwypUGYIyeQmkEyq8t/Ic5ygm0nNzCwvhRxakY6zPgUYRjK04vaIGCvpEmBUOtPXbcBdkj4CppOzEtHP5WRoZmZ5ma/CzvEREUOBoRX2XZDz/Efgl4W8ppOhmZnlZVWY7srJ0MzM8rIqLDLmZGhmZnlZkaEVxcrJ0MzM8rLyp0InQzMzy9P8VSAdOhmamVleVv5U6GRoZmZ5cgcaMzMrebEK1A2dDM3MLC+uGZqZWcnz0AozMyt5C5wMzcys1LmZ1MzMSp470JiZWclzzdCK1u239Mo6hKKxzj6XZR1C0Zjf9eysQyga1426IusQVhmuGZqZWclzzdDMzEregnDN0MzMSpzHGZqZWcnzPUMzMyt5q8I9w9WyDsDMzFZuZUSVH/mQ1FjS05LGp/82qqRMJ0mvSBor6R1Jv6rKuZ0MzcwsLwuIKj/ydDbwbES0B55Ntyv6ATgyIjYH9gSuldRweSd2MjQzs7xERJUfeeoL3JE+vwPoV0ksH0bE+PT5RGAK0Gx5J3YyNDOzvKxIM6mk4yWNynkcvwKXah4Rk9Lnk4HmyyosqTtQG/h4eSd2BxozM8vLinSgiYhBwKClHZf0DNCikkN/rHCekLTUqqaklsBdwFERsdwQnQzNzCwvhRxaERG7L+2YpK8ltYyISWmym7KUcvWBJ4E/RsSrVbmum0nNzCwvNdWbFBgCHJU+Pwp4rGIBSbWBR4A7I+LBqp7YydDMzPKyIKLKjzxdAewhaTywe7qNpK6Sbk3LHAzsBBwt6a300Wl5J3YzqZmZ5aWmZqCJiGnAbpXsHwUcmz6/G7h7Rc/tZGhmZnnx3KRmZlbyCjB+MHNOhmZmlhfXDM3MrOR51QozMyt5XtzXzMxKnptJzcys5K0KydCD7i1vL437jL6X3sG+F/+L24ePXOL4pOmzOPb6h/jVlffyy8vv5oWxn2YQZfW65upLeH/ci7z5xtN07rRFpWWeffoBxr47glEjhzNq5HCaNWsCwI47bMPrr/2HH3/4nAMO2Lsmw64Wv7zw11z0/PX88am/sN7m7Sots9/ph3DZyzdy9dg7F9u/cffNOPuJK/j7R/fRea9taiLcTJz356vZae9D6Hf4iVmHUhA1uGpFtamRZCjp+wrbR0v6R4HO/bykrpXs30fSaElvSxon6YR0/4mSjvyZ1+on6YJ8Y845XwdJUyT9R9LqOfvXkvR6GvtYSRfnHBssqX2hYsjXgrIyLn/geW44qR8P//EI/vPGh3w8adpiZW4ZNpJendvz77MO5Yqj9+LP9z+XUbTVY689d6X9xu3YtMMOnHTSWdzwj8uXWvbII0+ha7dedO3Wi6lTk8/piy+/4phjT+O+wY/WUMTVZ/OenVm3XQsu6vlb7jl3EIdcdmyl5d559g2u7HvuEvunT/yGu06/kVGPvVjdoWaqX589uOnqP2UdRsHU4HRs1WaVbCaVtAbJrOjdI2KCpDWBtgARcVMepz4T2C//CEFSK+B+YH/gAJJ4/y89PBfYNSK+T9/Li5KeSiecHZjGcVwh4sjXu59/zXpNG9CmaQMAenfZhOfHfMJGLZssLCPB7B/nAfD9j/No1qBeJrFWl3337c1d9yRTIL72+ps0aNiAFi3WZfLkSucQXsL
nn08AoKxsReb+L05b9urKaw+PAOCz0eNZe5261G/WkFlTZyxW7rPR4yt9/fQJUwEoK+IaRCF07dSRryZ9nXUYBVO2/EUhil7mzaSSmkl6SNLI9LF9ur+7pFfS2t3Lkn6R7q+T1o7ek/QIUKeS065DkuinAUTE3Ij4IH39RZJOl9QqZ966tyQtkLTBMuLZBJgbEd+k280lPZLW3t6WtJ2ktpLel/QvSR9KukfS7pJekjQ+XVurfEb1fwPHR8RLEfEHYKqkS9J4IyLKa9NrpI/yvw4vALvn1iSzNGXG97RotM7C7eYN6zFlxmINAZy417Y8OfJ9ep1/G6cMfIyzD9q5psOsVq1btWDClxMXbn81YRKtW1W2Ag3ceuvVjBo5nD+e+7saiq5mNWzemG8nfrNw+9vJ02jYonGGEVlNcM2w6upIeitnuzHJ7OMA1wHXRMSLktYHhgGbAe8DO0bEfEm7A38GDgROAn6IiM0kbQm8WfFiETFd0hDgc0nPAk8A9+WuaZWugNwJQNLJwM4R8bmke5cSz/YVrnU98L+I2F9SLaAe0AjYGPglSS1vJHAosANJjfJcoF9EzAJ2rBDzWbnb6TnfSM93Q0S8lpYrk/QRsFV6vOj9540P2G+bDhy529a8/ekkzrtrOA+eczirraasQ6tRRxz1GyZOnEy9enV54N+3cPjhB3H33VWeVN+saBXzvcCqqqlkOCciOpVvSDoaKL/PtzvQQVr4h7G+pHpAA+CO9P5YkNSOIJmN/HqAiHhH0juVXTAijpXUMT3/6cAewNEVy6U1v+NIEtay4mkJTM156a7Akem1FgAzJTUCPo2IMem5xwLPpotQjiFtqq2K9JydJDUEHpG0RUS8mx6eArSiQjJMV4w+HuDvp/bnmD47UN3WbViPyd9+t3D76xnfs27DxZtBH3llLDcO6AfAVu1aMven+cyYPYfG66xd7fFVl5NOPIpjjjkMgFGj3qLNeq0WHmvdpiVfTZy8xGsmpvu+/3429w1+lG5dO60SyXCnI3qzff9k7uTP3/6YRq2aAh8A0KhFE2ZMnp5hdFYTirnGV1WZN5OSxLBtRHRKH63TJsJLgeciYgtgX2CtFT1xRIyJiGtIEuGBFY+ni0PeBhyc0yy5tHjmVDGGuTnPy3K2y/gZXz4iYgbwHLBnzu610ngqlh0UEV0jomtNJEKAzddvzhdTZ/DVNzP5af4Chr3xITt33HCxMi0brcNrH3wJwCeTpzPvpwU0qldZ6/bKY+BNdyzsCDNkyDCOOOwgALbpvjWzZs5a4n5hrVq1aNKkEQCrr746e++9O2PHflDjcVeHEXcN4/I+Z3J5nzN5Z/jrbHPATgC07dyeOd/9sMT9Qlv1xAr8V6yKIRkOB35TvpGz7lQD4Kv0+dE55UeQND0iaQtgy4onlFRPUs+cXZ2AzyuUWQN4ADgrIj6sQjzvkTRZlnuWpMkWSbUkNVjK+1th6X3LhunzOiTJ/P2cIpsA71by0hq3eq3VOPuXPTnpxkfZ/7K72GPr9mzcsgk3PvkKz4/5BIDf778jD7/8Lgdffg9n/+spLj58D3Jq3iu9oU89yyeffsEH773ETTddxSm/WdRLctTI4QCsuWZthj55L2++8TRvjBrOV19N4tbb7gGga5et+OyTURx04D4MvOFK3n7rv5m8j0J497nRfPPFFC7+3/UcdvkJDD7/1oXHzhl61cLn+599GJe9MpDadWpz2SsD2ft3vwRggy034rJXBrJ1n23p/+fjOW/432r8PdSEMy68gsNOOI3PvpjAbv0O56HHh2UdUl7KIqr8KFaqibZeSd9HRL2c7aOBrhFxiqSmwA0k9+VWB0ZExImSegB3ALOBJ4HDI6Jtmhz+SXLP7D2gNXByup5V+fnXIemgshFJDWo2cGpEjJJ0EfA9yf28YSyeZPoA85YSz9rpa7ZImz2bk/QA3RBYQJIYJwFPpLVZJP0r3X5QUtvcY8v5vLZM33stki8s90fEJemx5sDjEdF9WeeYM/zG4v2pq2Hr7HNZ1iEUjeNbbZ91CEXjulFXZB1C0Vij6YZ5fTvdbN3uVf57896U14vym3CNJMNVhaTrSBLRMxnGcBowKyJuW1Y5J8NFnAwXcTJcxMlwkXyT4abrdqvy35v3p4wsymRYDM2kK5M/A1n3+phBUms0MysKq0IzaVGMVVtZRMTXLBoSklUM/8zy+mZmFRVzx5iqcs3QzMzyUlM1Q0mNJT2dTmLydDqcbWll60uaoCpO/elkaGZmeSmLBVV+5OlskrHb7Ul69J+9jLKXkow+qBInQzMzy0sNTsfWl0V9Ju4A+lVWSFIXoDnJULkqcTI0M7O81OASTs0jYlL6fDJJwluMpNWAv5HMPFZl7kBjZmZ5WZEaX+60kalBETEo5/gzQGUz3f8xdyMd713ZhQcAQ9MVi6ocl5OhmZnlZUVqfGniG7SM47sv7ZikryW1jIhJ6XSala2T1gPYUdIAkgUUaqcTvyzr/qKToZmZ5acGxw8OAY4Crkj/faxigYg4rPx5zmxny0yE4HuGZmaWp7Ioq/IjT1cAe0gaT7LC0BUAkrpKunWZr1wO1wzNzCwvNbWEU0RMA3arZP8o4NhK9v8L+FdVzu1kaGZmeVkV5rh2MjQzs7wU85yjVeVkaGZmeXHN0MzMSl5N3TOsTk6GZmaWlwVlefcSzZyToZmZ5WVVWMLJydDMzPLiDjRmZlby3IHGzMxKnptJzcys5JW5A42ZmZW6lb9eCFoV2nqtOEk6PnedslLmz2IRfxaL+LMoHl61wqrT8csvUjL8WSziz2IRfxZFwsnQzMxKnpOhmZmVPCdDq06+F7KIP4tF/Fks4s+iSLgDjZmZlTzXDM3MrOQ5GZqZWclzMjQzs5LnGWisICS1AQ4BdgRaAXOAd4EngaciYuWfr8l+FkkCDgM2jIhLJK0PtIiI1zMOLROSVgO2Iuf3JCKmZBuVuQON5U3SP4HWwBPAKGAKsBawCbAL0AU4OyJGZBZkDZNUC3gmInbJOpasSRoIlAG7RsRmkhoBwyOiW8ah1ShJGwFnAbsD44GpLPo9+QG4GbjDXxyz4WRoeZO0RUS8u4zjtYH1I+KjGgwrc5KeBQ6IiJlZx5IlSW9GxNaSRkdE53Tf2xGxVdax1SRJ9wEDgReiwh9eSesChwLfRsQdWcRX6txManmrLBGm3/7Xi4h3ImIeUFKJMPU9MEbS08Ds8p0R8dvsQsrET2lNOQAkNSOpKZaUiOi/jGNTgGtrLhqryMnQCkbS88B+JD9XbwBTJL0cEadlGlh2Hk4fpe564BFgXUmXAQcB52UbUjYkdQciIkZK6gDsCbwfEUMzDq3kuZnUCqa8GUzSsSS1wgslvRMRW2YdW1Yk1SFpIv4g61iyJGlTYDdAwLMR8V7GIdU4SRcCe5F8WXwa2AZ4DtgDGBYRl2UYXslzMrSCkTQG6AXcAfwx/fZbsslQ0r7AX4HaEdFOUifgkojYL9vIapakxpXs/i4ifqrxYDKU/n50AtYEJgNtImJW+oXptVL9PSkWHmdohXQJMAz4KE
2EG5L0mitVFwHdgRkAEfEWsGF24WTmTZKekx+yqBflZ5LelNQl08hq1vyIWBARPwAfR8QsgIiYQwneQy02ToZWMBHxQERsGRED0u1PIuLArOPK0E+V9CQtxT96TwN9IqJpRDQhaSp8AhgA3JhpZDVrnqS10+cLvwRIakBp/lwUFXegsYKR1A74DdCWnJ+tUmsWzDFW0qFALUntgd8CL2ccUxa2jYjjyjciYrikv0bECZLWzDKwGrZTRMwFqDCWcA3gqGxCsnJOhlZIjwK3AY/jb7qQfDH4IzAXuI+kCfnSTCPKxiRJZwGD0+1fAV+nwy1K5uekPBFWsv8b4JsaDscqcAcaKxhJr0XENlnHYcVFUlPgQmCHdNdLwMXATEpzMoaLIuKirOOwxTkZWsGkTYLtgeEktSEAIuLNzILKgKTHSQeYV6aEm41LWjon6S3AlIg4J+t4bHFuJrVC6ggcAezKouavSLdLyV/Tfw8AWgB3p9v9ga8ziShDkjYBTmfJe8ml9nPxBDDWibA4uWZoBSPpI6BDOv1ayZM0KiK6Lm/fqk7S28BNJLMSLSjfHxFvZBZUBiR9DfSLiFeyjsWW5JqhFdK7QEOSVSsM6kraMCI+gYW9betmHFMW5kfEwKyDKAK7AP+WdGxEvJZ1MLY4J0MrpIbA+5JGsvg9w1K9R3Ya8LykT0imIdsAOCHbkDLxuKQBJPOT5v5cTM8upJoXEeMk9SbpVbtT1vHY4txMagUjaefK9kfE/2o6lmKRjqPbNN18f2nd61dlkj6tZHdERCnOxoOkdSLiu6zjsMU5GZpVI0nbsWTHkTszC8jMKuVmUisYSdsCfwc2A2oDtYDZEVE/08AyIukuYCPgLRZ1HAmg5JKhpC2ADiQruwOl+6VAUleSyRg2IPkbLJKasifqzpCToRXSP4BDgAeArsCRwCaZRpStriS9a0u6+SVduqgnSTIcSjI36YuU4JeC1D3AGcAYSmgGnmLnibqtoNLZRGqls/P/k2Tx0lL1Lsk4w1J3EMlahpMj4tfAVkCDbEPK1NSIGBIRn0bE5+WPrIMqda4ZWiH9IKk28Jakq4BJlPYXrqbAOEmvU9q9a+dERJmk+ZLqkwy9WS/roDJ0oaRbgWdZ/Ofi4exCMidDK6QjSJLfKSTDCtYDSnkJp4uyDqBIjJLUkGQqsjeA74FSHnj+a5Iexmuw+ExNToYZcm9SK4h0BYI7I+KwrGMpJpI2ANpHxDPpWna1SrlbvaS2QP2IeCfrWLIi6YOI+EXWcdjiSrkJywooIhYAG6TNpAZIOg54ELg53dWaZJmrkiGpVrpqRbmJwLaS3ssqpiLwsqQOWQdhi3MzqRXSJ8BLkoYAs8t3RsTV2YWUqZOB7sBrABExXtK62YZUcyQdQvJFYLak8cBlwO3ASKCUWxC2Jbmv/inJPUMPrSgCToZWSB+nj9WAdTKOpRjMjYh5kgCQtDrLWNppFXQe0CUiPpK0Ncl9woMi4vGM48paKfewLlpOhlYwEXFx1jEUmf9JOheoI2kPYABQSolgXvnCvRHxpqTxToQAtCRZyuk7gLSH7WaAh1dkyB1orGCWsqjtTGAUcHNE/FjzUWUnXcz1GKBXumtYRNyaYUg1StIEILeJ/Pe526XafC5pNLB1+WQM6c/JqIjYOtvISps70FghfULSbf6W9DEL+I5kFppbMoyrRknqK+nkiCiLiFtIpt3qCpwr6aCMw6tJt5A0l5c/Km6XKuXOShQRZbiVLnOuGVrBSBoZEd0q2ydpbERsnlVsNUnSS8AhEfFluv0WsCtQD/hnROyWYXiWMUkPA88D5Ws8DgB2iYh+WcVkrhlaYdWTtH75Rvq8Xro5L5uQMlG7PBGmXoyI6RHxBaW5uK8t7kRgO+ArYAKwDXB8phGZq+ZWUH8AXpT0MUl38XbAAEl1gTsyjaxmNcrdiIhTcjab1XAsVmQiYgrJhPZWRNxMagVVYTHbD0qt0wyApHuA59P7hbn7TwB6RkT/bCKzLEk6D7gxIqYv5fiuwNoR8UTNRmbgZGgFIGmHiHhxGcfrA+tHxLs1GFZm0oH1j5IMqH4z3d0FWBPoFxFfZxRaJiQ1B/4MtIqIvdLZV3pExG0Zh1ajJPUFzgR+JPm5mEqyvmN7oBPwDPDniJiaVYylzMnQ8ibpGpL7Hv8hmYi5/Jd8Y2AXkt6Uf4iIkZkFmYH0m355p6GxEfHfLOPJiqSngH8Cf4yIrdLJB0ZHRMeMQ8uEpPbA9iTjDecA7wEjImJOpoGVOCdDKwhJjUlWqKj4S/7ksmqNturL6VE8OiI6p/veiohOGYeWKUlrR8QPWcdhCXegsYKIiOmSnqnkPlm7rGKyojFbUhPSCRkkbUsyGUNJktQDuI2kp/X6krYCToiIAdlGVto8tMIK6aFK9j1Y41FYsfk9MATYKB2DeSfwm2xDytS1QG9gGkBEvA3slGVA5pqhFYCkTUnujTWQdEDOofok9w6thKXzku4M/IJkyM0HEfFTxmFlKiK+LJ/APbUgq1gs4WRohfALYB+gIbBvzv7vgOOyCMiKR4UvSACbSJoJjEnH3JWaLyVtB4SkNYBTSe6vW4bcgcYKRlKPiHgl6zisuEh6EugBPJfu6knS67gdcElE3JVRaJlIFzu+DtidpKY8HDg1IqZlGliJc83QCumjdMmituT8bEXE/2UWkRWD1YHNysdXpuMO7yQZjjMCKKlkCNSJiMUWN5bUIqtgLOFkaIX0GPACyeBh3wOxcutVmGhgSrpvuqRSvHf4qaQHgP/LGVs4FPASThlyMrRCWjsizso6CCs6z0t6Angg3T6IZOHjusCMzKLKzhiSL40vSfplRJTP5WsZcjK0QnpCUp+IGJp1IFZUTgYOAHZIt++IiPIhN7tkE1KmIiJulPQ28Liks1hyUWyrYe5AY3mT9B3JL7NIliiaC/yUbkdE1M8wPCsyknYkWe/x5KxjyUKFmXhaAvcDXSJi7WwjK22uGVreIqKUVy23KpDUGegPHAx8CjycbUSZ6lP+JCImSdqFZH1Dy5CToRWMpMo6AMwEPo+I+TUdj2VL0iYkCbA/8A3wb5LWqFJsGkXS4RFxN9C/woD7ciNqOCTL4WRohXQjSY+4Mel2R+BdkplpToqI4ZlFZll4n6SjyD4R8RGApNOyDSlTddN/3ZJShHzP0ApG0sPA+RExNt3uAFxCsobbw6W+SkGpkdSPZEX37UmW9xoM3BoRnrzdio4n6rZC2qQ8EQJExDhg04j4JMOYLCMR8WhEHAJsSjL7zO+AdSUNlNQr0+AyIOm4dC1DlLhd0kxJ76T3VC1DToZWSGPTP3Q7p48bgXGS1iTpXWolKCJmR8S9EbEv0AYYDZTieNRTgc/S5/2BrYANSVb1uD6jmCzlZlIrGEl1gAEsGk/2Esl9xB9JBuR/n1VsZlnLXdBY0r3AaxFxXbr9ZkR4BpoMORmamdUASW8CewPfAp8Du+bcX38vIjbLMr5S596kljdJ90fEwZLGUMlMGhGxZQZhmRWbC4BRQC1gSE4i3BnwffWMuWZoeZPUMh08vEFlx
yPi85qOyawYSVodWCcivs3ZV5fkb7FvI2TIydAKKk2I7SPimfQe4uoR8V3WcZmZLYt7k1rBSDoOeBC4Od3VBng0s4DMzKrIydAK6WSSAdazACJiPLBuphGZmVWBO9BYIc2NiHnl8y6m90fcDm/GUufuXSgi3qypWGxJToZWSP+TdC5QR9IeJGMOH884JrNi8bf037WArsDbJMucbUnSy7RHRnEZ7kBjBSRpNeAYoBfJL/kwkrko/UNmlkrn8L0wIsak21sAF0XEQdlGVtqcDC1vkqYBr5HMOPMyycwaP2QblVlxkjQ2IjZf3j6rWU6GljdJ9YFtSRYo3Q7oQrKA60vASxFxf4bhmRUVSfcBs4G7012HAfUion92UZmToRVcOoj41ySrFLSLiFrZRmRWPCStBZwE7JTuGgEMjIgfs4vKnAwtb5JasahW2C3d/QbwKvCKZ6Axs2LnZGh5k1QGvAlcAzwQEfMyDsmsaKVrGl4OdCDpWQpARGyYWVDmQfdWENsD9wL7A69IekjS6ZK2T9cyNLNF/gkMBOYDuwB3suj+oWXENUMrOEltgX1JFjNtExFrLfsVZqVD0hsR0UXSmIjomLsv69hKmQfdW0FI2pRF9w23BxqS3DO8KcOwzIrR3HRM7nhJpwBfAfUyjqnkuWZoeZP0DTAReIV0rGFEfJRtVGbFSVI34D2SL4yXAvWBv0TEq1nGVeqcDC1vkhpExMys4zBbmUha25NTFA93oLG8ORGaVZ2kHpLGAe+n21tJujHjsEqek6GZWc26FugNTAOIiLdZNADfMuJkaGZWwyLiywq7FmQSiC3k3qRWMOmYwgOBtuT8bEXEJVnFZFaEvpS0HRCS1iAZgvRexjGVPCdDK6THgJkkU7HNzTgWs2J1InAd0JpkWMVw4ORMIzL3JrXCkfRuRGyRdRxmZivKNUMrpJcldSxftNTMFpH0d2CptY+I+G0NhmMVOBlaIe0AHC3pU5JmUgEREVtmG5ZZURiV8/xi4MKsArEluZnUCkbSBpXt9xJOZouTNDoiOmcdhy3imqHlTVL9iJgFfJd1LGYrCddCioyToRXCvcA+JL1Ig6R5tFwAXqfNzIqam0nNzGqApO9YVCNcGyifl7T83nr9TAIzwMnQCkxSI6A9i6/gPSK7iMzMls/NpFYwko4lXdAXeAvYlmRZp10zDMvMbLk8N6kV0qlAN+DziNgF6AzMyDQiM7MqcDK0QvoxIn6EZJ7SiHgf+EXGMZmZLZebSa2QJkhqCDwKPC3pW8BjDM2s6LkDjVULSTsDDYD/RMS8rOMxM1sWJ0MrCEm1gLERsWnWsZiZrSjfM7SCiIgFwAeS1s86FjOzFeV7hlZIjYCxkl4HZpfvjIj9sgvJzGz5nAytkM7POgAzs5/D9wytWkhqCkwL/4CZ2UrA9wwtb5K2lfS8pIcldZb0LvAu8LWkPbOOz8xseVwztLxJGgWcSzKUYhCwV0S8KmlT4D6v22Zmxc41QyuE1SNieEQ8AEyOiFcB0hlozMyKnpOhFUJZzvM5FY656cHMip6bSS1vkhaQDKUQUIfF12lbKyLWyCo2M7OqcDI0M7OS52ZSMzMreU6GZmZW8pwMzcys5DkZmplZyXMyNDOzkudkaGZmJe//ATO6YwwxZTR4AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# ...& the rest, after being evaluated which variables have some promise regarding correlation, being used as combined independants\n", + "# to do this, we'll create a 'correlation matrix', which determines the correlations between each&every variable\n", + "# 'heatmaps' are the visual equivalent of a given correlation matrix\n", + "# then, we'll select the ones which predict the weight of one's brain (and have a high correlation to it)\n", + "# this process is called 'feature selection'!\n", + "numeric_col = [\"Brain Weight(grams)\", \"Gender\", \"Age Range\", \"Head Size(cm^3)\"] # only interested in the latter 3's relation to the brain weight\n", + "corr_matrix = data.loc[:,numeric_col].corr() # correlation matrix formation\n", + "print(corr_matrix) # print out pure text values\n", + "sns.heatmap(corr_matrix, annot=True) # use heatmap to visualise the correlation matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[4512]\n", + " [3738]\n", + " [4261]\n", + " [3777]\n", + " [4177]\n", + " [3585]\n", + " [3785]\n", + " [3559]\n", + " [3613]\n", + " [3982]\n", + " [3443]\n", + " [3993]\n", + " [3640]\n", + " [4208]\n", + " [3832]\n", + " [3876]\n", + " [3497]\n", + " [3466]\n", + " [3095]\n", + " [4424]\n", + " [3878]\n", + " [4046]\n", + " [3804]\n", + " [3710]\n", + " [4747]\n", + " [4423]\n", + " [4036]\n", + " [4022]\n", + " [3454]\n", + " [4175]\n", + " [3787]\n", + " [3796]\n", + " [4103]\n", + " [4161]\n", + " [4158]\n", + " [3814]\n", + " [3527]\n", + " [3748]\n", + " [3334]\n", + " [3492]\n", + " [3962]\n", + " [3505]\n", + " [4315]\n", + " [3804]\n", + " [3863]\n", + " [4034]\n", + " [4308]\n", + " [3165]\n", + " [3641]\n", + " [3644]\n", + " [3891]\n", + " [3793]\n", + " [4270]\n", + " [4063]\n", + " [4012]\n", + " [3458]\n", + " [3890]\n", + " [4166]\n", + " [3935]\n", + " [3669]\n", + " [3866]\n", + " [3393]\n", + " [4442]\n", + " [4253]\n", + " [3727]\n", + " [3329]\n", + " [3415]\n", + " [3372]\n", + " [4430]\n", + " [4381]\n", + " [4008]\n", + " [3858]\n", + " [4121]\n", + " [4057]\n", + " [3824]\n", + " [3394]\n", + " [3558]\n", + " [3362]\n", + " [3930]\n", + " [3835]\n", + " [3830]\n", + " [3856]\n", + " [3249]\n", + " [3577]\n", + " [3933]\n", + " [3850]\n", + " [3309]\n", + " [3406]\n", + " [3506]\n", + " [3907]\n", + " [4160]\n", + " [3318]\n", + " [3662]\n", + " [3899]\n", + " [3700]\n", + " [3779]\n", + " [3473]\n", + " [3490]\n", + " [3654]\n", + " [3478]\n", + " [3495]\n", + " [3834]\n", + " [3876]\n", + " [3661]\n", + " [3618]\n", + " [3648]\n", + " [4032]\n", + " [3399]\n", + " [3916]\n", + " [4430]\n", + " [3695]\n", + " [3524]\n", + " [3571]\n", + " [3594]\n", + " [3383]\n", + " [3499]\n", + " [3589]\n", + " [3900]\n", + " [4114]\n", + " [3937]\n", + " [3399]\n", + " [4200]\n", + " [4488]\n", + " [3614]\n", + " [4051]\n", + " [3782]\n", + " [3391]\n", + " [3124]\n", + " [4053]\n", + " [3582]\n", + " [3666]\n", + " [3532]\n", + " [4046]\n", + " [3667]\n", + " [2857]\n", + " [3436]\n", + " [3791]\n", + " [3302]\n", + " [3104]\n", + " [3171]\n", + " [3572]\n", + " [3530]\n", + " [3175]\n", + " [3438]\n", + " [3903]\n", + " [3899]\n", + " [3401]\n", + " [3267]\n", + " [3451]\n", + " [3090]\n", + " [3413]\n", + " [3323]\n", + " [3680]\n", + " [3439]\n", + " [3853]\n", + " [3156]\n", + " [3279]\n", + " [3707]\n", + " [4006]\n", + " 
[3269]\n", + " [3071]\n", + " [3779]\n", + " [3548]\n", + " [3292]\n", + " [3497]\n", + " [3082]\n", + " [3248]\n", + " [3358]\n", + " [3803]\n", + " [3566]\n", + " [3145]\n", + " [3503]\n", + " [3571]\n", + " [3724]\n", + " [3615]\n", + " [3203]\n", + " [3609]\n", + " [3561]\n", + " [3979]\n", + " [3533]\n", + " [3689]\n", + " [3158]\n", + " [4005]\n", + " [3181]\n", + " [3479]\n", + " [3642]\n", + " [3632]\n", + " [3069]\n", + " [3394]\n", + " [3703]\n", + " [3165]\n", + " [3354]\n", + " [3000]\n", + " [3687]\n", + " [3556]\n", + " [2773]\n", + " [3058]\n", + " [3344]\n", + " [3493]\n", + " [3297]\n", + " [3360]\n", + " [3228]\n", + " [3277]\n", + " [3851]\n", + " [3067]\n", + " [3692]\n", + " [3402]\n", + " [3995]\n", + " [3318]\n", + " [2720]\n", + " [2937]\n", + " [3580]\n", + " [2939]\n", + " [2989]\n", + " [3586]\n", + " [3156]\n", + " [3246]\n", + " [3170]\n", + " [3268]\n", + " [3389]\n", + " [3381]\n", + " [2864]\n", + " [3740]\n", + " [3479]\n", + " [3647]\n", + " [3716]\n", + " [3284]\n", + " [4204]\n", + " [3735]\n", + " [3218]\n", + " [3685]\n", + " [3704]\n", + " [3214]\n", + " [3394]\n", + " [3233]\n", + " [3352]\n", + " [3391]]\n" + ] + } + ], + "source": [ + "# relevant variable(s) shown as: [\"Head Size(cm^2)\"]\n", + "x = data.iloc[:, 2:3].values\n", + "print(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABO00lEQVR4nO29eZhcV3Xo+1s1d3f1oB7Uas2TJSQLjI2MrGAcxxgsDLnOzQ0BEoKTcJ9DAs4NF3++EPIwCSGXEIIDJpMJvmY2kBCuXwAbmclAPEmekCVL1jy1Wj2oq8eqrmG9P86pUnWrpu6u6q7qXr/vq09V++xzzqrTpb32XmuvtURVMQzDMIxCeOZbAMMwDKP6MWVhGIZhFMWUhWEYhlEUUxaGYRhGUUxZGIZhGEUxZWEYhmEUxZSFUVZE5LiI3DgH91krIioivmme96ci8i+VkquE+98vIn9ZxuvN9/f5bRH5fol9f1dEflZpmYzKMK3/aIZR66jqX823DOVkvr+Pqn4F+Eo5riUiPwa+rKrzpvyM/NjKwjCqlOmumgyjkpiyMCrBK0XkeRGJiMjXRSSUPiAibxaRZ0VkUET+U0RekXXsAyJyRESGRWS/iPzXrGNeEfmkiPSJyFHgTYUEEJH/JSJn3GsdFJHXue0fEZEvu+8/KyIjWa+EiHzEPbZcRP5NRHpF5JiI/HGe+wTc73N7lpw/F5EPFxCvXUR2u7L9RETWZF1PReQ9IvIS8JLb9mkROSUiQyKyV0Rem9U/+/ukTXO3ishJ91l9KI/c69y/gcf9/DkROZ91/Esi8ifu+2YR+byIdLvP9C9FxOsem2RaEpE3uM87IiL/4H6//z7l3p8UkQvuc32j2/Yx4LVA+m/y2QLPz5gPVNVe9irbCzgOPAksB1qBA8C73WNXAueBHYAXuNXtH3SPv8U9zwO8FRgFutxj7wZeBFa51/0RoIAvhwybgVPAcvfzWmCD+/4jOKaOqee8Euh1ZfQAe4EPAwFgPXAUuCnPd94GXAC2AB8CHge8efreDwwD1wFB4NPAz7KOK7Db/Y51bts7gDYcs/H7gXNAaOr3cb+nAp8D6oArgBiwJY8sJ4FXue8Put9xS9axK933/w78M9AALHX/vn/gHvvdtPxAOzAE/Lor6/8A4sB/z+obB/4f9+//h8BZQNzjP073tVf1vWxlYVSCz6jqWVUdAP4/nIEY4Dbgn1X1CVVNquoXcAazawBU9ZvueSlV/TrOzPrV7rm/Cfydqp5yr/u/C9w/iTMQbxURv6oeV9Uj+TqLSAfwbeB2VX0GuBroUNW/UNUJVT2KMwC/Ldf5qroP+Ev3GncAv6OqyQLyfUdVH1XVGI5y2Skiq7KO/29VHVDVcff6X1bVflVNqOrfut9tc4Hr/7mqjqvqc8BzOEojFz8BfllElrmf/9X9vA5oAp4TkU7gZuBPVHVUVc8Dd+d5FjcDL6jqt1Q1AXwGR7Flc0JVP+c+ny8AXUBnge9iVAmmLIxKkD1AjAFh9/0a4P2u+WNQRAZxVgrLAUTknVkmqkGcGXu7e+5ynNVCmhP5bq6qh4E/wZl1nxeRB0Rkea6+IuLHGSS/qqoPZMm5fIqcf0rhQe0L7nnfVdWXCvQj+3uo6ggwgPsMph53ZbxDRA64pp1BoJmLzyUX+Z7/VH4CXI+zynkUZ2b/y+7rp6qacr+TH+jOehb/jLPCmMqkv5GqKnA6n2yqOua+zSefUUWYsjDmklPAx1S1JetVr6pfc+32nwPeC7SpaguwDxD33G4cxZJmdaEbqepXVfVanMFOgb/O0/UeHNPJn02R89gUORtV9eYCt/wH4D+Am0Tk2kKyZX8PEQnjmJzOZoufdfy1wJ04K6sl7nOJcPG5zIaf4PgJrnff/wx4DY6y+Inb5xTO6q8961k0qerlOa7XDazMkl2yP5eApcCuYkxZGHPJ54B3i8gOcWgQkTeJSCOOPVxx/AaIyO/hrCzSfAP4YxFZKSJLgA/ku4mIbBaRG0QkCESBcSCVo98f4AyMv+3OotM8CQy7TvI612m9TUSuznO/3wFehWOT/2PgC64SyMfNInKtiASAjwKPq+qpPH0bgQTOc/G5jvOmAtcuGXcFNI7jE/mJqg4B
PcB/w1UWqtoNfB/4WxFpEhGPiGwQkV/OccnvAC8XkV8TZyfXe4BlOfrlowfHP2RUIaYsjDlDVffgODc/i+MQPowzwKKq+4G/BR7DGTReDvw86/TPAQ/j2OCfBr5V4FZB4ONAH47ZYynwwRz93o4zOJ2Vizui/tS1p78Zx9dyzL3Ov+CYfyYhIquBvwPeqaojqvpVYA+OXT8fXwXuwjE/vQpnsM7Hw8BDwCEc01uUKWaqWfIToD9LWf0EZ9XydFafd+I4+vfj/N3+FcfXMAlV7cPZpPAJoB/YivMsYiXK8mngN9ydUp+Z/lcxKkl6F4JhGEZZcbflnsZZuf1ovuUxZoetLAzDKBsicpOItLgmwD/FWaU8Ps9iGWXAlIVhGOVkJ3AEx3T3q8CvpbcAG7WNmaEMwzCMotjKwjAMwyjKgkxU1t7ermvXrp1vMQzDMGqKvXv39qlqR65jC1JZrF27lj179sy3GIZhGDWFiOTNjGBmKMMwDKMopiwMwzCMopiyMAzDMIpiysIwDMMoiikLwzAMoygLcjeUYRjGYuNAd4SH9vVwZnCcFS117NrWyZauS3JfzhhbWRiGYdQ4B7oj3PvoMSLjcbqaQ0TG49z76DEOdEfKdg9TFoZhGDXOQ/t6aK7z01znxyOSef/Qvp6y3cOUhWEYRo1zZnCcxtBkr0JjyMeZwfLlcDRlYRiGUeOsaKljOJqY1DYcTbCipa5s9zAHt2EYBpV3EFeSXds6uffRY4CzohiOJoiMx3nr1dMpgV4YW1kYhrHomQsHcSXZ0tXMbdeto7nOT3ckSnOdn9uuW1dWZWcrC8MwFj3ZDmIg8+9D+3pqZnWxpau5orLaysIwjEXPXDiIax1TFoZhLHrmwkFc61RMWYjIfSJyXkT2TWm/XUReFJEXROQTWe0fFJHDInJQRG7Kat/lth0WkQ9USl7DMBYvu7Z1EhmPExmPk1LNvN+1rXO+RasaKrmyuB/Yld0gIr8C3AJcoaqXA59027cCbwMud8/5BxHxiogX+HvgjcBW4O1uX8MwjLIxFw7iWqdiDm5VfVRE1k5p/kPg46oac/ucd9tvAR5w24+JyGHg1e6xw6p6FEBEHnD77q+U3IZhLE4q7SCudeZ6N9Qm4LUi8jEgCtyhqk8BK4DHs/qddtsATk1p35HrwiJyG3AbwOrVq8sstmEY5aKW4xkWM3OtLHxAK3ANcDXwDRFZX44Lq+q9wL0A27dv13Jc0zCqnVobeNPxDM11/knxDGbyqX7mejfUaeBb6vAkkALagTPAqqx+K922fO2GseipxUCyuUh4Z1SGuVYW3wZ+BUBENgEBoA94EHibiARFZB1wGfAk8BRwmYisE5EAjhP8wTmW2TCqkloceC2eoXapmBlKRL4GXA+0i8hp4C7gPuA+dzvtBHCrqirwgoh8A8dxnQDeo6pJ9zrvBR4GvMB9qvpCpWQ2jFrizOA4Xc2hSW3VPvCuaKkjMh7PREiDxTPUCpXcDfX2PIfekaf/x4CP5Wj/LvDdMopmGAuCWhx45yLhnVEZLDeUYVQZpTqta3HgTcczZH+/t1690pzbNYA4VqCFxfbt23XPnj3zLYZhTJvs3ULZCiDfbqFa2w1lVDcisldVt+c6ZisLw6gippv91ALJjLnClIVhVBG16LSeDrYSql0s66xhVBELOftpLcaFGBcxZWEYVcRCzn5ai3EhxkVMWRhGFbGQs59aQF5tYz4Lw6gyFqrTuhbjQoyL2MrCMIw5YSGb2BYDpiwMw5gTFrKJbTFgZijDMOaMhWpiWwzYysIwDMMoiq0sDMMwZshiCjI0ZWEYRs0zH4P2Yqv6Z8rCMIx5ZzaD/XwN2tPN41XrmLIwjCpgMZkzpjLbwX6+Bu2FnsdrKubgNox5ZrHnTJptGpD5igxfyHm8cmHKwjDmmcWeM2m2g/18DdqLLcjQlIVhzDOLPWfSbAf7cg3aB7oj3L37EHd88znu3n2o6MpusQUZms/CMOaZxZ4zqRzlYev9Hp48NoCiXLmqZdqD9neeP8M9PzxCPJmirSFAPJHk3kfHil5nMQUZVmxlISL3ich5EdmX1fYRETkjIs+6r5uzjn1QRA6LyEERuSmrfZfbdlhEPlApeQ1jvlhs5oypzGaGnvb3+H1eXrdlKTvWtTEeT03r/ge6I9zzgyMAtDUEiCVSHOwZIZVKLRpTYClUcmVxP/BZ4ItT2u9W1U9mN4jIVuBtwOXAcuAREdnkHv574PXAaeApEXlQVfdXUG7DmFPSg2X2bqi3Xr1y0cxYYeYz9HLshHpoXw+JlNLa4EdECPm9AHRHovh93mnLtFCpmLJQ1UdFZG2J3W8BHlDVGHBMRA4Dr3aPHVbVowAi8oDb15SFsaBYTOaMclKO7atnBsdpbfATS6QyiiLo89A/OsHODe1llbeWmQ+fxXtF5J3AHuD9qnoBWAE8ntXntNsGcGpK+45cFxWR24DbAFavXl1umQ1jUVLt8R/l8PesaKljIp7k0PkRwFEUQ9EEfq9n0ZgCS2Gud0P9I7ABeCXQDfxtuS6sqveq6nZV3d7R0VGuyxrGoqUW4j/K4e/Zta0Tr9fDpqVhgj4PA6NxULj9hg1VpRjnmzldWahqxlskIp8D/sP9eAZYldV1pdtGgXbDMCrEge4Idz24n4GRCVrDATZ2NNDR6Jh75jqdRaHVTTn8PdnXCPi97NxQfSuoamBOlYWIdKlqt/vxvwLpnVIPAl8VkU/hOLgvA54EBLhMRNbhKIm3Ab81lzIbxmIjvaLoH4nRWu8nFk/y9MlBrlrdQls4OKfxH6WkAimHv8d8RsWpmLIQka8B1wPtInIauAu4XkReCShwHPgDAFV9QUS+geO4TgDvUdWke533Ag8DXuA+VX2hUjIbhnFxh1F7OEg0nsw4fQ/3jhLweec0/mOxJeurZiq5G+rtOZo/X6D/x4CP5Wj/LvDdMopmGEYB0juMNi5tYO+JQQACXmFgZGLawXLlkiWbxRTdXk1YBLdhzAHVvqsom/QOo/ZwiFetaeHw+VH6RmK0hYNzns5isUe3VxOWG8owKkwt7CpKc6A7Qt9wlB++eJ4fHzxPMqVs6WriFStb+PP/snXOFdxij26vJkxZGEaFqZWsstmpM16zsRWA/zwywEQiOW8J8hZbsr5qxsxQxqJlrkxDtWJ3n+xM9rN080UT0HwOzrZTqTqwlYWxKJlL01CtFMlZ7KnSjcKYsjAWJXNpGqoVu3utKDVjfjBlYSxK5nIWXSt290JKbbqFgYyFh/ksjEXJXG/JrAW7e77UGUDRKGpj4WPKwliUlKM620Ikl1K7e/chi6I2TFkYi5OFXHCo3Lu8amU311xQS8GV5caUhbFoqQXT0HQpJfHedLEoaodKPNtawpSFYVQRs525ViLxXtpkd2E0RnckSv/oBH6vh9tv2DCt69T6rHyxJzU0ZWEYVUKhmSuQGWiDXkGBiaReMuhWwmS0pauZG7d0cM8PjpBIKW0NAZY1hXjkQC/rO8IlDZQLYVa+2M1xpiwMo0rIN3P90mMnGI+naK7z4/fCY0cHEOD
qdUsuGXTLbTJKrwZ27+8h6Pdw1fKmTBGkyHi85Fn1QpiVL3ZznMVZGEaVkC/245lTg5mB9mjvGI0hH+GQj6N9Y5cEE5YzAPBAd4RPPnyIHx88z+kLo3RHxnnsSD+9w9GMbKXOqhdCdHitBFdWClMWhlEl5IugFiQz0A5F4wR9HoI+DyNu3+xBt5wBgF9+7ATH+kYBCAf9JFNwYSzOL85EMrKVOqvO/m59I1EeP9rPd3/RzcmBsZoJ8KuV4MpKYWYow6gS8sV+XLmqmeFoguY6P00hP9F4EgCvBx4/2p+pNXGgO5LZ4VWOAeyZUxHCQS8hv5fWhgDdkSgege5INDOrLjUuJdtJ/uK5YUQEv8fDssbgnPouZutkX4g76ErFVhaGUSXkm7m+Y+eazODcWu/jRP8YL50f5kT/KD2R8UmDbjln6Yoi7vuGoI+u5hA+rxBPpqY9q05/t+6hGMkUNNX5uWpNC+s6wnOWrr2W6opUI7ayMIwZUKltoPlmrrddt44vP3aCF7pH6GwK0jcSJZ6E4ViSLV1NrOsIT8vhXApXrmrhiaMDIELQ58HrERqDfm7c0sr7Xr9p2tfb0tXM6tZ6dqxrxSOSaZ8r38VCcLLPJ7ayMIxpMh8z1C1dzbQ3hnjlymaa6wMkUhAO+ggHvTx/JsL3959j/9kI+8sow+/sXMPqtnqAjL9hdVs9v7NzzYyud6A7wsmBMb73i3M8dvSio3yudhQtBCf7fFIxZSEi94nIeRHZl+PY+0VERaTd/Swi8hkROSwiz4vIVVl9bxWRl9zXrZWS1zBKZb4q371wNsKhnhFi8SQNAS9jEwm6I1GGxuM0Bn0MRROc6h8vm9La0tXMnbs2c/3mpVy+opnrNy/lzl2bZzQLTyvYrqYgXg8Mjcd5+sQgx3pH5mxHkaVgnx2VNEPdD3wW+GJ2o4isAt4AnMxqfiNwmfvaAfwjsENEWoG7gO2AAntF5EFVvVBBuQ2jIJUKzipm2hqKJkAg5PfSFg5y4fwI4PzHiCVSAGzqDM/arFIJE1u2gg2HfBw+P0rfSIxzw7E5q+1dLHlkrUeYV5qKrSxU9VFgIMehu4E7cX7jaW4BvqgOjwMtItIF3ATsVtUBV0HsBnZVSmbDKIVKzFBLMW011/lQVaLxJHV+Lz6v4HFN/0G/l6tWt7CmvWFWSqtSJrZsE1B7OMQ169u4+eVdrG6tn7MBudDWV3N+F2dOHdwicgtwRlWfkywHF7ACOJX1+bTblq/dMOaNSqQ3L8X5urWrmXq/l3NDMXqHo6iC1+Ohqc7Pxo4GOhqdQW42SquYHDOdfVdL9HO+DQTm/C5O0ZWFiLyllLYSrlMP/Cnw4emeW+L1bxORPSKyp7e3txK3MAygMsFZpThfd23rxOPxsKwpiN/roaMxiFcg4JGy2f8LyTGb2Xe1Rz+b87s4pZihPlhiWzE2AOuA50TkOLASeFpElgFngFVZfVe6bfnaL0FV71XV7aq6vaOjYwbiGUbpbOlq5n2v38Qn33IF73v9plnPPksxbU2NVehoDHHtZe10NtcRT6U4NxybtdIqJMdsHPvVHv1szu/i5DVDicgbgZuBFSLymaxDTUAi91n5UdVfAEuzrn8c2K6qfSLyIPBeEXkAx8EdUdVuEXkY+CsRWeKe9gZmpqgMo6op1bSVK1bhMiClSnfE2Yp69+5D0zYTpc1LL5yNcPrCOJs7w6xua5gkx+d/drwkx34+U1U+E1A1OJatcmJxCq0szgJ7gCiwN+v1II7juSAi8jXgMWCziJwWkXcV6P5d4ChwGPgc8EcAqjoAfBR4yn39hdtmGAuK6cy8882CA16ZkZko27y0pauJTUvDHDw3wovnhibJUcrse7qmqmpxLFf7yqcaEFUt3EHEr6rxOZKnLGzfvl337Nkz32IYVUg1zGKLUUzG7zx/JlNborXBz7KmEF6vh3q/B7/PO8mJnHYq54u4PtAd4a4H99M/EqM9HGTj0gbaw6Gc52XXpMiefWcPqnfvPnSJI7uQDNPtb1QWEdmrqttzHSvFZ/FqEdktIodE5KiIHBORo2WW0TAqTrXMYgtRTMYD3REeOdDL5mVhWhv89I9OcOj8CDdu6SCW1Gk5adP3GhiZoLXeSVC498QgfSPRnOeVMvsu5iC/e/ch7vjmc9y9+xAHuiPmWK4hStk6+3ngfTgmqGRlxTGMylEL2yOLyZh9fG17GHBm4od6Rqe9PTV9rdZwgFg8ScjvBeDw+VG2dHlznlcs62o+GdImsqmV8ur9nkxG3VJkNuaPUlYWEVX9nqqeV9X+9KvikhlGmamFWWwxGQsdn+721PS1NnY0EEukiMaTBLxC30hs2tta06uGF85GePxoP8f7RibJIJBzJ5VCVW+pNS6SV1mIyFVujqYficjfiMjOdFt27ibDqBVqYXtkMRkLHc82E714boj93UMMR51MtLlMbelrdTSGuGp1C0G/lwtjCdrCwWk5d7NNZ+3hAKmU8uODvfzH82eZSCS57bp1eU1kE0k1x3KNUMgM9bdTPmc7PRS4ofziGEb5mOoo3tTZwCMHnIDNat0eWWwLZ67jpwbGmGgKcsc3n8t8z5MDY6xoqacx5LukTneue7WFgwR83ksc1qWQNmdNJJI8eypCXcDLmrZ6RITxuJOzqpCJbDEXFKoliu6GqkVsN5SRb+fOjVs6ONQzOq3dULmUTr5rlGO3VbFrZB8PeoWzkShr2hoy3/PxI/1sXhbO+DQg/w6jcsh7xzefo6s5xBPHBjK+D1VlOJZgx7o2muv8GcVUaCeVMf8U2g1V1MEtIv8zR3ME2Kuqz85SNsOoCPkcxYd6Rqe1JfNAd4RPPHSQgdEJJhIpnj81yANPTnDN+jZWtzVMmrUDmb5D0Tg/PZTkgadO8rqXLc3UgHhoXw/7uyNExhM0hXxcvrw5Y5/Pd2zqYJo9E79796FJ22Wb6/wkUk6AXrayyPZ7lHv7cHrVMBJNEA46TvJYIkVTyJ+5b9pEln3ft1690hRFDVHKbqjt7uv/cz+/GXgeeLeIfFNVP1Ep4QxjppQrjfiXHjvByf4xwiEfjSEfx/tHicZTHOkdZW17eNJupd7hKCf7x/B5hchYHI8IiZTy1PEBt361EA56Odk/hogwNBan3u/lEw8N5j1276NjBWffU79n73CUsYkEPUPjiEgmbiJt8slecWXvSprNDD+9avB7hVg8CSLEEim2rWia5G8xc1NtU8puqJXAVar6flV9P/AqnLQd1wG/W0HZDGPGlMuZ/cypQcJBLyG/FxEhpRDyezg3FM30SSuhdN+RWAK/10PQ7yHgEy6MxRkYnXDqNwzFCPmdlUDQ7+HccKzgsWK5l7K/Z+9wlKdPDhLwSsb/sOf4BY73XUwwWInCTelVw+XLm7gw7shy5epm/F6v7WxaQJSyslgKxLI+x4FOVR0XkViec4wqpBail6fDge4IX3rsBM+cGkQQrlzVzDt2rmFLV3PZcv0IMqnwSsDnITqRxOO5mGI/rYT2nx3KFCIKeN15mHvyRCKFokwkUzQGnf92QZ+HkWgCVc17LN
9qKP233N8d4VT/OJs6wxkFFgr42Lq8ib7ROAMjE3QPXSwwVGp+p+mypauZv/r1V0z6jS1t9JupaQFRirL4CvCEiPxf9/OvAl8VkQZgf8UkM8pKJcwP80nal3Cyf4xw0IsCjx0d4NxQjDtu2lTQRj4dpXnlqmYeOzqAiBD0eQgHfAyNxVmxpI6U6iQl1Dcc5bGjA3hFSKRSCMJEUlm5pA6vq1xCfi9R1wkcS6QIh3yO6SbPsVyroey/5cuWNVHv93Kwx1k9rFpSx2WdYdrDoUkJBtPfr9J1JSplaio0MTDmhqLKQlU/KiLfA17jNr1bVdNbjX67YpIZZaUWopenw0P7ehgYnSAc8mUij0WcgLL0d8o1cE1Xab5j5xrODcXoG4kxFI1TF/CydXkTG5eG6Y5EJymhdN/TA2OcjYwT9Hlpqfezvr2B4Vgi45d48dywUwZVYU1rfcFjuVZDU/+Wa9vDLGkIsr97iGWNQV46P8ozJyOEQz6WNQZZ13HR0V2L2VVLmRgYladQivImVR1y62AfdV/pY62W/bW2qFTd6PnizOA4E4nUpECvoM/DUDRe8DtNV2lu6Wrmjps2lbQSye6bvatpXUd40o6nsXhyWsem3ivf31JUeebUIA1BH41BL0PjcboHx7l8ReOktOVTtw9Xu6molIlBOVlo5tpyUWhl8VWcnU97cSyvMuXf9RWXzigb1VLWslysaKnjpR5nFp4eQGKJFEFf7pxGaWaiNKdjWinWd6bHssn3t1QRrlrdwrkhZxXUXOdnWVOQf3+6m2s2tGVWUo8c6K0p8+NMJwYzYaGZa8tJXmWhqm92/103d+IYlaIWzQ9TmRqM5vcKF0bjoIoCI7Ek69obCu6+WQhKM9/fsrnOx+q2hknxFY8d6SOR0rLX1J5LZjoxmAkLzVxbTkoJyhMc38Q613+xGlimqk9WXDqjbNR6UNTUGZ+TqTRARzjI8YExBGHn+tacTs/sATHgFXqGYtBan1NpTnfwnHptAWJJndHAW+q98/0tH9rXc4ki7B+doK0hMOn8qTW1q30WvWtbJ8+fHuRk/9i0JgYzYaGZa8tJKcWP/hFIATeo6ha3xOn3VfXquRBwJli6j4XHTIvk5Er7caJ/lOXNoUmDOjgBeD873M+Sej9blzcS9PkKpqTIvnY0nuCpYxdQYMf6JUXPLUXO7PNLUSS5rvH40X42LQ1PcnJnP8daKTw0V7uhFnsxplml+wB2qOpVIvIMgKpeEJFAsZMMo5zMdMaXy6ywpq1h0n/+9CB7tHeEJXXOf4lnTkZ41ZqWTMBartxM399/joDXw7YVTRztc6K8AY72jnHN+rbM/UsZ0AqZP4CSVgC5Vhy337CBRw70EhmPX7KSmm7MxXyarNJxHJVmIZhrK0UpyiIuIl7c8CIR6cBZaRjGnLGipY7jfSMZ521TyHHerm0PFxzESlEy6YE6nlTCQSdSG5wiQK9e1zqpb/bsHQVV5ecv9RGJJvAIBH1eBHgciIxP4BFPSYNqITlLsaNPfQbvunbtpHt+4bGT9AxF6WwKcevO1Zma2qX6b2rFZDVbat1cW0lKURafAf4dWCoiHwN+A/izikplGFPY1NnAt54+ndkWGhmPc3ZwnK3LGwsOYqUMiGcGx/F7ncG9O5KkPuBlSb2foWjqkr7ZA3djnZ8LozEGowniiSQNQR9jEwlGognG40kUCHg9fOKhg9y5a/OMKsytaKkrqvAKDeQAjxzoZWtXEzvWtTIcTfDIgV7Wu9tyS51FP7Svh1QqxYHuoUnKeiE6fi2HVW6K5oZS1a8AdwL/G+gGfk1Vv1nsPBG5T0TOi8i+rLaPisjzIvKsiHxfRJa77SIinxGRw+7xq7LOuVVEXnJft87kSxq1z6GeUa5c1UJTnZ/RiRRNdX6uXNXCD17sK5jrqJTqcUGv8MTRC9QHvHhEiMVTnBoYJ5XUS/pmV6rb2NHAwGgcn4DX40Rrj8QSiMBYPJGpDneyf4wvPXai4PcrJGexPFeF8j0VOparpvaNWzp4aF/PpDrZAPu7I7x4bphoPElj0Ec0nuTFc8Psr6L65UZlKWU31EeBR4H7VXV0Gte+H/gs8MWstr9R1f/Xve4fAx8G3g28EbjMfe0A/hHY4QYE3oWT9VaBvSLyoKpemIYcxgLgzOA4a9obJjlqU6o8dybCjnWtk/pmz7pLMSukg4fqAz5SqRRnB2PEkimiA6Os72iYdO0VLXUc6x3h3HCMkWiCZEoRlIDPy7KmEC+ddwZ1QVjuDua9w1G+84tuOhpDeWthFJOz0Aqg2MrD54H93UNOCvGQj/Xt9YzEEpnnk23KyrdCiYwnEJHM1tV0SpLI+GQlZixcSjFDHQXeDnxGRIaBnwKPqur/LXSSqj4qImuntA1lfWwgk2aNW4AvqrM163ERaRGRLuB6YHc6WlxEdgO7gK+VILexgMhnpulsSm+jzW9mKmZWmEgql3U2sPfEIBfGnJl9g9+Dz+flUM/IJDPSVHOY1+OsRHZuWMJlnU2cG4qSSimhgDOodkeigOLzCMf7RvjW06e5clULa9obLjGZ5ZOzmCIpZMLqHY7yxNEBwiEf4aCXWDzJU8cusGP9ZAV7oDvCXQ/uZ2BkgtZwgI0dDXQ0OgrooX09NIWcnFjReJKgz5NJSdIUKmUIMRYCpZih/o+q/j7wK8CXgbe4/84IEfmYiJzCid34sNu8AjiV1e2025av3Vhk5DPT3Lpzdeb9+eFxfnzwPD988Ty9w9GcdadzEfQKh3pG8YgQ8DkJAydSzgAfDvkYGJ3ImLUO9Yxy1Wpnl9TIRJKu5hAt9QHODEZJqdLWEGAikSIccM4TIJmC5c11nBuK0RD0cW44Nu304Fu6mnnf6zfxybdcwftePzkfUiETVjrdQjbplVSa9IqifyTGknonseHTJwfpHY5mViiXL29mU2eYoN/LSCxJ0O9lU2eYy5ebbX+xUFRZiMi/iMh/4piGfDgO7iUzvaGqfkhVV+Fks33vTK8zFRG5TUT2iMie3t7ecl3WmCcOdEe4e/ehjO0cmGRfn0gkqfd7+NHBPur9Hs5Hxvn5YSdd2S9taCXg83Lvo8dKUhjpwXMimSKVUkRAFUaicU5fGOPM4DiPH+nj7t2H+PazZ+iORNnQ0cAbti7jdVs6ufayNiaSKbojUda3N9Bc56M7Mk7PUJTI+ASKsqq1jqFonMagl5Es/0O50oNP9T2kVyuxpLJj/RJCfi/DsQQhv5cd65cQS15UIWm/Rns4yERSCfm9BH0eDveOZlYomzobOHR+hP6RGOGgl66mIF6vx2pVLCJKKX7UBniBQWAA6FPVchgqvwL8N/f9GWBV1rGVblu+9ktQ1XtVdbuqbu/o6CiDeMZ8kZ7pRsbjk2znAO97/Sbede1axuMp/D4vXc0h/D4vR/vHeOXKZq7fvJTOprppzdonksrV65ZQF/ACQioFoCiC1yMkkikOnBvmWO8InY1BhqKJzMwbIOT38Yaty3jXtWsJBXxctjRMwOfB5xU8HqGlzs/RvjG8IgzHkpl4DChfqpF8K48VLXUEf
T6uWd/GG7Yu45r1bQR9vkt2gzWGfGxc2kAskSIaTxLwCgMjE0TG42zqbOCRA71sWhqmrSHAwGicg+dGuHFLh+0aWkSUkqL8vwKIyBbgJuBHIuJV1WlHqYjIZar6kvvxFuBF9/2DwHtF5AEcB3dEVbtF5GHgr9yocYA3AB+c7n2N2qJYXEGu4/FkinND0UkO8Gg8wRPH+ovWtU7b/K/f1MFjR/o5GxkHBI/HUSQphbZwgHPDMS7rDLP3xCAAh8+PZCrSpdNtNNf5OTs4zsol9SRTyunBcYajCcYnkozHk66pKsj3XzhHwOehtSHAb7xqxaSssOUMditle2z6+7eHQ6xvr+fZ0xGGx+M0urujDvWMZp53+vlGxuMc6hnlTWWR0qgFSjFDvVlE/hq4D/gD4Idc9DUUOu9rwGPAZhE5LSLvAj4uIvtE5Hmcgf9/uN2/i+NIPwx8DvgjANex/VHgKff1F5YavTqZajYq1V+Qi+ztqWmyzTW5jqdnvGl6h6M8dewCyaRysn+MofE4pwfGOdY7col5Km3zD/i87FjfStDnOKcDXg8rW+poDwdobwgwEk3QHg7xqjUtNIV89LhlT9Mmn7RcQ9E4QZ+HhqCP1jofoxMJ4inFI0JDwMdQNMF43Fmcj8YS/OveM5esombz/LIpZKKa+v2P941wpHeUppCfFS31bF+9hEcO9PLC2UjBv4exOChlK8MunB1Qn1bVs6VeWFXfnqP583n6KvCePMfuw1FURpVS7ujeYoF0uY53NYcYGk9k0lq84JY4TakyPJ4gqYpH4GifsH1t66RgsuzdRiOxBC/raqLLjQ4HePxoP5HxOE3u/drDIfxeLzun5AtKy9UU8mcq3o1MJGmpC7CsOUTfSIz2cBBwtp5es76NHx88TywxwStWtgDOKmlgJMZdD+5ndWt9WVYapaRNv+26ddz14H6SKWgN+zO7oSLjThrwYjvOjIVPKbuh3quqX5+OojAWF4UCv2ZCsUC6XMc9Hg+3v24DE4kkjxzo4Xj/GIJmtq4GvB5QOHVhjGg8ccmsOF23e0VLHc11Pg6eG+FY7wgpVZY1BRmNJVjWGMwpT3pV9cLZCI8f7afO7yEaT3J+OEpkLM5oLM7pC+MITh2GdC0GgFgiyUTiYvac3uEoh3ocR/JMVhozXeFt6WpmdWs9b3z5Mnaub8tsm20M+Wiu8xUNbDQWPqU4uA2jIMXMRtOlmOkk3/H1HWHG4yl2rGtjc2eYyHiCREqJJ50dTiIQ8nk50D2ct651ZDzOy5Y1sXlZmEPnRzjQPcTa9jAfeONm1nWEL5En+7wtXU1sWhrmXCRGwOdhaDxBwOuhPuijrSHAeDxFZDxOLJGiKeTM0oM+LwHfxf+Gh3tHQaA9HJy24s23MaBUhZEvUnxrCaYsY+FjETXGrMlnNgp4ZcaO21JMJ1OP3737UGZw3bg0zMGeEUI+D2MTTo1rBVrrvVwYu3RWnK+udXZ22lzO3KnnresI0xp26mH/6hXLmUg4MQtBn4dWdfwcrfUBti5vdJ3KF1crjSEfAyMTeD2wcenFyPFSFe9sC/cUcoZbviTDVhbGrMllFjrRP0rPUKxijttcZK9wOhpDrFxSR8jvwef1EPR7aAx5qQ/6ee3Gtpx1raezOkqbe7797Bn2n43QNxKddF7PkBPQ1tEY4qrVLQT9XnxeLy11PnaubyWedAbzO27axJ27NtNc5+dA9xBj8QRjE0leOj+a2Zpbqn9gtiu8UpzhxuKllNxQrwE+Aqxx+wuOT9pqcBtA7nQUy934h7ksTzl1hXPFymaeOJpkWXMd123qyMyU37FzTdFzobR03em4i70nBnnVmhbaw6FL0pB0NIYyzuJCRXRODoxx9ZolvHhumKHxOE+fGGRTZxiv11NSPYVylIydjxVELZR2NUpbWXwe+BRwLXA1TlK/qq2SZ8wPU4PCYkmd8+2WU1c4AZ+X1W31bFveVHCmfKA7Qu9wlB++eJ4fHzzP+eHxgk7cbHPPxqUX4zpe6hnJmYakFKdw+ppr28NsX7vEiR1JpTg3HCt5dl9Kht1qY7Z+FmPuKMVnEVHV71VcEqOmmTo7DHgl73bLSs0kp65wgl6hK6t86qbOhktKc65uq+Pfn+4mkVIaQ17GJhI8vO8cPo8Hr9fDmcFxbt25mje94mJKsuwsr2kz0+HzI/QMx9hZ58/Y+Nd3hEsuopN9zfZwiPZwiJQ6u7mm4+eZusK7em0LD+3r4fM/O16Vs/bZ+lmMuaMUZfEjEfkb4FtALN2oqk9XTCqjpsgVZ+H4KyaIJ5WJRGpStHIlK66lzSjZMrWFfRzrHeGBJ0/g93ppa/CjwI8P9jI4HmdpY5COxiCxRIqB6ATReIr6gIc1LSGGxuN8/HsHATIKY6q5p6MxRMB3adzFdEw65TAhTb1nLVS3m2m5XGPuKakGt/tvdhFvBW4ovzhGLTJ1dhhPJjk5MMrZSJSGgJeGgJeAL4BHhJ8f7p/WTHKmq5CpMp0bjuGEMyQJBZwBOJGKEU+miCVSiAjJlNI7EiOVAiVBNJHKnP+Fx05mlEV619CF0RjdkSj9oxP4vR5uv2HDjJ9hoZ1I5XoG1ThrL5eSNCpPKbmhfmUuBDFql+zZYd9IlJ8f7uf8UJREKoUEfCjCthVN+L1enjjWz41bJtvQ880kZzIzTg+s3372DOGAU6BncDzOcDSBaipTvAfIRHWPx5OMxhJ0R6Kk3GSsHhHODkZZ3hKiMeilZ+jibqctXc3cuKWDe35whETKSUu+rCmUKVc6k4E4X82Ko70j3PPDI8STKdoaAsQTSe59dKyk1UEtzNqnU9rVmF/yKgsReYeqfllE/meu46r6qcqJZcwn053JZs8OnzsVITIWJ6ng9zr7JyJjcZ47FeFXXrYU4aIvo28kyuHzo/SNxGgLBznQHZl0n+nOjLOVSzjg5Xj/qFNbIujD64FoHMYmHMXQEPThFSHkc0qp9g7H8HnAI5BSaAh6EYSB0QkaQ346myYPuod6RrlmQ9ukGXFkPD6rWftUs9WB7gj3/OAIiJP7KpZIcbBnhM2d4ZLuUwuz9lIqGRrVQaGVRToqqHEuBDGqg+nM5tNKZX93hFP942zqDNMdGcfrDrpBnxefVwDh3FCU4WiCK1c1ExmPc2E0xovnhhER/B4PyxqDl9yn0Mw4l0KblHbEIyRSisfjIZ5SQj4vE/EUgtA7HMUjIXweQQJeXraskV+ciZBMKUGfl5QqqRT4vcpwNIlH5BITUymz9tk68h/a10MipbQ2+CeVNO2ORPH7vEXOrp1ZuwX81QZ5lYWq/rP775/PnTjGfFPqbD5bqbxsWRP1fi8He0aIxlM0BL0sbw4xOJ4gkUyRUiWZcmbet123DiBv0rrs++SbGQe9klOhDUedlBvgVKcL+TwkUxBPOr6HJfV++kcniCZSJFJw/eYOfmljG4d6Rjk/PIHfK1y+vIkLoxM8dybC8HiCxjofH3jj5km7obJliyeTHD4/ylA0TsDrYdvypkuez0ydy2cGx2lt8BNLXDSfBX0e+kcn2Lmhvej5Nms3
ykkpQXkh4F3A5UBmKuWWWjUWGC+cjTA0HmfELdKzsaOBtnDwEjt3vvQYfo8wOuGcG/J76RuZYDyeYl1b/aSBcnVrPTvWteKRiwU+p87M8zmS17fVs7S5bpJD/WjvCGcGx+kdjrFtRRPhkI+g30syBc1+PyuX1BONJwmH/Fy/eWlm19KB7giHekZZ3hLi9IVxRmMJNnY20tlcl1FuuQbXXds6+eTDhzjWN0o46CXgEUaiCc5GopkVxWydyyta6piIJzl0fgTATUCYwD+NCnU2azfKRSlBeV8CluEUPvoJTrW64UoKZcwPB7ojnL4wzlA0QTjozdRiPtE3eomdO19qiWUtIda1OxbMpCrLmkPsWNfKJ97yiksKDuVKWpd9n7Qj+eC5EQZG47Q1BNi0NMy+7mFiCefcvpEoe08MouoEAY5EEzx2ZIDWeh8Br/MdGgJeohMJRqIJWhsCk7LFTk0CePDcCC+eGyqa6mJLVzOdTUHnnrEEfaMTJDXFwOgEX37sRFmSK+7a1onX62HT0jBBn8ep16Fw+w0bcpoEy1VPxDByUcrW2Y2q+hYRuUVVvyAiX8Wpb2EsMB7a18PmTicBXyyRIujzEEukONQzwh/+ymSbfT4T0VY31Xe26WNTZ8MlgWHpVcPASIxzQ1EGRuP4PMLtr5t8n0M9o2zqDHOsf5Sjfc4rlUzx9PFBdr28jsPnRwm6WVs7GkNs7GjghbNDnByI8vIVTbzkBsv5vR5evWYJt994WWagzZcEsFBKjmwmksrW5Y08czJCY8hP0OekJv/p4X62dTXyaM8w8aRmVmgBn3faqTfSZqSA38vODbn9HrUQT2HUPqUoi3T5sUER2QacA5ZWTiRjvjgzOM7qtgbiyRTPno4wGktQH/Cypq3hkkGn1AylhQayG7d0TNoW2tV86fbTx4/08WLPMOMTSXweIejzkEgpxwdGOd43QmR8gqDXQyypXL68iY7GENdtCnKge4hQwMe1l3VMkm/q953N1tIVLXX8+OB5gj5PxqcgIgR9wv5zw3hEnBXaRIInjg6wuq2eO3dtntbfpBQzUi3EUxi1TylmqHvdGth/hlMrez/w1xWVypgXVrTUcbJ/lKN9Y3SEg2zubKS5LsDgaPwSs0apGUoLFUY61DPKNevbePMrlrNzQztr28OTajcc6I5wuHeUaNxRFCIQTaQy5Um7h2J4xAMiXLW6JVOwZziaYMjdnluoIFMpprBC7NrWyYWxOKiiqkTjSWJutLrXI+zc0EpdwMdEylldLG8OVWTwLnc9EcPIRcGVhYh4gCFVvQA8Clim2QXMrm2dvO/r5wAyJiiATXn29Zcy6y02ey907KF9PdQHPAxHwdEJAqrEEim6WkKsbq3nXdeu5d5HjxFwt7ymVxDNdb68A2iuLb9r2htybi0ttP11S1cz125s44WzQ5kNAZcvb+LJYxdoawhkcjwBmTxPM6WQHLUQT2HUPgWVhaqmRORO4BtzJI8xj2zpamblkrpJu6EuX96UczdUqRQbyAodcxRNHaMxZ8aeTDkR116vh6aQnxUtdXm3hz60rydvQaZcW37H4kkuX948aWtpPhPajVs6ONQzmklW2NoQYE1bQ8bc5fPIJUpwJoN3PqU21SdRK/EURm1Tis/iERG5A/g6MJpuVNWBikllzDnpgensYBS/V3jlquaMWScyHp/xLLXYQJY+Fksk2H92mAtjca7d2MaB7ggrWuqIJ5L0Dse4MBYn4BNSKSWRYtKupnwrnFz3rfd7ilbES5PLFzAwEuOeHx7hmvVtdDU7NSs8IkwkknRHHIVw++s28MiB3kz1u5kM3tmKKjIWB4FD50cIuwWV0vKlv7vFUxiVphRl8Vb33/dktSlFTFIich/wZuC8qm5z2/4G+FVgAjgC/J6qDrrHPogTz5EE/lhVH3bbdwGfBrzAv6jqx0v6ZkbJZA9MV6xq4omjF3ji6AAblzZwbsgZqF/rDuDTHYCKDWS3XbeOLz92gp8fHmBJvZ9f2tBKwOfNzOBPDozxipXNHDk/wplIlKQqr1nfxh9n7Wqazn0//7PjtIZLs+/nMqGdG4pmgvzAdSa31l+ibKaTnjwX2YpqOJagKeQjlkhxuHeUjsbQJTJbPIVRaUpJJLhuhte+H/gs8MWstt3AB1U1ISJ/DXwQ+F8ishV4G07g33Kc1Uz6f97fA68HTgNPiciDqrp/hjIZOZg8g/azc4Pw1LELPHZ0gHXtDbxmYyt+dwCfyXbMQgPZlq5m2htD3PCypZNMRuBsm71xSwdfeOwkwxNJLutsvKS2xHTvOx37fq6+6XiPbHIpm9kO3tmKqinkJxpPEvR5GHEd8uaTMOaaUiO4/winUp7ixFj8k6oW9Nap6qMisnZK2/ezPj4O/Ib7/hbgAVWNAcdE5DDwavfYYVU96srygNvXlEUZmTqDbg+HaKn3k1K4fvPkXdKz3Y6Zy1Gbzwm+vzvCyYExtnY1sWNdK8PRxKSttTPJvTQd+36uvuXyRxQjW1FtXNrA3hODxBIpmkK+TAU880kYc0kpW2e/iDPjvwdnpXA5TlT3bPl9IF2BbwVwKuvYabctX/sliMhtIrJHRPb09vaWQbzFQ64tpP2jE7Q2TJ7pz3Y7Zr4SmkGvcLJ/lB8e6OErT5zgK0+c4OF95+gejObd/jrTcpylbvnN1/f2123A4/FUvHRpdonU1oYgmzud8q1N7jOwgDtjrinFZ7FNVbdmff6RiMxqZi8iHwISwFdmc51sVPVe4F6A7du3a7muuxjINYP2ez0sayrvDDpf8FhPZJxnTg0yGksS8guqcGpgFAVGJxKZyOxsW/1sAtGmYyLK1Xe2/ohS75vtc1nbHubd11+a5sMw5opSlMXTInKNqj4OICI7gD0zvaGI/C6O4/t1qpoe1M8Aq7K6rXTbKNBulIlczuDbb5j9jp6p5DM3PXFsjMaQD1Unn5SI4vM6kdqj0QRNISdH1VWrWzIpM+azsM9cOZPNaW1UE4WKH/0Cx0fhB/5TRE66n9cAL87kZu7OpjuBX1bVsaxDDwJfFZFP4Ti4LwOeBAS4TETW4SiJtwG/NZN7L2ZKse3nKrxTf7ifJ48NoChXrmrhxi0dl+R4yo5JKHaPfM5lQfB6hDVt9YgIpy+MISJMJFJE3cDAoFd44ewQ6zvCBeMoKuH0nW1dCsNYCMjFyf2UAyJrCp2oqicKXljka8D1QDvQA9yFs/spCPS73R5X1Xe7/T+E48dIAH+iqt9z228G/g5n6+x9qvqxYl9q+/btumfPjBc/C4rsbbHZK4RipUnT56TjH3qGo4S8Hl6xsmVStHO6PkUp9zjQHeETDx1kYHSCCTctRmtDgOXNIfadHQIg5PdypHcED07EdmPIx1g8yUg0TsDn5c//y1be9IoVM/pec/X8DKNWEZG9qro917FCxY8KKoNiqOrbczR/vkD/jwGXKAJV/S7w3dnIspiZSWnSux7cT/9IjDq/l9FYgqY6P6mUMpxI5gwMS1+3lHuk61comvn8SxvbeOn8CId6RhCBiUQSTUFLQ4BESgkHfCSSKeI
[base64-encoded PNG output of the scatter plot produced by the cell below omitted]\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Now lets visualise the plot itself\n", + "plt.title('head size x brain weight')\n", + "plt.xlabel('head size')\n", + "plt.ylabel('brain weight')\n", + "plt.scatter(x, y, alpha=0.5) # ...where alpha is size of points\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [], + "source": [ + "model = LinearRegression()\n", + "model.fit(x_train, y_train)\n", + "predictions = model.predict(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test accuracy using testing values" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted values:Actual values
01306.6226751280
11295.3628661321
21385.4413381425
31255.4417251250
41321.4651501350
51286.9180091408
61502.1339041440
71257.7448681210
81385.9531471422
91339.1225781405
101406.4255271362
111138.4932541150
121202.7253461175
131334.7721971430
141221.4063931120
151233.6898211192
161283.0794381163
171355.7563871360
181243.9260111160
191278.2172481355
201238.5520111225
211418.1971461370
221353.4532441440
231271.0519151300
241327.0950551275
251312.7643891350
261165.1073481127
271285.1266761355
281487.2914281620
291084.7532561027
301189.9301091142
311272.0755341250
321243.9260111103
331378.5319101280
341342.1934351340
351227.0362981322
361326.8391501374
371375.9728621296
381332.2131501240
391246.7409631132
401340.9139111390
411145.6585871340
421144.3790631168
431345.5201971405
441441.9962881485
451264.3983911255
461223.9654411140
471158.4538251202
481147.9617301080
491139.0050631022
501224.2213451220
511290.7565801230
521269.0046771245
531418.4530511405
541222.1741071195
551192.7450611265
561139.5168731078
571184.8120141250
581307.3903891470
591105.7374461060
\n", + "
" + ], + "text/plain": [ + " Predicted values: Actual values\n", + "0 1306.622675 1280\n", + "1 1295.362866 1321\n", + "2 1385.441338 1425\n", + "3 1255.441725 1250\n", + "4 1321.465150 1350\n", + "5 1286.918009 1408\n", + "6 1502.133904 1440\n", + "7 1257.744868 1210\n", + "8 1385.953147 1422\n", + "9 1339.122578 1405\n", + "10 1406.425527 1362\n", + "11 1138.493254 1150\n", + "12 1202.725346 1175\n", + "13 1334.772197 1430\n", + "14 1221.406393 1120\n", + "15 1233.689821 1192\n", + "16 1283.079438 1163\n", + "17 1355.756387 1360\n", + "18 1243.926011 1160\n", + "19 1278.217248 1355\n", + "20 1238.552011 1225\n", + "21 1418.197146 1370\n", + "22 1353.453244 1440\n", + "23 1271.051915 1300\n", + "24 1327.095055 1275\n", + "25 1312.764389 1350\n", + "26 1165.107348 1127\n", + "27 1285.126676 1355\n", + "28 1487.291428 1620\n", + "29 1084.753256 1027\n", + "30 1189.930109 1142\n", + "31 1272.075534 1250\n", + "32 1243.926011 1103\n", + "33 1378.531910 1280\n", + "34 1342.193435 1340\n", + "35 1227.036298 1322\n", + "36 1326.839150 1374\n", + "37 1375.972862 1296\n", + "38 1332.213150 1240\n", + "39 1246.740963 1132\n", + "40 1340.913911 1390\n", + "41 1145.658587 1340\n", + "42 1144.379063 1168\n", + "43 1345.520197 1405\n", + "44 1441.996288 1485\n", + "45 1264.398391 1255\n", + "46 1223.965441 1140\n", + "47 1158.453825 1202\n", + "48 1147.961730 1080\n", + "49 1139.005063 1022\n", + "50 1224.221345 1220\n", + "51 1290.756580 1230\n", + "52 1269.004677 1245\n", + "53 1418.453051 1405\n", + "54 1222.174107 1195\n", + "55 1192.745061 1265\n", + "56 1139.516873 1078\n", + "57 1184.812014 1250\n", + "58 1307.390389 1470\n", + "59 1105.737446 1060" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with\n", + "pred_vs_actual = pd.DataFrame({\"Predicted values:\": y_test_pred, \"Actual values\": y_test})\n", + "pred_vs_actual" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean squared error: 5264.955252408745\n", + "Mean absolute error: 59.995743221875\n", + "Model accuracy: 0.669\n" + ] + } + ], + "source": [ + "print(\"Mean squared error:\", mean_squared_error(y_test, y_test_pred))\n", + "print(\"Mean absolute error:\", mean_absolute_error(y_test, y_test_pred))\n", + "accuracy = model.score(x_test, y_test) # or simply called score method to use the models inherent predictions vs a dataset / subset we give it \n", + "print(\"Model accuracy: {:.3f}\".format(accuracy)) " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week5/.ipynb_checkpoints/exercise3a-checkpoint.ipynb b/Week5/.ipynb_checkpoints/exercise3a-checkpoint.ipynb new file mode 100644 index 0000000..81a9e24 --- /dev/null +++ b/Week5/.ipynb_checkpoints/exercise3a-checkpoint.ipynb @@ -0,0 +1,911 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing logisitic regression using multiple variables with a little bit of preprocessing\n", + "#### 
by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.preprocessing import OneHotEncoder # method to split dataset into 4\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.linear_model import LogisticRegression # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"breast_cancer.csv\") # import dataset with custom headers, store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing\n", + "### Check for possible issues" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".sum of 0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... \n", + "564 False\n", + "565 False\n", + "566 False\n", + "567 False\n", + "568 False\n", + "Length: 569, dtype: bool>" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.duplicated().sum" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 569 entries, 0 to 568\n", + "Data columns (total 33 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 569 non-null int64 \n", + " 1 diagnosis 569 non-null object \n", + " 2 radius_mean 569 non-null float64\n", + " 3 texture_mean 569 non-null float64\n", + " 4 perimeter_mean 569 non-null float64\n", + " 5 area_mean 569 non-null float64\n", + " 6 smoothness_mean 569 non-null float64\n", + " 7 compactness_mean 569 non-null float64\n", + " 8 concavity_mean 569 non-null float64\n", + " 9 concave points_mean 569 non-null float64\n", + " 10 symmetry_mean 569 non-null float64\n", + " 11 fractal_dimension_mean 569 non-null float64\n", + " 12 radius_se 569 non-null float64\n", + " 13 texture_se 569 non-null float64\n", + " 14 perimeter_se 569 non-null float64\n", + " 15 area_se 569 non-null float64\n", + " 16 smoothness_se 569 non-null float64\n", + " 17 compactness_se 569 non-null float64\n", + " 18 concavity_se 569 non-null float64\n", + " 19 concave points_se 569 non-null float64\n", + " 20 symmetry_se 569 non-null float64\n", + " 21 fractal_dimension_se 569 non-null float64\n", + " 22 radius_worst 569 non-null float64\n", + " 23 texture_worst 569 non-null float64\n", + " 24 perimeter_worst 569 non-null float64\n", + " 25 area_worst 569 non-null float64\n", + " 26 smoothness_worst 569 non-null float64\n", + " 27 compactness_worst 569 non-null float64\n", + " 28 concavity_worst 569 non-null float64\n", + " 29 concave points_worst 569 non-null 
float64\n", + " 30 symmetry_worst 569 non-null float64\n", + " 31 fractal_dimension_worst 569 non-null float64\n", + " 32 Unnamed: 32 0 non-null float64\n", + "dtypes: float64(31), int64(1), object(1)\n", + "memory usage: 146.8+ KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddiagnosisradius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_mean...texture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worstUnnamed: 32
0842302M17.9910.38122.801001.00.118400.277600.30010.14710...17.33184.602019.00.16220.66560.71190.26540.46010.11890NaN
1842517M20.5717.77132.901326.00.084740.078640.08690.07017...23.41158.801956.00.12380.18660.24160.18600.27500.08902NaN
284300903M19.6921.25130.001203.00.109600.159900.19740.12790...25.53152.501709.00.14440.42450.45040.24300.36130.08758NaN
384348301M11.4220.3877.58386.10.142500.283900.24140.10520...26.5098.87567.70.20980.86630.68690.25750.66380.17300NaN
484358402M20.2914.34135.101297.00.100300.132800.19800.10430...16.67152.201575.00.13740.20500.40000.16250.23640.07678NaN
\n", + "

5 rows × 33 columns

\n", + "
" + ], + "text/plain": [ + " id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n", + "0 842302 M 17.99 10.38 122.80 1001.0 \n", + "1 842517 M 20.57 17.77 132.90 1326.0 \n", + "2 84300903 M 19.69 21.25 130.00 1203.0 \n", + "3 84348301 M 11.42 20.38 77.58 386.1 \n", + "4 84358402 M 20.29 14.34 135.10 1297.0 \n", + "\n", + " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", + "0 0.11840 0.27760 0.3001 0.14710 \n", + "1 0.08474 0.07864 0.0869 0.07017 \n", + "2 0.10960 0.15990 0.1974 0.12790 \n", + "3 0.14250 0.28390 0.2414 0.10520 \n", + "4 0.10030 0.13280 0.1980 0.10430 \n", + "\n", + " ... texture_worst perimeter_worst area_worst smoothness_worst \\\n", + "0 ... 17.33 184.60 2019.0 0.1622 \n", + "1 ... 23.41 158.80 1956.0 0.1238 \n", + "2 ... 25.53 152.50 1709.0 0.1444 \n", + "3 ... 26.50 98.87 567.7 0.2098 \n", + "4 ... 16.67 152.20 1575.0 0.1374 \n", + "\n", + " compactness_worst concavity_worst concave points_worst symmetry_worst \\\n", + "0 0.6656 0.7119 0.2654 0.4601 \n", + "1 0.1866 0.2416 0.1860 0.2750 \n", + "2 0.4245 0.4504 0.2430 0.3613 \n", + "3 0.8663 0.6869 0.2575 0.6638 \n", + "4 0.2050 0.4000 0.1625 0.2364 \n", + "\n", + " fractal_dimension_worst Unnamed: 32 \n", + "0 0.11890 NaN \n", + "1 0.08902 NaN \n", + "2 0.08758 NaN \n", + "3 0.17300 NaN \n", + "4 0.07678 NaN \n", + "\n", + "[5 rows x 33 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Resolving issues" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# There are issues with this dataset - 'diagnosis' (our y object) MUST be an integer\n", + "# we can simply just create another column and fill that with the correct versions of values (ie 0, 1) & replace the intial column\n", + "fDiagnosis = pd.get_dummies(data[\"diagnosis\"]) # use 'get_dummies' method converts categorical variable into dummy/indicator variables\n", + "# note: this creates a column for each categorical, where 1 represents in each column whether a row had that value set or not\n", + "data[\"diagnosis\"] = fDiagnosis.iloc[:, -1] # replace old w in new column" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Another issue - column 'id' & 'Unnamed 32' are unusable - delete them\n", + "data.drop(columns=\"id\", inplace=True) # remove old column\n", + "data.drop(columns=\"Unnamed: 32\", inplace=True) # remove old column" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
diagnosisradius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst
0117.9910.38122.801001.00.118400.277600.30010.147100.2419...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
1120.5717.77132.901326.00.084740.078640.08690.070170.1812...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
2119.6921.25130.001203.00.109600.159900.19740.127900.2069...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
3111.4220.3877.58386.10.142500.283900.24140.105200.2597...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
4120.2914.34135.101297.00.100300.132800.19800.104300.1809...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n", + "0 1 17.99 10.38 122.80 1001.0 \n", + "1 1 20.57 17.77 132.90 1326.0 \n", + "2 1 19.69 21.25 130.00 1203.0 \n", + "3 1 11.42 20.38 77.58 386.1 \n", + "4 1 20.29 14.34 135.10 1297.0 \n", + "\n", + " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", + "0 0.11840 0.27760 0.3001 0.14710 \n", + "1 0.08474 0.07864 0.0869 0.07017 \n", + "2 0.10960 0.15990 0.1974 0.12790 \n", + "3 0.14250 0.28390 0.2414 0.10520 \n", + "4 0.10030 0.13280 0.1980 0.10430 \n", + "\n", + " symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n", + "0 0.2419 ... 25.38 17.33 184.60 \n", + "1 0.1812 ... 24.99 23.41 158.80 \n", + "2 0.2069 ... 23.57 25.53 152.50 \n", + "3 0.2597 ... 14.91 26.50 98.87 \n", + "4 0.1809 ... 22.54 16.67 152.20 \n", + "\n", + " area_worst smoothness_worst compactness_worst concavity_worst \\\n", + "0 2019.0 0.1622 0.6656 0.7119 \n", + "1 1956.0 0.1238 0.1866 0.2416 \n", + "2 1709.0 0.1444 0.4245 0.4504 \n", + "3 567.7 0.2098 0.8663 0.6869 \n", + "4 1575.0 0.1374 0.2050 0.4000 \n", + "\n", + " concave points_worst symmetry_worst fractal_dimension_worst \n", + "0 0.2654 0.4601 0.11890 \n", + "1 0.1860 0.2750 0.08902 \n", + "2 0.2430 0.3613 0.08758 \n", + "3 0.2575 0.6638 0.17300 \n", + "4 0.1625 0.2364 0.07678 \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Now view fixed data\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x: texture_mean perimeter_mean area_mean smoothness_mean \\\n", + "0 10.38 122.80 1001.0 0.11840 \n", + "1 17.77 132.90 1326.0 0.08474 \n", + "2 21.25 130.00 1203.0 0.10960 \n", + "3 20.38 77.58 386.1 0.14250 \n", + "4 14.34 135.10 1297.0 0.10030 \n", + ".. ... ... ... ... \n", + "564 22.39 142.00 1479.0 0.11100 \n", + "565 28.25 131.20 1261.0 0.09780 \n", + "566 28.08 108.30 858.1 0.08455 \n", + "567 29.33 140.10 1265.0 0.11780 \n", + "568 24.54 47.92 181.0 0.05263 \n", + "\n", + " compactness_mean concavity_mean concave points_mean symmetry_mean \\\n", + "0 0.27760 0.30010 0.14710 0.2419 \n", + "1 0.07864 0.08690 0.07017 0.1812 \n", + "2 0.15990 0.19740 0.12790 0.2069 \n", + "3 0.28390 0.24140 0.10520 0.2597 \n", + "4 0.13280 0.19800 0.10430 0.1809 \n", + ".. ... ... ... ... \n", + "564 0.11590 0.24390 0.13890 0.1726 \n", + "565 0.10340 0.14400 0.09791 0.1752 \n", + "566 0.10230 0.09251 0.05302 0.1590 \n", + "567 0.27700 0.35140 0.15200 0.2397 \n", + "568 0.04362 0.00000 0.00000 0.1587 \n", + "\n", + " fractal_dimension_mean radius_se ... radius_worst texture_worst \\\n", + "0 0.07871 1.0950 ... 25.380 17.33 \n", + "1 0.05667 0.5435 ... 24.990 23.41 \n", + "2 0.05999 0.7456 ... 23.570 25.53 \n", + "3 0.09744 0.4956 ... 14.910 26.50 \n", + "4 0.05883 0.7572 ... 22.540 16.67 \n", + ".. ... ... ... ... ... \n", + "564 0.05623 1.1760 ... 25.450 26.40 \n", + "565 0.05533 0.7655 ... 23.690 38.25 \n", + "566 0.05648 0.4564 ... 18.980 34.12 \n", + "567 0.07016 0.7260 ... 25.740 39.42 \n", + "568 0.05884 0.3857 ... 
9.456 30.37 \n", + "\n", + " perimeter_worst area_worst smoothness_worst compactness_worst \\\n", + "0 184.60 2019.0 0.16220 0.66560 \n", + "1 158.80 1956.0 0.12380 0.18660 \n", + "2 152.50 1709.0 0.14440 0.42450 \n", + "3 98.87 567.7 0.20980 0.86630 \n", + "4 152.20 1575.0 0.13740 0.20500 \n", + ".. ... ... ... ... \n", + "564 166.10 2027.0 0.14100 0.21130 \n", + "565 155.00 1731.0 0.11660 0.19220 \n", + "566 126.70 1124.0 0.11390 0.30940 \n", + "567 184.60 1821.0 0.16500 0.86810 \n", + "568 59.16 268.6 0.08996 0.06444 \n", + "\n", + " concavity_worst concave points_worst symmetry_worst \\\n", + "0 0.7119 0.2654 0.4601 \n", + "1 0.2416 0.1860 0.2750 \n", + "2 0.4504 0.2430 0.3613 \n", + "3 0.6869 0.2575 0.6638 \n", + "4 0.4000 0.1625 0.2364 \n", + ".. ... ... ... \n", + "564 0.4107 0.2216 0.2060 \n", + "565 0.3215 0.1628 0.2572 \n", + "566 0.3403 0.1418 0.2218 \n", + "567 0.9387 0.2650 0.4087 \n", + "568 0.0000 0.0000 0.2871 \n", + "\n", + " fractal_dimension_worst \n", + "0 0.11890 \n", + "1 0.08902 \n", + "2 0.08758 \n", + "3 0.17300 \n", + "4 0.07678 \n", + ".. ... \n", + "564 0.07115 \n", + "565 0.06637 \n", + "566 0.07820 \n", + "567 0.12400 \n", + "568 0.07039 \n", + "\n", + "[569 rows x 29 columns]\n", + "y: 0 1\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 1\n", + " ..\n", + "564 1\n", + "565 1\n", + "566 1\n", + "567 1\n", + "568 0\n", + "Name: diagnosis, Length: 569, dtype: uint8\n" + ] + } + ], + "source": [ + "x = data.iloc[:, 2:].values # values we want to classify - we only want\n", + "print(\"x: \", data.iloc[:, 2:])\n", + "y = data.iloc[:, 0].values # acceptances for each row (ie either benign (0) or malignant (1))\n", + "print(\"y: \", data.iloc[:, 0])\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticRegression(max_iter=20000)\n", + "model.fit(x_train, y_train)\n", + "predictions = model.predict(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test accuracy using testing values" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted values:Actual values
011
100
200
300
400
\n", + "
" + ], + "text/plain": [ + " Predicted values: Actual values\n", + "0 1 1\n", + "1 0 0\n", + "2 0 0\n", + "3 0 0\n", + "4 0 0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with\n", + "pred_vs_actual = pd.DataFrame({\"Predicted values:\": y_test_pred, \"Actual values\": y_test})\n", + "pred_vs_actual.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean squared error: 0.04895104895104895\n", + "Mean absolute error: 1.8251748251748252\n", + "Model accuracy: 0.951\n" + ] + } + ], + "source": [ + "print(\"Mean squared error:\", mean_squared_error(y_test, y_test_pred))\n", + "print(\"Mean absolute error:\", mean_absolute_error(y_test, y_test_pred))\n", + "accuracy = model.score(x_test, y_test) # or simply called score method to use the models inherent predictions vs a dataset / subset we give it \n", + "print(\"Model accuracy: {:.3f}\".format(accuracy)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week5/.ipynb_checkpoints/exercise3b-checkpoint.ipynb b/Week5/.ipynb_checkpoints/exercise3b-checkpoint.ipynb new file mode 100644 index 0000000..ec2bf25 --- /dev/null +++ b/Week5/.ipynb_checkpoints/exercise3b-checkpoint.ipynb @@ -0,0 +1,924 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing more logisitic regression using multiple variables involving lots of preprocessing\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.preprocessing import OneHotEncoder # method to preprocess data (specifically converting columns->categorical datas)\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.linear_model import LogisticRegression # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"titanic.csv\") # import dataset with custom headers, store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing\n", + "### Check for possible issues" + 
] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".sum of PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket \\\n", + "0 False False False False False False False False False \n", + "1 False False False False False False False False False \n", + "2 False False False False False False False False False \n", + "3 False False False False False False False False False \n", + "4 False False False False False False False False False \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "886 False False False False False False False False False \n", + "887 False False False False False False False False False \n", + "888 False False False False False True False False False \n", + "889 False False False False False False False False False \n", + "890 False False False False False False False False False \n", + "\n", + " Fare Cabin Embarked \n", + "0 False True False \n", + "1 False False False \n", + "2 False True False \n", + "3 False False False \n", + "4 False True False \n", + ".. ... ... ... \n", + "886 False True False \n", + "887 False False False \n", + "888 False True False \n", + "889 False False False \n", + "890 False True False \n", + "\n", + "[891 rows x 12 columns]>" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.isnull().sum" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".sum of 0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... \n", + "886 False\n", + "887 False\n", + "888 False\n", + "889 False\n", + "890 False\n", + "Length: 891, dtype: bool>" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.duplicated().sum" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 PassengerId 891 non-null int64 \n", + " 1 Survived 891 non-null int64 \n", + " 2 Pclass 891 non-null int64 \n", + " 3 Name 891 non-null object \n", + " 4 Sex 891 non-null object \n", + " 5 Age 714 non-null float64\n", + " 6 SibSp 891 non-null int64 \n", + " 7 Parch 891 non-null int64 \n", + " 8 Ticket 891 non-null object \n", + " 9 Fare 891 non-null float64\n", + " 10 Cabin 204 non-null object \n", + " 11 Embarked 889 non-null object \n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.7+ KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
.......................................
88688702Montvila, Rev. Juozasmale27.00021153613.0000NaNS
88788811Graham, Miss. Margaret Edithfemale19.00011205330.0000B42S
88888903Johnston, Miss. Catherine Helen \"Carrie\"femaleNaN12W./C. 660723.4500NaNS
88989011Behr, Mr. Karl Howellmale26.00011136930.0000C148C
89089103Dooley, Mr. Patrickmale32.0003703767.7500NaNQ
\n", + "

891 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + ".. ... ... ... \n", + "886 887 0 2 \n", + "887 888 1 1 \n", + "888 889 0 3 \n", + "889 890 1 1 \n", + "890 891 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + ".. ... ... ... ... \n", + "886 Montvila, Rev. Juozas male 27.0 0 \n", + "887 Graham, Miss. Margaret Edith female 19.0 0 \n", + "888 Johnston, Miss. Catherine Helen \"Carrie\" female NaN 1 \n", + "889 Behr, Mr. Karl Howell male 26.0 0 \n", + "890 Dooley, Mr. Patrick male 32.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S \n", + ".. ... ... ... ... ... \n", + "886 0 211536 13.0000 NaN S \n", + "887 0 112053 30.0000 B42 S \n", + "888 2 W./C. 6607 23.4500 NaN S \n", + "889 0 111369 30.0000 C148 C \n", + "890 0 370376 7.7500 NaN Q \n", + "\n", + "[891 rows x 12 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Resolving issues" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# turn any non numerical (be that string or conti. values) categories -> numerical ones\n", + "# we can simply just create another column and fill that with the correct versions of values (ie 0, 1) & replace the intial column\n", + "fSex = pd.get_dummies(data[\"Sex\"]) # use 'get_dummies' method converts categorical variable into dummy/indicator variables\n", + "data[\"Sex\"] = fSex.iloc[:, -1] # replace old w in new column\n", + "fEmbarked = pd.get_dummies(data[\"Embarked\"]) # use 'get_dummies' method converts categorical variable into dummy/indicator variables\n", + "data[\"Embarked\"] = fEmbarked.iloc[:, -1] # replace old w in new column" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Another issue - many columns are irrelevant - delete them\n", + "data.drop(columns=\"PassengerId\", inplace=True) # remove old column\n", + "data.drop(columns=\"Name\", inplace=True) # remove old column\n", + "data.drop(columns=\"Ticket\", inplace=True) # remove old column" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Some columns contain too many NaN's (IEEE NotANumber) to make it non-viable to delete the rows afflicted - delete them too\n", + "data.drop(columns=\"Cabin\", inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# now drop the rows containing NaN's\n", + "data = data.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SurvivedPclassSexAgeSibSpParchFareEmbarked
003122.0107.25001
111038.01071.28330
213026.0007.92501
311035.01053.10001
403135.0008.05001
...........................
88503039.00529.12500
88602127.00013.00001
88711019.00030.00001
88911126.00030.00000
89003132.0007.75000
\n", + "

714 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " Survived Pclass Sex Age SibSp Parch Fare Embarked\n", + "0 0 3 1 22.0 1 0 7.2500 1\n", + "1 1 1 0 38.0 1 0 71.2833 0\n", + "2 1 3 0 26.0 0 0 7.9250 1\n", + "3 1 1 0 35.0 1 0 53.1000 1\n", + "4 0 3 1 35.0 0 0 8.0500 1\n", + ".. ... ... ... ... ... ... ... ...\n", + "885 0 3 0 39.0 0 5 29.1250 0\n", + "886 0 2 1 27.0 0 0 13.0000 1\n", + "887 1 1 0 19.0 0 0 30.0000 1\n", + "889 1 1 1 26.0 0 0 30.0000 0\n", + "890 0 3 1 32.0 0 0 7.7500 0\n", + "\n", + "[714 rows x 8 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Now view fixed data\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x: [[ 1. 22. 1. 0. 7.25 1. ]\n", + " [ 0. 38. 1. 0. 71.2833 0. ]\n", + " [ 0. 26. 0. 0. 7.925 1. ]\n", + " ...\n", + " [ 0. 19. 0. 0. 30. 1. ]\n", + " [ 1. 26. 0. 0. 30. 0. ]\n", + " [ 1. 32. 0. 0. 7.75 0. ]]\n", + "y: 0 0\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 0\n", + " ..\n", + "885 0\n", + "886 0\n", + "887 1\n", + "889 1\n", + "890 0\n", + "Name: Survived, Length: 714, dtype: int64\n" + ] + } + ], + "source": [ + "x = data.iloc[:, 2:].values # values we want to classify - we only want\n", + "print(\"x:\", x)\n", + "y = data.iloc[:, 0].values # acceptances for each row (ie either benign (0) or malignant (1))\n", + "print(\"y:\", data.iloc[:, 0])\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train model" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticRegression(max_iter=20000)\n", + "model.fit(x_train, y_train)\n", + "predictions = model.predict(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test accuracy using testing values" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted values:Actual values
010
110
211
300
401
.........
17400
17500
17611
17711
17800
\n", + "

179 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Predicted values: Actual values\n", + "0 1 0\n", + "1 1 0\n", + "2 1 1\n", + "3 0 0\n", + "4 0 1\n", + ".. ... ...\n", + "174 0 0\n", + "175 0 0\n", + "176 1 1\n", + "177 1 1\n", + "178 0 0\n", + "\n", + "[179 rows x 2 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with\n", + "pred_vs_actual = pd.DataFrame({\"Predicted values:\": y_test_pred, \"Actual values\": y_test})\n", + "pred_vs_actual" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean squared error: 0.22905027932960895\n", + "Mean absolute error: 0.22905027932960895\n", + "Model accuracy: 0.771\n" + ] + } + ], + "source": [ + "print(\"Mean squared error:\", mean_squared_error(y_test, y_test_pred))\n", + "print(\"Mean absolute error:\", mean_absolute_error(y_test, y_test_pred))\n", + "accuracy = model.score(x_test, y_test) # or simply called score method to use the models inherent predictions vs a dataset / subset we give it \n", + "print(\"Model accuracy: {:.3f}\".format(accuracy)) " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week5/.ipynb_checkpoints/exercise4a-checkpoint.ipynb b/Week5/.ipynb_checkpoints/exercise4a-checkpoint.ipynb new file mode 100644 index 0000000..25b5b39 --- /dev/null +++ b/Week5/.ipynb_checkpoints/exercise4a-checkpoint.ipynb @@ -0,0 +1,202 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing confusion matrix evaluation of a trained logistic regression model\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.linear_model import LogisticRegression # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method\n", + "from sklearn.metrics import confusion_matrix\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "cols = [\"test a\", \"test b\", \"accepted\"]\n", + "data = pd.read_csv(\"../Week3/admission.data\", names=cols) # import dataset with custom headers, store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### Splitting dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "x = data.iloc[:, -3:-1].values # values we want to classify\n", + "y = data.iloc[:, -1].values # acceptances for each row\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train model" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(x_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing (using confusion matrix) (focus of notebook)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "y_test_pred = model.predict(x_test)\n", + "cm = confusion_matrix(y_test, y_test_pred) # shows true positive, true negative, false positive, false negatives for test dataset\n", + " # in array form" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualise matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "plot_confusion_matrix() missing 2 required positional arguments: 'X' and 'y_true'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"T+\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"T-\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"F+\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"F-\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mplot_confusion_matrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36minner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;31m# extra_args > 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: plot_confusion_matrix() missing 2 required positional arguments: 'X' and 'y_true'" + ] + } + ], + "source": [ + "labels = [\"T+\",\"T-\",\"F+\",\"F-\"]\n", + "plot_confusion_matrix(cm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week5/exercise1a.ipynb b/Week5/exercise1a.ipynb new file mode 100644 index 0000000..a0a4d01 --- /dev/null +++ b/Week5/exercise1a.ipynb @@ -0,0 +1,288 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing evaluation of fit on a linear regression model\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.linear_model import LinearRegression # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"../Week2/headbrain.csv\") # import dataset (already contains headers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 237 entries, 0 to 236\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 Gender 237 non-null int64\n", + " 1 Age Range 237 non-null int64\n", + " 2 Head Size(cm^3) 237 non-null int64\n", + " 3 Brain Weight(grams) 237 non-null int64\n", + "dtypes: int64(4)\n", + "memory usage: 7.5 KB\n" + ] + } + ], + "source": [ + "data.info() # show basic stats" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Gender 0\n", + "Age Range 0\n", + "Head Size(cm^3) 0\n", + "Brain Weight(grams) 0\n", + "dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.isnull().sum() # no null values" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + 
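Not part of the original patch: the cell in exercise4a above fails because `plot_confusion_matrix` expects a fitted estimator together with `X` and `y_true`, not a precomputed matrix (that is exactly what the TypeError says). A hedged sketch of one way to visualise the matrix with the seaborn import that notebook already has, assuming `cm` from `confusion_matrix(y_test, y_test_pred)` is in scope:

```python
import seaborn as sns
import matplotlib.pyplot as plt

# Draw the precomputed 2x2 matrix as an annotated heatmap
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=["predicted 0", "predicted 1"],
            yticklabels=["actual 0", "actual 1"])
plt.title("Confusion matrix")
plt.show()

# Alternatively, in scikit-learn versions that still ship the helper, the original
# intent would be: plot_confusion_matrix(model, x_test, y_test)
```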
"data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.duplicated().any() # no duplicated data\n", + "# no further preprocessing needed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Variable extraction " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# We want to use the correlation of COMBINED yet only RELEVANT variables to determine our model\n", + "# therefore, we essentially split the data into our single dependancy...\n", + "y = data.iloc[:,-1].values # dependant" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# relevant variable(s) shown as: [\"Head Size(cm^2)\"]\n", + "x = data.iloc[:, 2:3].values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "model = LinearRegression()\n", + "model.fit(x_train, y_train)\n", + "predictions = model.predict(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing (the model's fit) (focus of notebook)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean squared error: 5264.955252408745\n", + "Mean absolute error: 59.995743221875\n" + ] + } + ], + "source": [ + "print(\"Mean squared error:\", mean_squared_error(y_test, y_test_pred))\n", + "print(\"Mean absolute error:\", mean_absolute_error(y_test, y_test_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model accuracy on training set: 0.669\n", + "Model accuracy on testing set: 0.669\n" + ] + } + ], + "source": [ + "train_accuracy = model.score(x_test, y_test) # or simply called score method to use the models inherent predictions vs a dataset / subset we give it \n", + "test_accuracy = model.score(x_test, y_test) # or simply called score method to use the models inherent predictions vs a dataset / subset we give it \n", + "print(\"Model accuracy on training set: {:.3f}\".format(train_accuracy)) \n", + "print(\"Model accuracy on testing set: {:.3f}\".format(test_accuracy))\n", + "# note: low percentage may because of a) use of a linear regression (straight line) algorithm, b) lack of data overall" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", 
+ "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week5/exercise1b.ipynb b/Week5/exercise1b.ipynb index 70a0adc..906480f 100644 --- a/Week5/exercise1b.ipynb +++ b/Week5/exercise1b.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,8 @@ "\n", "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", "from sklearn.linear_model import LogisticRegression # linear regression algorithm\n", - "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method\n", + "from sklearn.metrics import confusion_matrix" ] }, { @@ -52,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -76,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -96,18 +97,35 @@ "cell_type": "code", "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model = LogisticRegression()\n", - "model.fit(x_train, y_train)\n", - "predictions = model.predict(x_test)" + "model.fit(x_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing (focus of notebook)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Test accuracy using testing values" + "### Model score (testing for overfit / underfit)" ] }, { @@ -125,18 +143,12 @@ } ], "source": [ + "y_test_pred = model.predict(x_test) \n", "train_score = model.score(x_train, y_train) # test data we trained with\n", "test_score = model.score(x_test, y_test) # test data we set aside\n", - "print(\"train_score:\", train_score) # shows 91%, *shouldn't* be considered underfit\n", + "print(\"train_score:\", train_score) # shows 91%, *not* be considered underfit\n", "print(\"test_score:\", test_score) # shows 84%, *may* be considered overfit" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/Week5/exercise2.ipynb b/Week5/exercise2.ipynb index a8c23da..e10c930 100644 --- a/Week5/exercise2.ipynb +++ b/Week5/exercise2.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Notebook showing the evaluation of a trained linear regression model with regards to fit\n", + "# Notebook showing the evaluation of a trained linear regression model\n", "#### by Salih MSA" ] }, @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -33,7 +33,13 @@ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", - "%matplotlib inline" + "%matplotlib inline\n", + "\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.model_selection import cross_val_score" ] }, { @@ -45,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 11, + 
"execution_count": 2, "metadata": { "scrolled": true }, @@ -65,11 +71,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "from sklearn.preprocessing import LabelEncoder\n", "le = LabelEncoder()\n", "df[\"class\"] = le.fit_transform(df[\"class\"])" ] @@ -90,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -100,12 +105,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "from sklearn.model_selection import train_test_split\n", - "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=100) # split dataset into train, test\n", + " # varying seed causes variations in the training set's performance (0=97%, 100=94%)" ] }, { @@ -117,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -126,13 +131,12 @@ "KNeighborsClassifier(n_neighbors=1)" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from sklearn.neighbors import KNeighborsClassifier\n", "model = KNeighborsClassifier(n_neighbors=1)\n", "model.fit(x_train, y_train) # learning takes place here" ] @@ -144,9 +148,16 @@ "## Testing (main feature of this notebook)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Comparing model scores" + ] + }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -154,7 +165,7 @@ "output_type": "stream", "text": [ "train_score: 1.0\n", - "test_score: 0.9736842105263158\n" + "test_score: 0.9473684210526315\n" ] } ], @@ -165,6 +176,174 @@ "print(\"test_score:\", test_score) # shows 97%, *shouldn't* be considered overfit" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "accuracy_score: 0.9473684210526315\n" + ] + } + ], + "source": [ + "y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with\n", + "accuracy_score_res = accuracy_score(y_test, y_test_pred)\n", + "print(\"accuracy_score:\", accuracy_score_res)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cross validation" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "result#0: 0.9736842105263158\n", + "result#1: 0.9736842105263158\n", + "result#2: 0.9459459459459459\n", + "result#3: 0.972972972972973\n" + ] + } + ], + "source": [ + "cross_val_results = cross_val_score(model, x, y, cv=4) # 4 fold cross validation\n", + "for i in range(0, len(cross_val_results), 1):\n", + " print(\"result#\"+str(i)+\":\", cross_val_results[i])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### BONUS: we can use cross validation to test the accuracy for a model with varying neighbours and then deduce the best value it" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "k_list = range(1,30)\n", + "avgs = []" + ] + }, + { + 
"cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# go through 1-30, find the best number for k / number of nearest neighbours for KNN algorihtm\n", + "# we'll determine which model is best / most accurate using cross validation\n", + "for k in k_list:\n", + " model = KNeighborsClassifier(n_neighbors=k)\n", + " cross_val_results = cross_val_score(model, x, y, cv=4) # 4 fold cross validation\n", + " avgs.append(cross_val_results.mean()) # take the average result from the cross validation" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEWCAYAAACqitpwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABLNElEQVR4nO29e5wcV3nn/f3NpWc03aPLXC1LsiRLMwEBxgZhwhKwMOFdvGHxJV5iZwM4C3EukCXLehccNob1rpdsYMO7XLNmMcEvBNtxuDiJwXZ8AZOQxALfbdQ9liUk2e4ejW7dc+uZnuf9o6pmSq259Mx0T3f1nO/n05+pOudU1TnV0/XUeZ7nPI/MDIfD4XA4yklDtTvgcDgcjvrDCReHw+FwlB0nXBwOh8NRdpxwcTgcDkfZccLF4XA4HGXHCReHw+FwlB0nXBx1gaSnJe0pse0BSb88R90eSYfL2TeHYzXSVO0OOBzlwMxeUe0+OByOGdzMxeGoAvKoy9+fJPfS6nDCxVE7+Oqq6yQ9IemkpNsltYbq3yHpMUknJP2DpPOKjv1lf3uNpK9JOi7pWUn/eRZV1/lzXcc/xx9KOuqf99+GytdJulXSoKSDkv5LICQkfULS10Ntt0my4GEr6SFJN0n6e2AEOFfSNZL2S8pKej58raL+3C3pf4X2b5N0yxxtL5T0Y/8+vSjp85JiofpXSLpP0jFJaUl/6Jc3+uN+zu/PTyRtKR5HaCzv97evkfT3kj4jaQj4hKQdkh6QNOTfx29IWh86foukb/n3cSjoo9+nV4Xa9UgakdQ921gdtYsTLo5a413A24HtwHnANQCSLgBuAX4b6AT+D3CXpJZZzvFxYBtwLvA24DdKvY7PWUAXsAl4L3CzpF/w6z4HrPPPfRHwHuA3FzG+dwPXAu3AIPBZ4BIzawf+BfDYHMf9O+Ddki72BdCFwIfmaFsA/oM/hjcAbwV+D0BSO/B3wPeBs4GdwP3+cR8Grgb+FbDWv+ZIieN6PbAf6AVuAgR80r/Gy4EtwCf8PjQCfwMcxPueNgG3mVkeuI3Tv6+rgfvNbLDEfjhqBTNzH/epiQ9wAPiN0P6fAH/mb38J+G9F7fcBF4WO/WV/ez/wL0Pt3g8cLvE6e4BJIB6qvwP4I6ARyAO7QnW/DTzkb38C+HqobhtgQJO//xBwY6g+DpwAfhVYU8L9+VXgEHAU+KVF3Nc/AL7tb18NPDpHu33ApbOUnzaO0Fje729fA/x8gT5cFlwXT+ANhs8Xavd64OeA/P29wLuq/b/pPov/uJmLo9Z4KbQ9AiT87a3Af/RVPSckncB7Gz57lnOcjfcQDjg0S5u5rgNw3MyGQ/sH/XN2Ac3+frhu05yjOZPpvvjX+DXgd4AXJf2tpJfNc+xf4wm4fWb2o7kaSeqX9DeSXpJ0Cvgfft/Bu2fPzXHofHULcdo9ltTrq+6O+H34elEfDprZZPFJzOyf8L6PPf692AnctcQ+OaqIEy6OqHAIuMnM1oc+bWb2zVnavghsDu1vWeS1NkiKh/bPAV7AmzFM4Am6cN0Rf3sYaAvVnTXLuU8LQ25m95jZ24CNwM+AL8/Tr5uAZ4GNkq6ep92X/HP1mdla4A/x1FTg3cdz5zjuELBjlvJA0M43tuLw6v/DL3uV34ffKOrDOfMY/r/mt383cKeZjc3RzlHDOOHiiApfBn5H0us9RyvFJf2Kb0Mo5g7gekkbJG0CPriE6/1X38D8JuAdwF+aWcE/902S2iVtxbNTBEb8x4A3SzpH0jrg+vku4L/dX+oLsnEgB0zN0fbNeLad9+DZgT7nj2022oFTQM5/+//dUN3f4AmnP5DU4o/j9X7d/wX+m6Q+/x6fJ6nTPHvHEeA3fKP/v2N2IVTchxxw0u/nfwrV/TPeC8Af+99jq6Q3huq/DlyOJ2BuXeA6jhrFCRdHJDCzvcBvAZ8HjgMDnG6ED3MjcBh4Hs94fSfew7tUXvKv8QLwDeB3zOxnft3v473J7wd+BPwFnqMBZnYfcDvwBPATvAf5fDTgCacXgGN4DgK/W9xI0lq8h+wHzeyImT0MfAX4qiQVtweuA34dyOIJ5duDCjPL4jk5/Gt/nCngLX71n+IJz3vxhNNXgDV+3W/hCYgh4BXAPywwtv8KvAY4Cfwt8K1QHwr+9Xfi2VcO46kHg/pDwE/xZj4PL3AdR40SGM0cjrpF0u8CV5nZRdXui6M0fDfrF8zsv1S7L46l4RY7OeoOSRvx7Ao/BvqA/4g343FEAEnbgCuAC6rcFccycGoxRz0Sw1sHkwUeAL4LfLGqPXKUhKT/BjwFfMrMnq92fxxLx6nFHA6Hw1F23MzF4XA4HGVnVdtcurq6bNu2bdXuhsPhcESKn/zkJ0fNbN54b6tauGzbto29e/dWuxsOh8MRKSQdXKiNU4s5HA6Ho+w44eJwOByOsuOEi8PhcDjKjhMuDofD4Sg7FRUukt4uaZ+kAUkfnaV+q6T75WUEfEjS5lDdn0h6Wl4mwc8GMZQkvVbSk/45w+Ud8rLrpfy/Gyo5NofD4XDMTcWEi59t7gvAJcAu4GpJu4qafRq41czOwws2+En/2H8BvBEvQ+ArgdfhBfUDL5z4b+GF9ejDyyYI8FG8jHV9eJn1zhBmDofD4VgZKjlzuRAYMLP9NpO+9NKiNrvwwnMAPBiqN6AVL4xHC16CprQfM2qtmf2jeaEFbsXLcId/7Nf87a+Fyh0Oh8OxwlRSuGzi9Ox0hzkzY9/jeAHqwMvf0O7nj/gxnrB50f/cY2bP+scfnuOcvWb2or/9El4u7zOQdK2kvZL2Dg6ujrTc/zBwlGQ6W+1uOBwV467HX2Aot5isCo5KU22D/nXARZIexVN7HQEKknYCL8fLJrgJuNhP2lQS
/qxm1qBpZnazme02s93d3fMuMK0b/uD2x/iT7/9s4YYORwR54cQo//6bj/KNf/p5tbviCFFJ4XKE09PLbmYmHSwAZvaCmV1hZhcAH/PLTuDNYv7RzHJmlgO+B7zBP37zHOcM1GZByPVM2UcUQU6M5Mlkx9nnZi6OOiWVyZ3211EbVFK4PAL0SdouKQZcBdwVbiCpS1LQh+vxM/rhZae7SFKTpGa8Wc2zvtrrlKRf9L3E3oMXTh3/3O/1t98bKl/VDPg/uEPHRhnJT1a5Nw5H+Un5L04p9wJVU1RMuJjZJF7u8nuAZ4E7zOxpSTdKeqffbA+wT1ISz0Zyk19+J/Ac8CSeXeZxM/trv+738HJ9D/htvueX/zHwNkkp4Jf9/VVP+G1uwL3ZOeqQ4P96/9FhJgtTVe6NI6CigSvN7G7g7qKyG0Lbd+IJkuLjCsBvz3HOvXjuycXlQ8Bbl9nluiOVnhEoyXSO8zavr15nHI4KELxA5SenOHR8lO1d8Sr3yAHVN+g7Kkwqk+XlG9cSa2pwagNH3WFmpNJZXrlpLeBUY7WEEy51zkAmx8vOamdHd8K5IzvqjsHsOKfGJrnklRsBZ9SvJVZ1Ppd6Jzs2wYsnx9jZk2DKjL0Hjle7Sw5HWQnsLedvWc/Z61p5zgmXmsHNXOqY5waHAejrSdDf286RE6MMjzuPMUf9EMxU+noS7OxtdzOXGsIJlzom0D/39bbT15PwytyPz1FHpDJZ2lub6G5vYWd3goFMjqmpWddPO1YYJ1zqmIFMjlhTA1s2rKG/tx3A2V0cdUUqnaOvJ4Ek+noTjE4UOHJitNrdcuCES12TyuQ4tytOU2MDWzraaHEeY446YyCTo6/He3EKZuduPVdt4IRLHZPKZNnp/+AaG8TOngTJtPvhOeqDodw4Q8N5+nq9//Gd06pf9wJVCzjhUqeM5Cc5fHx0+q0OoL+33c1cHHVDMEMJhMr6thjd7S2nLRx2VA8nXOqU/YPDmDH9Vgfe9gsnx8iOTVSxZw5HeRgY9D3FemdeoPp6Es5ppUZwwqVOGQi5aAb0+7MY9+Nz1AOpdI54rJGz17VOl/X1JHguk8PLuuGoJk641CmpTJamBrG1cybOUjCLcaoxRz0wkMmxw/cUC9jZkyA7Pkn6lEscVm2ccKlTUukc27rixJpmvuItG9pobW5wRn1HXRB2WAnYOT07dy9Q1cYJlzrFc9E8/YfXMO0x5n54jmhzcnSC9Knx0xxWIDw7dy9Q1cYJlzpkfLLAgaHhM4QLeHYX98NzRJ3ZbIoAnfEYG9qanV2xBnDCpQ55/ugwUwY7ZhEufb3tvHRqjJOjzmPMEV0GMkFoo9P/xyXR19M+Xe+oHhUVLpLeLmmfpAFJH52lfquk+yU9IekhSZv98rdIeiz0GZN0mV/3cKj8BUnf8cv3SDoZqruh+HqrhWBmUqwyAOjvDVYxux+fI7oMZHK0NDWweUPbGXU7ez13ZOcxVl0qFnJfUiPwBeBtwGHgEUl3mdkzoWafBm41s69Juhj4JPBuM3sQON8/TwdeSuN7AczsTaFr/BXw3dD5Hjazd1RqTFEhlcnRIDi3+8yMfDMxxnK8dmvHSnfN4SgLqUyOHd0JGht0Rl1fT4ITIxMMDefpSrRUoXcOqOzM5UJgwMz2m1keuA24tKjNLuABf/vBWeoBrgS+Z2Yj4UJJa4GLge+Us9P1wHOZHOd0tNHa3HhG3ab1a1jT3OiM+o5Ik0rnzvAUC5gOA+Nsi1WlksJlE3AotH/YLwvzOHCFv3050C6ps6jNVcA3Zzn/ZcD9ZnYqVPYGSY9L+p6kV8zWKUnXStorae/g4GCJQ4kWnovmmSox8DzG+noT7ofniCzD45McOTE6q8MKzKiDneq3ulTboH8dcJGkR4GLgCNAIaiUtBF4FXDPLMdezelC56fAVjN7NfA55pjRmNnNZrbbzHZ3d3eXZRC1xERhiuePDp9h6AzT19PuZi6OyPLcdNiX2f/He9e20N7S5DzGqkwlhcsRYEtof7NfNo2ZvWBmV5jZBcDH/LIToSbvAr5tZqe5NknqwlO7/W3oXKfMLOdv3w00++1WFQeHRpgo2JxvdeAZ9TPZcU6OOI8xR/QIZt1zzc4leUZ9NzuvKpUULo8AfZK2S4rhqbfuCjeQ1CUp6MP1wC1F5yienQRcCfyNmY2FznWW/DgQki7EG9tQWUYSIQJVwFz6aAgZ9Z3awBFBBgZzNDeKrZ1neooFuACW1adiwsXMJoEP4qm0ngXuMLOnJd0o6Z1+sz3APklJoBe4KThe0ja8mc8PZjn9bHaYK4GnJD0OfBa4ylahL2Lwtrajex61mK9OcKoxRxRJpXNs74rT3Dj346uvp52juXFOjORXsGeOMBVzRYZp9dTdRWU3hLbvBO6c49gDnOkAENTtmaXs88Dnl97b+iCVybFp/RriLXN/tZvWryEea3RqA0ckGchk2XX22nnb7Jxez5Vj9zbncl8Nqm3Qd5SZgUxuXmM+BDppZ9R3RI+xiQI/PzYyp70lYGd3kJXSvUBVCydc6ojClPHc4JkBK2ej36U8dkSQ/YNeaKOF/seD9Vxudl49nHCpIw4fH2F8cmrWsC/F9Pd6Ounjw04n7YgOqTliihUTRAB3oferhxMudcS0i+YCPzxwRn1HNBnwQxtt7zoztFExfT2J6ejJjpXHCZc6ItAvz+eGHDDjjux+fI7oMJDJsa0zTkvTmaGNitnZm+DFk2Nkx9x6rmrghEsdkcpk6V3bwtrW5gXbblzX6q1idjMXR4RIZeaOKVbMTBgY9wJVDZxwqSO87JML21tgZhWzU4s5okJ+cooDR4cXIVxm3JEdK48TLnXC1JQxsIi3OnBZKR3R4uDQMJNTtqAxP2BLRxuxpgYnXKqEEy51wounxhjJF0r+4YFn1B8azjOUG69gzxyO8pCaTm1c2uy8sUGc2xV3a12qhBMudUJgOyn1hwenJw5zOGqdVDqHNH9oo2L6etudO3KVcMKlThiYfqtbhFrMFy7ux+eIAqlMls0b1rAmtrCnWEBfT4LDx0cZyU9WsGeO2XDCpU5IpXN0JWJsiMdKPqZ3bQvtrU3OqO+IBItxWAno60lg5q3sd6wsTrjUCalMdlHqAvA8xvp7251azFHzTBam2H90eFEzc5hZLOxm5yuPEy51gJmRKiFg5Wz09yZIpbOswuwEjghx6Pgo+ckpdixSuGztjNPUIOcVWQWccKkDMtlxsmOTi1YZgOcAcHxkgqM5F2PMUbvMOKwsTrg0NzawvSvu3JGrQEWFi6S3S9onaUDSR2ep3yrpfklPSHpI0ma//C2SHgt9xiRd5tf9uaTnQ3Xn++WS9Fn/Wk9Iek0lx1ZLLMWYHzBt1Hd2F0cNs5jQRsXsdDHGqkLFhIukRuALwCXALuBqSbuKmn0auNXMzgNuBD4JYGYPmtn5ZnY+cDEwAtwbOu4/BfVm9phfdgnQ53+uBb5UkYHVIIFgKCVgZTH9LoClIwIMZHJeyKISQhsV09eT4MDQMOOThQr0zDEXlZy5XAgMmNl+M8sDtwGXFrX
ZBTzgbz84Sz146Yu/Z2YjC1zvUjxBZWb2j8B6SRuX3v3okMrkWLemme5Ey6KP7W5vYd2aZhfA0lHTpDLZJc1aAHb2tjNl8PxR5zG2klRSuGwCDoX2D3Nm2uLHgSv87cuBdkmdRW2uAr5ZVHaTr/r6jKTgiVrK9eqSVMZLECZp0cd6HmMJpxZz1CxTU8ZzmeEl2RRhRl3sjPorS7UN+tcBF0l6FLgIOAJMz139mcergHtCx1wPvAx4HdABfGQxF5R0raS9kvYODg4us/u1wWJjihXT57sjO48xRy1y5MQooxOLC20UZntXnAa5lMcrTSWFyxFgS2h/s182jZm9YGZXmNkFwMf8shOhJu8Cvm1mE6FjXvRVX+PAV/HUbyVdzz/+ZjPbbWa7u7u7lzy4WmEoN86x4fyyhEt/T4KToxMMZl2MMUftMbAMYz5Aa3MjWzvjDLi1LitKJYXLI0CfpO2SYnjqrbvCDSR1SQr6cD1wS9E5rqZIJRbYUeTpgC4DnvKr7gLe43uN/SJw0sxeLON4apLpYH69S1MZgIsx5qhtggWQOxe5SDiM8xhbeSomXMxsEvggnkrrWeAOM3ta0o2S3uk32wPsk5QEeoGbguMlbcObifyg6NTfkPQk8CTQBfx3v/xuYD8wAHwZ+L0KDKvmSC3DDTmgb1q4uDc7R+3hhTZqWVRoo2J29iR4/ugwE4WpMvbMMR9NlTy5md2N99APl90Q2r4TuHOOYw8wi0HezC6eo70BH1hGdyPJc5kc8VgjG9e1LvkcXYkYG9qaXYgMR00SOKwsh76eBBMF4+DQyLJUyI7SqbZB37FMUpksO3vbl+QpFiBp2qjvcNQSZl4SvKUa8wNmUh67F6iVwgmXiJNKL/+tDrzFlEkXY8xRY6RPjZMbn1z2//iOnjjg3JFXEidcIszJkQky2fEyCZd2smOTpE85jzFH7TBtzF/iGpeAtlgTmzesce7IK4gTLhFmYDD44S1fuARqA2fUd9QSwUyjPP/jCSdcVhAnXCJM8MNb6srlMC7GmKMWSWVyrG9rpiuxdE+xgL7edvYP5ihMOdXvSuCES4RJZXK0NjewacOaZZ+rM9FCZzzmdNKOmmIgk11yaKNidnYnGJ+c4vDxhcIUOsqBEy4RZiCTY0d3gsaG5f/wwMval3TeNI4awcxIpnPLtrcEBFHD3QvUyuCES4QZKIP/f5j+3nYGXIwxR41wNJfn5OhE2f7HA7uNs7usDE64RJTc+CRHTowuK+xLMX297WTHJ3nx5FjZzulwLJXpJHjLXOMSsLa1mbPWtrrFwiuEEy4R5bllBvObjf4eZ9R31A7BgsdyOKwE9PW6GGMrRUXDvzgqx3LSvs5FMAv6o+8+RWd84cRjscYGbrzsFbzsrLVl64PDEZDK5Ei0NNG7dvFJ8OZiZ0+C2x85xNSU0VAmW6VjdpxwiSipTJbmRrG1o61s5+yIx3jfL20vWSf9w+Qg9z2drjvhYmaMT1Y3wGGDRKwpGoqFSt2vfS952SfL4SkW0NfTzki+wIunxti0fvlelo65ccIlogykc2zvitPUWN4H0B+9Y1fJbd/4xw/UpXH0D25/jO8+9kJV+9DYIP7i/a/n9ecWJ2ZdOj9IDvKH33qSe/7Dm0m0lO+nf9PfPsv//dHzZTtfmHft3lzW8wUz/YFMzgmXCuOES0RJZrKct3l9VfvQ11ufK57/fmCIC85Zz9t29Vbl+mP5Ap99YIB96WxZhcuTh09w5MQoP3vxFLu3dZTtvD8aOEp/b4LLLihvVnEh3nHexrKeM4genjnlnFYqjRMuEWQkP8mhY6Nc+ZotCzeuIH09CX783BCFKSvbWptqc3w4z9HcONe+eTvXvnlHVfowWZjisw8MMJTLl/W8Q8Pe+ZLpXNmEy2Rhiv2Dw1zzxm383p6dZTlnJenwc8IE98JROaKh1HWcRuDt0l8mF82l0tfTXncrngNPuXK6eC+WpsYG1rc1c6zMD8Bj08KlfN6AB4+NkC9MlXW9VSVpizXS0tRQ9nvrOJOKChdJb5e0T9KApI/OUr9V0v2SnpD0kKTNfvlbJD0W+oxJusyv+4Z/zqck3SKp2S/fI+lk6Jgbiq9XLwR5V6r5AIT6XPGcnBbc1b23HfFYxYRLOdd5pHxBVe37VSqS6IzHyj4rdJxJxYSLpEbgC8AlwC7gaknF1uJPA7ea2XnAjcAnAczsQTM738zOBy4GRoB7/WO+AbwMeBWwBnh/6HwPB8eZ2Y2VGVn1SaWzxBob2NZZPk+xpVCPK55T6SyJlibOXkZmz3LQFW/haK686Q+O5mbUYuUiWcaoxStFZ6KFY8MutUSlqeTM5UJgwMz2m1keuA24tKjNLuABf/vBWeoBrgS+Z2Yj4KVONh/gn4HyupNEgGQ6y7nd5fcUWyxrW5vpXdtSVyuek+nyu78uhcrMXMaRYDA7zomR8pw7mc6yecMa4mX0Pqs0HfGYs7msAJV8Om0CDoX2D/tlYR4HrvC3LwfaJRW7x1wFfLP45L467N3A90PFb5D0uKTvSXrFbJ2SdK2kvZL2Dg4Olj6aGiKZzlVdJRbQ19NeVyueU+lc1W1ZAB2J8goXM+PYcH56TVK5Zi/e/aqN/8VScWqxlaHaBv3rgIskPQpcBBwBCkGlpI146q97Zjn2i8APzexhf/+nwFYzezXwOeA7s13QzG42s91mtru7u7tsA1kphv2YYv01oobY2eOF05iqgxwZQ7lxhobzNfGw7IzHOD6SL9t9zY5PMlEwfvFcz0usHEb9icIU+48uP7/9SlOJWaHjTCopXI4AYV/ZzX7ZNGb2gpldYWYXAB/zy06EmrwL+LaZTYSPk/RxoBv4cOhcp8ws52/fDTRL6irfcGqDVKY2jPkBfb0JRvIFXjg5Wu2uLJtacZQA7wE4ZXBidGLhxiUQvKm/4ux1JFqapg3xy+Hg0DATBaO/jLG/VoKORIzRiQKj+cLCjR1LppLC5RGgT9J2STE89dZd4QaSuiQFfbgeuKXoHFdTpBKT9H7gXwJXm9lUqPws+YpySRfijW2ojOOpCZLT3jm18bYYBBWsB9VYYDuqhXvbmfDiaQ2VyagfGLC7EjF29iTKohYLzlELM73F0OXHzRtyRv2KUjHhYmaTwAfxVFrPAneY2dOSbpT0Tr/ZHmCfpCTQC9wUHC9pG97M5wdFp/4zv+2Pi1yOrwSekvQ48FngKqvDxCSpdJZYUwNbO+PV7grA9PqGehAuyXSW9pYmzlpbXU8x8NRiUL7FfsHMpTPeQn9voixOGMl0FilanmIQWkjp7C4VpaIuHr566u6ishtC23cCd85x7AHOdADAzGbts5l9Hvj8MrobCZLp8mafXC4b4jG6EvWRHtlzlKi+pxjMPADLZRsIztORiNHf284dew9zbDg/fZ2lkErn2LKhjTWxxrL0caXoSJT33jpmp6SZi6RvSfqVkArLUSVS6WxNqG3C7Oguz5twNTEz/97Whoqn7DOX4WDmEpu2KS3XqJ+swf/FUij3vXXMTqnC4ovArwMpSX8s6Rcq2CfHHGTHJnjh5FjNPA
ADggCWUdZCHs3lOT4yURPGfPBmhADHyqS6GcrlaYs10trcOC0QlmPUz09O8fzR4Zq5X4thZlbobC6VpCThYmZ/Z2b/FngNcAD4O0n/IOk3g/Arjsoz7SlWYzruvp52smOTZLLR/bGmasxRormxgXVrmstmdD42PE6nrw46a20r7S1NyzLqHxgaZnLKauZ+LYZESxOxpgY3c6kwJau5/MWN1+CFW3kU+N94wua+ivTMcQa1GscpEHZRtrska/DedpZxJfnQcJ4O30tKEn29iWWpxaYDfEbMDRlcfLGVolSby7eBh4E24F+b2TvN7HYz+30geq8uESWZztHS1MCWMmafLAdBAMuBCNtdkpkca1ub6GkvX0rd5dIRj5VNLXZsOD9tawBPiC4nJlwynaMhgp5iAW4hZeUpdebyWTPbZWafNLMXwxVmtrsC/XLMQhD3qlY8xQK6Ey2sW9Mc6QCWgTG/FjzFAsr5ACz2DOvrbeeYn7tmKaTSWc7paKO1OVqeYgEuvljlKVW47JK0PtiRtEHS71WmS465qNU4TpLo64luVkozq6l4bQGdiZay2FzMjKFcftrmAjO2paWqxpLpbM3dr8XQ5SIjV5xShctvhcOymNlx4Lcq0iPHrJwcneClU2M1G8eprzcR2YWUg9lxTo5O1Jxx2osvNrHs+GK58Unyhakz1GKwNDvZ+GSBA0MjNXe/FkM5VY6O2SlVuDQqpC/wc7UsffWVY9EE9oxajeO0ozvBseF82cKVrCS1GsakIx6jMGWcXGZ8sekFlPEZe1JPewtrW5uWNHN5/ugwhSmrufu1GDriMYbzBcYmXHyxSlGqcPk+cLukt0p6K168r+8vcIyjjNTqAzAgUJFEUTU2k9q4tt7EAzXWcm0D4QWUAZI8o/4SZi7TAT5r9EWnFNxCyspTqnD5CF4yr9/1P/cD/7lSnXKcSTKdZU1zI5s3rKl2V2alL8JZKVOZLOvbmulO1I6nGJQvBEzgclsc6qWvt51kJrvoxa+pdJYGwbndtRHfbil0lHmRquNMSoot5kcf/pL/cVSBVDrHzp4EDTXmKRawcV0r8VgjA2UI5b7SJNM5+ntqy1MMvCCTsPzIyIHhOmzQB8+o/81/nmAwN05Pe+nBOpPpLNs645H1FINQ1Gln1K8Ypa5z6ZN0p6RnJO0PPpXunGMGzzunttQ2YSSxs7edgcFozVw8T7HavLflV4udPjNbqlE/lY5egrBiOl1k5IpTqlrsq3izlkngLcCtwNcr1SnH6ZwcmSCTHa9Ze0tAX08icqv006fGyY5N1uS93dBWHrXYsVyeNc2NZ0Qv7luCO/LYRIEDQ8M1eb8Wg4uMXHlKFS5rzOx+QGZ20Mw+AfxK5brlCJOsoSRW89HXkyCTHefkSHmyJ64EtWrMB4g1NdDe2rR84TJHaP3uRAvr25oXFWNs/+AwU1Yb2TqXQ3tLE82Ncgb9ClKqcBn3w+2nJH1Q0uW4sC8rRlTiOAWhQAYGo2N3qcWYYmHKEV/s6HD+DHsL+B5jPe2Lio5cS9k6l4MkPwKCs7lUilKFy4fw4or9e+C1wG8A713oIElvl7RP0oCkj85Sv1XS/ZKekPSQpM1++Vv8LJPBZ0zSZX7ddkn/5J/zdj+FMpJa/P0Bv35biWOreVLpHG2xRjatr01PsYBA+EVJNZZK5+iIx+iqMU+xgM5ES1kM+p1zJAULAliW6jGWTGdpbBDbu6LrKRbQGW9xarEKsqBw8RdM/pqZ5czssJn9ppn9qpn9YwnHfQG4BNgFXC1pV1GzTwO3mtl5wI3AJwHM7EEzO9/MzgcuBkaAe/1j/ifwGTPbCRwH3ueXvw847pd/xm9XFyTTWfpq2FMsYNOGNbQ2N0TKHTmZydZcCoMw5YgvdiyXP20BZZj+3nZOLSJdQjKdY1tnGy1N0fUUC+hMxDjqDPoVY0HhYmYF4JeWcO4LgQEz229meeA24NKiNruAB/ztB2epB7gS+J6ZjfhRAi5mJjXy14DL/O1L/X38+reGowpEmVqMezUbjQ3ys1JGQ7iYGQM1Gq8tYLlqMTNjaA61GCzeqF9L2TqXi4uMXFlKVYs9KukuSe+WdEXwWeCYTcCh0P5hvyzM40BwnsuBdj9vTJir8CICAHQCJ8xscpZzTl/Prz/ptz8NSddK2itp7+Dg4AJDqD7H/ci1UdFx9/UkeC4iwuXFk2Nkxydr+t52xGMcH84vOcvnSL7A+OTUrAZ9mLE1lWLUH5socPDYSCRedErBCZfKUqpwaQWG8GYN/9r/vKMM178OuEjSo8BFwBFgOtiPpI3Aq4B7ynAtAMzsZjPbbWa7u7u7y3XaijHjzRSNH3RfbztHToySG59cuHGVicK97YjHmJwyTo0u7X7OtTo/oCvRQkc8VpJRfyCTwyz6xvyAzniM3Pgk45MuvlglKHWF/m8u4dxHgC2h/c1+Wfi8L+DPXCQlgF8NR18G3gV828wC39YhYL2kJn92Ej5ncL3DkpqAdX77SJPM1HZMsWICj7HnMjlevWV9dTuzAKkaj9cGTDsaHB0eZ13b4jOKByvQu+ZQi4E32yxFLTbjKVa792sxBKv0jw3n2biutp1lokipK/S/KumW4s8Chz0C9PneXTE89dZdReft8l2cAa4His95NTMqMczTDTyIZ4cBz2Ptu/72Xcx4sF0JPGBL1SXUEKl0lkRLE2evKz08RzWJUoyxZDpLVyI251t9LbDc+GKzRUQuJghgudDPJZnO0dQgtnVG31MMZu6tW6VfGUpVi/0N8Lf+535gLTDv08OfWXwQT6X1LHCHmT0t6UZJ7/Sb7QH2SUoCvcBNwfG+K/EW4AdFp/4I8GFJA3g2la/45V8BOv3yDwNnuD5HkSD7ZFR8E87paCPW2DD9llvLJDO5ml87tNwH4GwRkYvp702QHZ/kpVNj854rlc6yvStOrKnUx0Zt4yIjV5ZS1WJ/Fd6X9E3gRyUcdzdwd1HZDaHtO5nx/Co+9gBnOgBgZvvxPNGKy8eAf7NQn6JGKp3jrS/vqXY3SqapsYHtXXEGanyti+cpluXK126udlfmpXOZYUoWsrnAjM0pmc7Nqx5KZXK88ux1S+pHLTIzK3QLKSvBUl9B+oDoPPEiylBunKHhfOR03Dt7a98d+ciJUYbzhZo25sPyH4DHhsdpbW6gLTb3upSZAJZzzzZH8wV+fmykJsPkLJXpyMhOLVYRSrW5ZCWdCj7AX+OppxwVZDopU40/AIvp60lw6PhITWf5i4IxH6ClqZH2lqYlL/YbGs7TGW+ZV63qRSiIzWvUf24w8BSr7fu1GNa2evHFnDtyZShVLVY//1ERIqpxnPp62jHzHkivqFE1ykxMsdq/tx2Jpa/HmCtoZTF9Pe3zrnWJ0v0qFUlsaIu5mUuFKHXmcrmkdaH99UGsL0flSKaztLc0cdbaaHiKBQSqk4EaVo0l0zm621tY31a7nmIBy1nsV6pw6e9N+OtYZvcYS6ZzNDeKrXXiKRbQUYbAoI7ZKdXm8nEzOxns+GtRPl6RHjmmSfpJmaLiKRawrTNOY4NqOoBlK
pONzFv4ckLADOXy83qKBfT1tpMbn+SFk7N7jKXSWc7tStDcWB+eYgGdCRcZuVKU+p8yW7uSVGqOpWFmkY3jFGtqYFtnW826I09NmZdNscbdkAO86L1LewAODY/PGVcszEwYmNm/s2SmNrN1LhcXGblylCpc9kr6U0k7/M+fAj+pZMdWO0dzeY6PTETOmB+ws6d2PcaOnBhldKIQGcEd2FwWuyZ4JD/J2MTUvAsoA4JZ3GweYyP5SQ4dG43M/VoMHXFnc6kUpQqX3wfywO140Y3HgA9UqlOOmR95VFQ3xfT1tHNwaKQm4zZFzTjdGY8xUTBOjS0uvljw0CxFLba+LUZ3e8usRv2B6RBE0bhfi6EzHiPr4otVhFK9xYapkxXvUaHWMyQuRF9vgsKUceDoCL9wVm2NIWou3uEQMOvWlB5fbCb0S2lOC/29iVlnLlG7X4uhw1cZHh+e4Kx10c9RU0uU6i12n6T1of0NksoWqdhxJslMjrWtTfS012aGxIWYTnlcg6qxVDpL79qWRT2oq8lSF1IGQSs7SrC5gDfbTGVyTE2drn5LpbPEGhvY2tG2qOtHgZkQMM6oX25KVYt1haMVm9lx3Ar9ihIY86PmKRawozuBRE0a9ZOZaDlKTEdGXqRtIFCLdZVgcwFvljySL3DkxOhp5cl0lnO74zTVmacYnB4Z2VFeSv1vmZJ0TrDjB5WMfMThWsXMIpN9ci5amxs5p6Ot5oz6U1PGQAQCVoZZamTkabVYiTOXaaN+0QtBssazdS4HFxm5cpQqXD4G/EjS/yfp63iRiq+vXLdWN4PZcU6OTkTegNrXk6i5AJZeWJqpSN3b5QiXWFMD8XniioXpmyUr5fD4JEdOjEbqfi0GFxm5cpQkXMzs+8BuYB9efpX/CIzOe5BjySQjEvdqIXb0JNh/NMdkYaraXZkmisbp1uZG4rHGRb9dH/UXUJaqWl23ppnetS2nrXUJZp5Rul+LYW1rM40NcgspK0BJ3mKS3g98CC/z42PALwI/xkt77CgzM+l3o/222NfTzkTBOHhshB3dtTGWqN7bjiWsJD82PL7oRGhB4rCAqHstLkRDgxdfzNlcyk+parEPAa8DDprZW4ALgBMLHSTp7ZL2SRqQdIYrs6Stku6X9ISkhyRtDtWdI+leSc9Kesa38yDpYUmP+Z8XJH3HL98j6WSo7obi60WFVCbL+rZmuhPR9BQLmM5KWUOqsVQ6y8Z1raxtjYanWEBnvGXRqptjw/lpg3Wp9PW0MxDyGEuls7Q0NXBOHXqKBXQl3ELKSlBqCJcxMxuThKQWM/uZpF+Y7wBJjcAXgLcBh4FHJN1lZs+Emn0auNXMvibpYuCTwLv9uluBm8zsPkkJYArAzN4UusZfMZPmGOBhM3tHiWOqWZLpHP090fUUC9gx7Y6cBc6qbmd8ouoo0RmP8eIccb/mYmg4z7mLnDH29yYYnShw+Pgo53S2kUzn2NGdoLEh2v+L8+GCV1aGUmcuh/11Lt8B7pP0XeDgAsdcCAyY2X4zy+Ot7L+0qM0u4AF/+8GgXtIuoMnM7gMws5yZjYQPlLQWTy33nRLHEAk8T7H6iOOUaGli0/o1NbPWpTBlPDeYo78nevd2KZGRS42IHKavKMaY5xIfvfu1GJYTddoxN6Ua9C83sxNm9gngj/Dy1V+2wGGbgEOh/cOcmbb4ceAKf/tyoF1SJ9APnJD0LUmPSvqUPxMKcxlwv5mdCpW9QdLjkr4n6RWzdUrStZL2Sto7ODi4wBBWnvSpcbJjk3Wj466lGGM/PzbC+ORUJO/tYuOLjeYLjOQLSxAuniBJZrJkxyZ44eRYJGd6i6EzHmMo5wz65WbRq6LM7Admdpc/G1ku1wEXSXoUuAg4AhTw1HVv8utfB5wLXFN07NV4nmsBPwW2mtmrgc8xx4zGzG42s91mtru7u7sMQygvUTU4z0Vfj5cnpDBV/WVRUb63nfEY+cIUufHS4osFK85LiSsWZm1rMxvXtZJK56ZfCqIojBdDR7yFU2OT5Cdrx6uxHqjkktsjwJbQ/ma/bBoze8HMrjCzC/DW0gS5Yg4Dj/kqtUk8QfGa4DhJXXhqt78NneuUmeX87buBZr9dpKg375y+3gTjk1McOV59z/XUtHCJ3r3tjC8u33ug5lmsQR+8+5NMZyMfPLVUgpQEx0ecaqycVFK4PAL0SdouKQZcBdwVbiCpS1LQh+uBW0LHrpcUTC0uBsKOAFcCf2Nm0xZOSWfJt4BLuhBvbENlHlPFSaVzfk7zaHuKBQQxxmohDEwynWPT+jUkWqKXiihYZV+q4XlokUErw/T7s82fvZSltbmBLRvq11MMQgspncdYWamYcPFnHB8E7gGeBe4ws6cl3SjpnX6zPcA+SUmgF7jJP7aApxK7X9KTgIAvh05/FaerxMATOE9Jehz4LHCVLTYBRg2QzGSnXXjrgZ3d3iyhFuwuUXaU6FzkKv1jiwi3X0x/bzvjk1M88LMMO3sSNNSxpxgsPQKCY34q+grnq6fuLiq7IbR9J3DnHMfeB5w3R92eWco+D3x+Gd2tOmbGQDrHZRcU+z1El3VtzfS0t1R9rctkYYr9g8O8ub/27GylsNjIyIuNiBwmEMAHh0a4oo7+F+eiM+EiI1eC+gtzGmFePDlGdnyy7nTcfb0JBgarK1wOHhshX5iK7Kxw2uayCLVYrLGB9iWoAMM2qSjapxZLcG/dzKW8OOFSQyQjbHCej76edgbS2UWn6S0nqYg7SqyJNdK2iPhix3LeGpelLMQN1idB/RvzwYup1tggZ3MpM9GzbNYxqToJWFnMzp4Ew/kCL54c42z/oVUOhscn+fFzQ0yVILTufSY93ZeospjFfktZQBmmrzfhR0Our//F2fDiizWvqlX69z79Ejt7EouO4LAYnHCpIZLpLF2J2LIeCrVIoIpKprNlFS7/54f7+ez9qZLb7+xJEI+gp1hA5yLClBwdzk/bEpbCa87ZwNMvnJqewdQ7nuBeHTaXycIUH/iLn/K+XzqXj17ysopdJ7q/tDokGbEkVqUSqPlS6Rx7fqF8CUyfeeEk27vifO7qC0pqH/UHZUc8xmCJK8mPDY+zrXPpLsS/u2cH73nD1rr3FAtYTSFgDh4bYaJgFbc/OuFSI3ieYlmufO3mhRtHDG/dTuy0PCHlIJnO8apN63jlpnVlPW+t0ploYd9Lpd3DY7n8tKF6KTQ3NrC+rb5m0PPRmWjh2RdPLdywDgjU75VWETuDfo1w5MQow/lC3RnzA/p62kmWca3LaL7AoeMjkV23shQ64zGOlhBfbGyiwHC+sCy12GrDiy+2OmYuA/6C5h1OuKwO6tWYH9Dfmyirx9hAJodZ/d6v2eiIx8hPTjGcL8zb7tgyVuevVjriMU6OTjBRQ1lTK8VAZmUiVTjhUiMk6zyOU19vO8P5AkdOlCfGWL3fr9mYXki5wBt28AbuhEvpBJEMVkN8sVQmtyJek0641AjJdI7u9pa61XP3h4z65SCZydLcKLZ2xstyvihQ6krypUZEXs10rJKFlIUpYyCTW5HFxE641Aip
TH0nZQrGVi6jfiqd49yuBM2Nq+dfuNSV5MuJiLxaCQT3QrPCqHPk+Cjjk1MrYqtcPb/MGmZqykil69MNOWB9W4zu9haS5Zq5RDgI5VLpKDF6r7O5LJ5glne0zmcuQXTynSvwrHHCpQY4cmKU0YlC3Run+3sTZQm9Pzw+yeHjq2P1eJjOEsPuH83laW4Ua1vdSoNSmbFn1fdCyiA6ubO5rBJWi3G6r6edVDrH1DKzUg5MZ0is7/tVTFusidbmhgVXkh8bHmdD29Liiq1W1rfFkOrf5pJK5+hpb2HdmuaKX8sJlxogUBXV6xqXgP7edkYnlu8xVq8BPkuhM96y4MxluXHFViONDWJDW+nhdaLKQGbl1MlOuNQAqXSW3rUr8zZRTcpl1E9lcsQaG9jaUd8ZEmejM7HwYr+h4XzdZDJdSep9IaVZ4Cm2Mi9lFRUukt4uaZ+kAUkfnaV+q6T7JT0h6SFJm0N150i6V9Kzkp6RtM0v/3NJz0t6zP+c75dL0mf9az0h6TWVHFs5SWayq8J+EMw0lmvUT6aznNsdp2kVeYoFlBIDy81clka9xxd78eQYw/nCikUGr9ivU1Ij8AXgEmAXcLWkXUXNPg3cambnATcCnwzV3Qp8ysxeDlwIZEJ1/8nMzvc/j/lllwB9/uda4EtlHlJFmJpa2beJarJuTTO9a1umc6sslVQ6tyqE8WyU8gAcyjnhshQ6E7G6zkYZGPNXKmFeJV/9LgQGzGy/meWB24BLi9rsAh7wtx8M6n0h1OSnOsbMcmY2ssD1LsUTVGZm/wisl7SxTGOpGIeOjzA2MbVqjNP9ve0kl+Exlhuf9POMrI77VYwXdn/uB+D4ZIHc+KRbQLkE6n3mklphW2Ulhcsm4FBo/7BfFuZx4Ap/+3KgXVIn0A+ckPQtSY9K+pQ/Ewq4yVd9fUZSoFwu5XpIulbSXkl7BwcHlz66MrFajPkBfT3tDGSW7jG20j+QWqMz0cLYxBQj+clZ690CyqXTGW/hxOgEk3UaX2wgk6MzvnL5oqqttL4OuEjSo8BFwBGggJcK4E1+/euAc4Fr/GOuB17ml3cAH1nMBc3sZjPbbWa7u7u7yzGGZTHj+bQ63sT7exOMTUxx+PjSPMbqPcDnQiy0kNLFFVs6nYkYZnB8ZKLaXakIqUyu4pGQw1RSuBwBtoT2N/tl05jZC2Z2hZldAHzMLzuBN+t4zFepTQLfAV7j17/oq77Gga/iqd9Kul4tkkpn2biulbWt9e0pFjBj1F+aaiyZztLS1MA5q9BTDGZWks/lMjszc3HCZbFML6SsQ9WYmZFKZ1fM3gKVFS6PAH2StkuKAVcBd4UbSOqSFPTheuCW0LHrJQVTi4uBZ/xjNvp/BVwGPOW3uQt4j+819ovASTN7sSIjKyPJdG5VqXiCGdpS7S7JTI4d3QkaV0mGxGJmHoCz210Ce4ybuSye6VlhHRr1B7PjnBqbrA/h4s84PgjcAzwL3GFmT0u6UdI7/WZ7gH2SkkAvcJN/bAFPJXa/pCcBAV/2j/mGX/Yk0AX8d7/8bmA/MOC3/b1Kja1cFKaM5wZz9K/gF15t1rY2s3Fd65KjI6fS9R3gcyGC4JULqcWcQX/xlBoYNIoEUS1W8kW2osGHzOxuvId+uOyG0PadwJ1zHHsfcN4s5RfP0d6ADyynvyvNz4+NMD45tersB3297UtSi50am+DFk2OraqZXzHT03nnUYk0NWjVq1nKy0L2NMivthgzVN+ivalabMT+gvyfBQCZHYZEeY6vdmA/QFmukpalhXpvLhniMhlWqNlwOG/z4YkfrcJV+KpNlbWsT3e0r50XohEsVWa1utf297YxPTnHo2EJLl04ntUoCfM6HpHnDlBzN5Z1KbIk0Noj1a5oXDAwaRVK+bXclg5k64VJFkumVyWVda/QtMcZYMp2jtbmBLRtWp6dYQEciNucD8NjwuDPmL4N6XUi5UtknwzjhUkVWY8IrmJmpBXrgUkllsuzsSax6lU9HvGVem4sTLkunM95Sd8Erh3LjDA3nVyymWIATLlVisjDF/sHhVWk/SLQ0sWn9mkXPXFLpHP2rIAbbQnTF5w4N7yIiL4/ORP3NXAZWMEFYGCdcqsTBYyPkC1MrPlWtFfp6E4uKjnxydIKXTq1uT7GAjjlsLvnJKbJjk27msgw65hHcUSVVBTdkcMKlaswYp1fnw7K/t53nBkv3GBvIOGN+QEcixuhEgdF84bTy4I3bCZel0xmPcXwkv2hPxlpmIJMjHmvk7HWtK3pdJ1yqRPDWvtJT1VqhrydBfnKKg0PDJbVPOjfkaTrnWEke7DtvsaXTEffii50YqZ/Zy0Amx86exIqnvXbCpUok01k2b1hDfJV5igX0LzJxWDKdZU1zI5vWr6lktyJBxxwryd3MZfl0JOpvlb7nCLPyL2VOuFSJ1ZzwCmZmbKUmDvP89J2nGMysJC+2Dbhw+8unyxfM9bKQ8uToBOlT41XxSnXCpQpMFKbYfzS3Kt2QA+ItTWzesIZkie7IyXR2VWTrLIXOOcLuu7hiy6ejzkLATHuKdTvhsio4ODTMRMFWvVttf297STOXkyMTZLLjzpjvM1dk5KHhcRobxLo1Lq7YUlko6nTUCBxh3MxlleCM0x59vQn2Dw4vmPkvmVndnnXFJFqaiDWeGV/s2HCeDW3NTnW4DDa0zZ8vJ2qk0jlamhrYXIWoFk64VIFkOou0ej3FAvp72skXpjgwNH+MsdUa4HMuJHmL/WZRiwVh4x1Lo7mxgfVtzfWjFhusXv4jJ1yqQCqdY8uGNtbEGqvdlaoSzEQWUo2l0p6fvvMUm2G2xX4u9Et5mGuRahQJHGGqQUWFi6S3S9onaUDSR2ep3yrpfklPSHpI0uZQ3TmS7pX0rKRnJG3zy7/hn/MpSbdIavbL90g6Kekx/3ND8fVqheQqT3gV4PneL+yOnExn2bnCEV1rndmEy9Bwftog7Vg6nfFYXWSjHB6f5MiJ0apFAamYcJHUCHwBuATYBVwtaVdRs08Dt5rZecCNwCdDdbcCnzKzlwMXAhm//BvAy4BXAWuA94eOedjMzvc/N5Z7TOUgPznF80eHXRgTYE2skS0b2hZMeZxMr65snaXQGT8zMvJQbtx5ipWBeomM/NxgsFC7Os+aSs5cLgQGzGy/meWB24BLi9rsAh7wtx8M6n0h1ORno8TMcmY24m/fbT7APwObiRAHhoaZnDI3c/Hp703MqxY7PpznaG7cGfOL6Ii3nGZzmShMccrFFSsL80WdjhKpKkcBqaRw2QQcCu0f9svCPA5c4W9fDrRL6gT6gROSviXpUUmf8mdC0/jqsHcD3w8Vv0HS45K+J+kV5RxMuZg2Tq9yN+SAvt52nj86zMQcHmPOmD87nYkYw/kCYxNefLHjbgFl2ejyIyNPRTy+WCqTo7lRbO2sTv6jahv0rwMukvQocBFwBCgATcCb/PrXAecC1xQd+0Xgh2b2sL//U2Crmb0a+BzwndkuKOlaSXsl7R0cHCzvaEogmc7R4DzFpunvTTBRMA4cnT3GWLDI0s1cTmcmvlj
+tL9OLbZ8OuIxpgxOjE5UuyvLYiCTZXtXnObG6jzmK3nVI8CW0P5mv2waM3vBzK4wswuAj/llJ/BmOY/5KrVJPEHxmuA4SR8HuoEPh851ysxy/vbdQLOkruJOmdnNZrbbzHZ3d3eXZaCLIZXOck5HG63Nq9tTLCCYwc1l1E+ls7S3NLFxhSO61jrTi/181Vjg3eTUYsunXhZSpjK5qmpIKilcHgH6JG2XFAOuAu4KN5DUJSnow/XALaFj10sKnv4XA8/4x7wf+JfA1WY2FTrXWfLdiSRdiDe2oYqMbBl42SfdW3jAzp4EDZo75bHnKbbyEV1rnZn4YuOn/XUzl+UTrBWKsjvy2ESBQ8dGqqohqZhw8WccHwTuAZ4F7jCzpyXdKOmdfrM9wD5JSaAXuMk/toCnErtf0pOAgC/7x/yZ3/bHRS7HVwJPSXoc+CxwlW/0rxnGJwscGBpxxvwQrc2NnNPRRmoOjzGXfXJ2iiMju4jI5WNm5hJd4bJ/cJgpq66tsqLx3n311N1FZTeEtu8E7pzj2PuA82Ypn7XPZvZ54PPL6W+lef7oMIUpc/aDIvp622dViwW5v50x/0w6iwIsHhvO0yBY3+aEy3LpmiPqdJQIXtbqVS3mKCJ4gDpPsdPp701w4Ogw+cnTPcZcDLa5aW9porlR06Hhh4bzbGiLVSXMR72xYY6o01FiIJOjsUFs66qOpxg44bKipNJZGgTndser3ZWaor+3nckp4/kij7GUC1g5J5L8xX6+zSU37lRiZaK5sYG1rU2RNuin0jm2drTR0lQ9xyEnXFaQZDrLts648xQrYsZj7HS7SzKdpb21id61bu3GbIQX+7m4YuWlM9ESebVYtZc7OOGyglQziFwtc253nAadGcAy6WfrdJ5is9MZii82NJyftsM4lk+UQ8DkJ71I49V+1jjhskKMTRQ4MDTsVDyz0NrcyLbO+GlGfTMj5QJ8zktnInbazMWF2y8fnRGOjHxgyHMcqrZt1wmXFWLGNdAJl9no602cFsDyaC7P8ZGJqv9AapkgNPxEYYoTIxNOLVZGOhNnRp2OCtOpjZ1abHUwY5x2b+Kz0d/bzsGhEcYnvVhZgYrMzfTmpjMeIzc+SfrUmLfv1GJloyMe4/hINOOLpdI5JNjR7YTLqiCZztLYILZ3OU+x2ejrbacwZewf9DzGkmknjBciWEgZvKm6mUv56Ii3UJgyTo1FL75YKpOtiWSETrisEMl0jm2d1XUNrGUCIRIIlWQmx7o1zXS3OzvCXATCxAmX8lMcGDRKDGRyVVeJgRMuK4ZnnHYqnrnY3hWnsUHTOSgCY77zFJubYCV5IJC7XLj9sjEduy1iRv3JwhT7B4erln0yjBMuK8DYRIGDx0acMX8eWpoa2dbZRjKdxcxIpnPufi1AMFMJvOzczKV8RDUy8s+PjZAvTLmZy2phIJPDzNkPFqK/t51UJsdgdpyToxMutfECdIZsLhJscHHFysZ0ZOSIqcVSvoq0Fl7MnHBZAVwYk9Lo623n4NAwTx45Cbj7tRBr1zTR1CBy45OsX9Ps4oqVkQ3xZoDTUklHgVpxQwYnXFaEZDpHU4PY1uk8xeajvzfBlMH3n3oJqI23r1omiC8GLr1xuWlpaqS9tSlyM5eBTI6z17WSaKlowPuScMJlBUilvXSjsSZ3u+cjmKnc8/RLbGhrnjZYO+YmEC7O3lJ+wuF1okIqk2VnjbyUuafdChDEyHLMz7bOOE0N4tTYJH0uplhJBF5NLgNl+QlHnY4CU1PGQCZXE55iUGHhIuntkvZJGpD00Vnqt0q6X9ITkh6StDlUd46keyU9K+kZSdv88u2S/sk/5+1+CmUktfj7A379tkqOrVRG8wUOHa9+ELkoEGtqmF5k6pwfSiNYSOlmLuWnI94SKVfkIydGGZuoDU8xqKBwkdQIfAG4BNgFXC1pV1GzTwO3mtl5wI3AJ0N1twKfMrOXAxcCGb/8fwKfMbOdwHHgfX75+4Djfvln/HZV57nBwFPMzVxKIbhP7n6VRjBjcTOX8tMZscjIM9kn61y44AmEATPbb2Z54Dbg0qI2u4AH/O0Hg3pfCDX5qY4xs5yZjcjTk1zMTGrkrwGX+duX+vv49W9VlfQq3kKmHPc+/RJ//g8HAPcmXirBDM8FrCyNTmfQrxhB1GmzaMQXCxYg18rMpZIuBZuAQ6H9w8Dri9o8DlwB/G/gcqBdUifQD5yQ9C1gO/B3wEeBDcAJM5sMnXNT8fXMbFLSSaATOBq+oKRrgWsBzjnnnGUNcGyiwPNHh0llcgxkcjzn/33+6DD5wkzK3l0b1zpPsRK5+GU9PLRvkFdtXlftrkSCjoQz6FeKjniMySnj1Ogk69qaq92dBUllcnS3t7C+RtY7Vdtf7Trg85KuAX4IHAEKeP16E3AB8HPgduAa4LvLvaCZ3QzcDLB79+4lvZI8+LMMn/jrpzl0bIQgaGqD4JyONnb2JNjzsm52difo621nR3ec9tba/8esFc7bvJ7vfOCN1e5GZHBqscoROEtc9sW/pykCa4iOnBjl/C3rq92NaSopXI4AW0L7m/2yaczsBbyZC5ISwK+a2QlJh4HHzGy/X/cd4BeBW4D1kpr82Uv4nMH1DktqAtYBQ5UYWEc8xivPXsel52+iryfBzp4E27tc+mLHyvPGnV1c++Zzec3WDdXuSt3xxp1dXHHBJsb8NBC1Tl9vgitfu3nhhiuEKqVP9B/wSeCteA/+R4BfN7OnQ226gGNmNiXpJqBgZjf4zgA/BX7ZzAYlfRXYa2ZfkPSXwF+Z2W2S/gx4wsy+KOkDwKvM7HckXQVcYWbvmq+Pu3fvtr1791Zi+A6Hw1G3SPqJme2er03FDPr+zOKDwD3As8AdZva0pBslvdNvtgfYJykJ9AI3+ccW8FRm90t6EhDwZf+YjwAfljSAZ1P5il/+FaDTL/8wno3G4XA4HFWgYjOXKOBmLg6Hw7F4qjpzcTgcDsfqxQkXh8PhcJQdJ1wcDofDUXaccHE4HA5H2XHCxeFwOBxlxwkXh8PhcJSdVe2KLGkQOFhU3EVRPLI6wY0retTr2Ny4okfx2LaaWfd8B6xq4TIbkvYu5L8dRdy4oke9js2NK3osZWxOLeZwOByOsuOEi8PhcDjKjhMuZ3JztTtQIdy4oke9js2NK3osemzO5uJwOByOsuNmLg6Hw+EoO064OBwOh6PsOOHiI+ntkvZJGpBUV7lgJB2Q9KSkxyRFNseApFskZSQ9FSrrkHSfpJT/N3IpGecY1yckHfG/s8ck/atq9nEpSNoi6UFJz0h6WtKH/PJ6+M7mGlukvzdJrZL+WdLj/rj+q1++XdI/+c/H2yUtmFfb2VwAP/NlEngbcBgva+bVZvZMVTtWJiQdAHabWaQXeEl6M5ADbjWzV/plf4KXzfSP/ZeCDWb2kWr2c7HMMa5PADkz+3Q1+7YcJG0ENprZTyW1Az8BLgOuIfrf2VxjexcR/t4kCYibWU5SM/Aj4EN4CRi/FcoA/LiZfWm+c7mZi8eFwICZ7TezPHAbcGmV++Qowsx+CBwrKr
4U+Jq//TW8H3ikmGNckcfMXjSzn/rbWbyMtJuoj+9srrFFGvPI+bvN/seAi4E7/fKSvjMnXDw2AYdC+4epg3+UEAbcK+knkq6tdmfKTK+Zvehvv4SXLrte+KCkJ3y1WeRUR2EkbQMuAP6JOvvOisYGEf/eJDVKegzIAPcBzwEn/NT1UOLz0QmX1cEvmdlrgEuAD/hqmLrDPB1vveh5vwTsAM4HXgT+V1V7swwkJYC/Av7AzE6F66L+nc0ytsh/b2ZWMLPzgc14Wp2XLeU8Trh4HAG2hPY3+2V1gZkd8f9mgG/j/cPUC2lf/x3owTNV7k9ZMLO0/yOfAr5MRL8zX2//V8A3zOxbfnFdfGezja1evjcAMzsBPAi8AVgvqcmvKun56ISLxyNAn+8REQOuAu6qcp/KgqS4b3BEUhz4f4Cn5j8qUtwFvNfffi/w3Sr2pWwED1+fy4ngd+Ybh78CPGtmfxqqivx3NtfYov69SeqWtN7fXoPn5PQsnpC50m9W0nfmvMV8fJfB/xdoBG4xs5uq26PyIOlcvNkKQBPwF1Edm6RvAnvwwn+ngY8D3wHuAM7BS5/wLjOLlHF8jnHtwVOtGHAA+O2QnSISSPol4GHgSWDKL/5DPNtE1L+zucZ2NRH+3iSdh2ewb8SbfNxhZjf6z5HbgA7gUeA3zGx83nM54eJwOByOcuPUYg6Hw+EoO064OBwOh6PsOOHicDgcjrLjhIvD4XA4yo4TLg6Hw+EoO064OBwVQtLvSHrPAm2ukfT5Oepys5U7HFGgaeEmDodjKZjZn1W7D8VIagrFiHI4KoabuTgcJSJpm6RnJX3Zz3Vxr6Q1knZI+r4fGPRhSS/z239C0nX+9uv8YIaPSfpUOHcLcLZ/fMpPIRC+5mf8a90vqdsvO1/SP/rn+3YQHFHSQ5J2+9tdfqqFYHZ0l6QHgPslbZT0Q78vT0l6U8VvnmPV4YSLw7E4+oAvmNkrgBPArwI3A79vZq8FrgO+OMtxX8VbrX0+UCiqOx/4NeBVwK9JCuLcxYG9/rV+gLdyH+BW4CNmdh7eCvGPszCvAa40s4uAXwfu8fvyauCxEo53OBaFU4s5HIvjeTN7zN/+CbAN+BfAX3rhpgBoCR/gx2pqN7Mf+0V/Abwj1OR+Mzvpt30G2IqXAmIKuN1v83XgW5LWAevN7Ad++deAvyyh3/eFQqw8AtziB178Tmg8DkfZcDMXh2NxhOMpFfBiLZ0ws/NDn5cv85xzvfQtFKtpkpnfdGtR3fD0SbzkZG/Gi2z75ws5HTgcS8EJF4djeZwCnpf0b8CLlivp1eEGfujyrKTX+0VXlXjuBmYi0f468CN/hnM8ZCd5N57KDLxAia/1t4PjzkDSViBtZl8G/i+eyszhKCtOuDgcy+ffAu+T9DjwNLOnyH4f8GU/w18cOFnCeYeBC33j/8XAjX75e4FPSXoCz14TlH8a+F1Jj+JFWJ6LPcDjfrtfA/53CX1xOBaFi4rscKwAkhJBbnJJHwU2mtmHqtwth6NiOIO+w7Ey/Iqk6/F+cweBa6rbHYejsriZi8PhcDjKjrO5OBwOh6PsOOHicDgcjrLjhIvD4XA4yo4TLg6Hw+EoO064OBwOh6Ps/P8zeNlCUxjz1QAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Create scatterplot\n", + "plt.title('neighbours x accuracy')\n", + "plt.xlabel('neighbours')\n", + "plt.ylabel('accuracy')\n", + "plt.plot(k_list, avgs)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "neighbour#0: 0.959815078236131\n", + "neighbour#1: 0.9599928876244666\n", + "neighbour#2: 0.9665718349928877\n", + "neighbour#3: 0.9731507823613088\n", + "neighbour#4: 0.9799075391180655\n", + "neighbour#5: 0.9733285917496444\n", + "neighbour#6: 0.9733285917496444\n", + "neighbour#7: 0.9665718349928876\n", + "neighbour#8: 0.9599928876244666\n", + "neighbour#9: 0.9665718349928876\n", + "neighbour#10: 0.9665718349928876\n", + "neighbour#11: 0.9731507823613088\n", + "neighbour#12: 0.9731507823613088\n", + "neighbour#13: 0.9731507823613088\n", + "neighbour#14: 0.9731507823613088\n", + "neighbour#15: 0.959815078236131\n", + "neighbour#16: 0.9731507823613088\n", + "neighbour#17: 0.9665718349928876\n", + "neighbour#18: 0.9731507823613088\n", + "neighbour#19: 0.9731507823613088\n", + "neighbour#20: 0.9731507823613088\n", + "neighbour#21: 0.9799075391180654\n", + "neighbour#22: 0.9731507823613088\n", + "neighbour#23: 0.959815078236131\n", + "neighbour#24: 0.959815078236131\n", + "neighbour#25: 0.959815078236131\n", + "neighbour#26: 0.959815078236131\n", + "neighbour#27: 0.959815078236131\n", + "neighbour#28: 0.9665718349928876\n", + "\n", + "best neighbour#4: 0.9799075391180655\n" + ] + } + ], + "source": [ + "largest = 0\n", + "\n", + "for i in range(0, 29, 1): # 1-30 (array index form)\n", + " print(\"neighbour#\"+str(i)+\":\", avgs[i])\n", + " if avgs[i] > avgs[largest]:\n", + " largest = i\n", + "print()\n", + "\n", + "print(\"best neighbour#\"+str(largest)+\":\", avgs[largest])" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/Week5/exercise3a.ipynb b/Week5/exercise3a.ipynb new file mode 100644 index 0000000..1e9ebff --- /dev/null +++ b/Week5/exercise3a.ipynb @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing the evaluation of a logisitic regression model\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.preprocessing import OneHotEncoder # method to split dataset into 4\n", + "\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.linear_model import LogisticRegression # linear regression algorithm\n", + "\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method\n", + "from sklearn.metrics import accuracy_score, recall_score, precision_score # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 
34, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"../Week3/breast_cancer.csv\") # import dataset with custom headers, store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preprocessing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Resolving issues" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "# There are issues with this dataset - 'diagnosis' (our y object) MUST be an integer\n", + "# we can simply just create another column and fill that with the correct versions of values (ie 0, 1) & replace the intial column\n", + "fDiagnosis = pd.get_dummies(data[\"diagnosis\"]) # use 'get_dummies' method converts categorical variable into dummy/indicator variables\n", + "# note: this creates a column for each categorical, where 1 represents in each column whether a row had that value set or not\n", + "data[\"diagnosis\"] = fDiagnosis.iloc[:, -1] # replace old w in new column" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# Another issue - column 'id' & 'Unnamed 32' are unusable - delete them\n", + "data.drop(columns=\"id\", inplace=True) # remove old column\n", + "data.drop(columns=\"Unnamed: 32\", inplace=True) # remove old column" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "x = data.iloc[:, 2:].values # values we want to classify - we only want\n", + "y = data.iloc[:, 0].values # acceptances for each row (ie either benign (0) or malignant (1))\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train model" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticRegression(max_iter=20000)\n", + "model.fit(x_train, y_train)\n", + "predictions = model.predict(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing (main feature of this notebook)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "accuracy_score: 0.951048951048951\n" + ] + } + ], + "source": [ + "print(\"accuracy_score:\", accuracy_score(y_test, y_test_pred)) \n", + "# proportion of ALL results correctly classified (trues / (t's+f's))" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "recall_score: 0.9811320754716981\n" + ] + } + ], + "source": [ + "print(\"recall_score:\", recall_score(y_test, y_test_pred)) \n", + "# proportion of correctly predicted positives (true positives / (tp + fn))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "precision_score: 0.896551724137931\n" + ] + } + ], + "source": [ + "print(\"precision_score:\", 
precision_score(y_test, y_test_pred)) \n", + "# proportion of positive identifications that were actually correct (True Positives / (True Positives + False Positives))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week5/exercise3b.ipynb b/Week5/exercise3b.ipynb new file mode 100644 index 0000000..95631ab --- /dev/null +++ b/Week5/exercise3b.ipynb @@ -0,0 +1,198 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing evaluation of a logistic regression model\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.preprocessing import OneHotEncoder # method to preprocess data (specifically converting categorical columns into numerical indicator columns)\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.linear_model import LogisticRegression # logistic regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"../Week3/titanic.csv\") # import dataset (already contains headers), store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preprocessing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Resolving issues" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# turn any non-numerical categories (be they string or continuous 
values) into numerical ones\n", + "# we can simply just create another column and fill that with the correct versions of values (ie 0, 1) & replace the initial column\n", + "fSex = pd.get_dummies(data[\"Sex\"]) # the 'get_dummies' method converts a categorical variable into dummy/indicator variables\n", + "data[\"Sex\"] = fSex.iloc[:, -1] # replace the old column with the new one\n", + "fEmbarked = pd.get_dummies(data[\"Embarked\"]) # the 'get_dummies' method converts a categorical variable into dummy/indicator variables\n", + "data[\"Embarked\"] = fEmbarked.iloc[:, -1] # replace the old column with the new one" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Another issue - many columns are irrelevant - delete them\n", + "data.drop(columns=\"PassengerId\", inplace=True) # remove old column\n", + "data.drop(columns=\"Name\", inplace=True) # remove old column\n", + "data.drop(columns=\"Ticket\", inplace=True) # remove old column" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Some columns contain too many NaN's (IEEE NotANumber) for dropping the affected rows to be viable - delete those columns instead\n", + "data.drop(columns=\"Cabin\", inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# now drop the rows containing NaN's\n", + "data = data.dropna()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "x = data.iloc[:, 2:].values # feature values we want to classify with\n", + "y = data.iloc[:, 0].values # survival outcome for each row (ie did not survive (0) or survived (1))\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticRegression(max_iter=20000)\n", + "model.fit(x_train, y_train)\n", + "predictions = model.predict(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing (main feature of this notebook)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week5/exercise4a.ipynb b/Week5/exercise4a.ipynb new file mode 100644 index 0000000..233e53e --- /dev/null +++ b/Week5/exercise4a.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing confusion matrix evaluation of a trained logistic regression model\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.linear_model import LogisticRegression # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method\n", + "from sklearn.metrics import confusion_matrix\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "cols = [\"test a\", \"test b\", \"accepted\"]\n", + "data = pd.read_csv(\"../Week3/admission.data\", names=cols) # import dataset with custom headers, store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Splitting dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "x = data.iloc[:, -3:-1].values # values we want to classify\n", + "y = data.iloc[:, -1].values # acceptances for each row\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train model" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(x_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing (using confusion matrix) (focus of notebook)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "y_test_pred = model.predict(x_test)\n", + "cm = confusion_matrix(y_test, y_test_pred) # shows true positive, true negative, false positive, false negatives for test dataset\n", + " # in array form" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualise matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAW4AAAD4CAYAAADM6gxlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAIwElEQVR4nO3cf6zVdR3H8fcn7yYqGRhxsQkuxf7QbGlmueUSlfkD06xltthyjTDNfyyb81eWTlmb8w9njqGZeumfdLJKlKZO++FEQcbQ2EywFEx+LFERSAw+/YPIpXPODtG93/s+PB5/3e/3O+55bZw99933HCi11gAgjw81PQCAPSPcAMkIN0Aywg2QjHADJNM31C9wwHGX+doKI9aGRbc3PQFaGtUXpd01d9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyQg3QDLCDZCMcAMkI9wAyXQd7lLK1KEcAkB39uSO+2dDtoKWZl//rXjl8Vmx+P6rd5776unHxXMPXBObnrstjj96UoPr4ANP/emPce60M+KcM6fGL+6c0/ScnudRyQg28LuFcd73fz7o3F9W/iMu/OGd8eclKxtaBYNt27Ytbr7phrhj9l0x77fzY8HDD8XKFSuantXT+jpdLKX8MiJqRJSImFRKufv9a7XW7wzxtn3eU0tWxqRDDxl07sW/rW1oDbT2wvPLYuLEw+OwiRMjIuLMs6fFk088HkdOntzwst7VMdwRcc8uP38xIu4duilARuvWro0Jh07YeTy+vz+eX7aswUW9r2O4a61/eP/nUsrGXY87KaXMjIiZERF9h50SfeOO2auRAHyg7TPuUsruUd/a7S+ttc6ptZ5Qaz1BtKG3je/vjzWvr9l5vG7t2ujv729wUe/r9OHks7se1Fq/MMRbgISO+dSx8eqrf4/Vq1fFe1u3xoKH58eXppza9Kye1ulRSRm2FbR076yL4uTPHhXjxoyOFQtujBtnPxwb3toUt1759Rg3dnQ8eNv3YtmLr8W5u33zBIZTX19fXHXNj+OSmTNi+/Zt8ZXzvxaTJx/V9KyeVmqtrS+Usjoibm33B2utba/t6oDjLmv9AjACbFh0e9MToKVRfe1vnjvdce8XEaPDnTfAiNIp3K/XWm8YtiUAdKXTh5PutAFGoE7hPm3YVgDQtbbhrrW+MZxDAOiO/2QKIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZEqtdUhf4L7Fq4b2BWAvXHzdA01PgJa2PHJ5aXfNHTdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMsINkIxwAyQj3ADJCDdAMn1ND6B727dvi7uvvTQ+PHZcfONHNzU9h33Y7MunxlknHhHr39wcJ1wyEBERY0fvHwNXTYvD+w+OV9a+HdNnzY8333m34aW9yR13IosWzItxH5/U9AyIgUeXx3nXzht07ooLTownl66KY2fcE08uXRVXXPC5htb1vj0Kdynl+KEaQmdv/3N9rFj6THxmytlNT4F46oXX4o2N/xp07pyTjoi5jy2PiIi5jy2PL590ZBPT9gltw11KafUY5a4h3EIHjw7cEad+87tRSml6CrQ0fsyBsWbDpoiIWLNhU4wfc2DDi3pXpzvuZ1uc66oapZSZpZTFpZTFTzz4q/9tGTu9tGRhHPiRMXHoJz7Z9BToWq1NL+hdnT6cbBXpn3bzS2utcyJiTkTEfYtX+evbS6v/+kK89NzTsXLps/Hv97bGu1s2x2/umBXnXXpV09Ngp3Vvbo4JYw+KNRs2xYSxB8X6tzY3PalndQr3x0opP9j95Pvnaq23DtkqBply4YyYcuGMiIh4ZfnSWDj/ftFmxJm/8OWYfvrRccv9i2L66UfHQ0+/3PSkntUp3PtFxOjo8vEIsO+498qz4uRPT4xxB4+KFQMz4saBp+OWXy+KuVdPi2+fcUy8um5jTL/5oaZn9qxS2zyIKqUsqbXu9bdIPCphJLv4ugeangAtbXnk8rY3zZ0+nHSnDTACdQr3acO2AoCutQ13rfWN4RwCQHf8k3eAZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkhGuAGSEW6AZIQbIBnhBkim1Fqb3sAeKKXMrLXOaXoH7M57c/i4485nZtMDoA3vzWEi3ADJCDdAMsKdj2eIjFTem8PEh5MAybjjBkhGuAGS6Wt6AP+tlPLRiHh8x+GEiNgWEet3HJ9Ya93ayDDYhfdpczzjHuFKKT+JiHdqrbfsdv6UiLio1nrR8K+Cwdq9TxkaHpUAJCPcAMl4xp1MKeWZiNg/IkZHxCGllKU7Ll1Za/19Y8OAYSPcydRaPx/hGTcjUynl/Ii4fsfhjFrr4ib39CrhBv5vaq3zImJe0zt6nWfcAMn4OiBAMu64AZIRboBkhBsgGeEGSEa4AZIRboBkhBsgmf8ALVSSN9rBPGcAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(cm, xticklabels=[\"T+\", \"T-\"], yticklabels=[\"T+\", \"T-\"], annot=cm, fmt=\"\", cmap='Blues', cbar=False)\n", + "# REMINDER: add big x and y labels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week6/.ipynb_checkpoints/exercise1a-checkpoint.ipynb b/Week6/.ipynb_checkpoints/exercise1a-checkpoint.ipynb new file mode 100644 index 0000000..5aaf980 --- /dev/null +++ b/Week6/.ipynb_checkpoints/exercise1a-checkpoint.ipynb @@ -0,0 +1,837 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing neural network classification using multiple variables with a little bit of preprocessing\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.preprocessing import OneHotEncoder # method to split dataset into 4\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.neural_network import MLPClassifier # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"../Week3/breast_cancer.csv\") # import dataset with custom headers, store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing\n", + "### Check for possible issues" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".sum of 0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... 
\n", + "564 False\n", + "565 False\n", + "566 False\n", + "567 False\n", + "568 False\n", + "Length: 569, dtype: bool>" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.duplicated().sum" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 569 entries, 0 to 568\n", + "Data columns (total 33 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 569 non-null int64 \n", + " 1 diagnosis 569 non-null object \n", + " 2 radius_mean 569 non-null float64\n", + " 3 texture_mean 569 non-null float64\n", + " 4 perimeter_mean 569 non-null float64\n", + " 5 area_mean 569 non-null float64\n", + " 6 smoothness_mean 569 non-null float64\n", + " 7 compactness_mean 569 non-null float64\n", + " 8 concavity_mean 569 non-null float64\n", + " 9 concave points_mean 569 non-null float64\n", + " 10 symmetry_mean 569 non-null float64\n", + " 11 fractal_dimension_mean 569 non-null float64\n", + " 12 radius_se 569 non-null float64\n", + " 13 texture_se 569 non-null float64\n", + " 14 perimeter_se 569 non-null float64\n", + " 15 area_se 569 non-null float64\n", + " 16 smoothness_se 569 non-null float64\n", + " 17 compactness_se 569 non-null float64\n", + " 18 concavity_se 569 non-null float64\n", + " 19 concave points_se 569 non-null float64\n", + " 20 symmetry_se 569 non-null float64\n", + " 21 fractal_dimension_se 569 non-null float64\n", + " 22 radius_worst 569 non-null float64\n", + " 23 texture_worst 569 non-null float64\n", + " 24 perimeter_worst 569 non-null float64\n", + " 25 area_worst 569 non-null float64\n", + " 26 smoothness_worst 569 non-null float64\n", + " 27 compactness_worst 569 non-null float64\n", + " 28 concavity_worst 569 non-null float64\n", + " 29 concave points_worst 569 non-null float64\n", + " 30 symmetry_worst 569 non-null float64\n", + " 31 fractal_dimension_worst 569 non-null float64\n", + " 32 Unnamed: 32 0 non-null float64\n", + "dtypes: float64(31), int64(1), object(1)\n", + "memory usage: 146.8+ KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddiagnosisradius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_mean...texture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worstUnnamed: 32
0842302M17.9910.38122.801001.00.118400.277600.30010.14710...17.33184.602019.00.16220.66560.71190.26540.46010.11890NaN
1842517M20.5717.77132.901326.00.084740.078640.08690.07017...23.41158.801956.00.12380.18660.24160.18600.27500.08902NaN
284300903M19.6921.25130.001203.00.109600.159900.19740.12790...25.53152.501709.00.14440.42450.45040.24300.36130.08758NaN
384348301M11.4220.3877.58386.10.142500.283900.24140.10520...26.5098.87567.70.20980.86630.68690.25750.66380.17300NaN
484358402M20.2914.34135.101297.00.100300.132800.19800.10430...16.67152.201575.00.13740.20500.40000.16250.23640.07678NaN
\n", + "

5 rows × 33 columns

\n", + "
" + ], + "text/plain": [ + " id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n", + "0 842302 M 17.99 10.38 122.80 1001.0 \n", + "1 842517 M 20.57 17.77 132.90 1326.0 \n", + "2 84300903 M 19.69 21.25 130.00 1203.0 \n", + "3 84348301 M 11.42 20.38 77.58 386.1 \n", + "4 84358402 M 20.29 14.34 135.10 1297.0 \n", + "\n", + " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", + "0 0.11840 0.27760 0.3001 0.14710 \n", + "1 0.08474 0.07864 0.0869 0.07017 \n", + "2 0.10960 0.15990 0.1974 0.12790 \n", + "3 0.14250 0.28390 0.2414 0.10520 \n", + "4 0.10030 0.13280 0.1980 0.10430 \n", + "\n", + " ... texture_worst perimeter_worst area_worst smoothness_worst \\\n", + "0 ... 17.33 184.60 2019.0 0.1622 \n", + "1 ... 23.41 158.80 1956.0 0.1238 \n", + "2 ... 25.53 152.50 1709.0 0.1444 \n", + "3 ... 26.50 98.87 567.7 0.2098 \n", + "4 ... 16.67 152.20 1575.0 0.1374 \n", + "\n", + " compactness_worst concavity_worst concave points_worst symmetry_worst \\\n", + "0 0.6656 0.7119 0.2654 0.4601 \n", + "1 0.1866 0.2416 0.1860 0.2750 \n", + "2 0.4245 0.4504 0.2430 0.3613 \n", + "3 0.8663 0.6869 0.2575 0.6638 \n", + "4 0.2050 0.4000 0.1625 0.2364 \n", + "\n", + " fractal_dimension_worst Unnamed: 32 \n", + "0 0.11890 NaN \n", + "1 0.08902 NaN \n", + "2 0.08758 NaN \n", + "3 0.17300 NaN \n", + "4 0.07678 NaN \n", + "\n", + "[5 rows x 33 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Resolving issues" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# There are issues with this dataset - 'diagnosis' (our y object) MUST be an integer\n", + "# we can simply just create another column and fill that with the correct versions of values (ie 0, 1) & replace the intial column\n", + "fDiagnosis = pd.get_dummies(data[\"diagnosis\"]) # use 'get_dummies' method converts categorical variable into dummy/indicator variables\n", + "# note: this creates a column for each categorical, where 1 represents in each column whether a row had that value set or not\n", + "data[\"diagnosis\"] = fDiagnosis.iloc[:, -1] # replace old w in new column" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Another issue - column 'id' & 'Unnamed 32' are unusable - delete them\n", + "data.drop(columns=\"id\", inplace=True) # remove old column\n", + "data.drop(columns=\"Unnamed: 32\", inplace=True) # remove old column" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
diagnosisradius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst
0117.9910.38122.801001.00.118400.277600.30010.147100.2419...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
1120.5717.77132.901326.00.084740.078640.08690.070170.1812...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
2119.6921.25130.001203.00.109600.159900.19740.127900.2069...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
3111.4220.3877.58386.10.142500.283900.24140.105200.2597...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
4120.2914.34135.101297.00.100300.132800.19800.104300.1809...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n", + "0 1 17.99 10.38 122.80 1001.0 \n", + "1 1 20.57 17.77 132.90 1326.0 \n", + "2 1 19.69 21.25 130.00 1203.0 \n", + "3 1 11.42 20.38 77.58 386.1 \n", + "4 1 20.29 14.34 135.10 1297.0 \n", + "\n", + " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", + "0 0.11840 0.27760 0.3001 0.14710 \n", + "1 0.08474 0.07864 0.0869 0.07017 \n", + "2 0.10960 0.15990 0.1974 0.12790 \n", + "3 0.14250 0.28390 0.2414 0.10520 \n", + "4 0.10030 0.13280 0.1980 0.10430 \n", + "\n", + " symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n", + "0 0.2419 ... 25.38 17.33 184.60 \n", + "1 0.1812 ... 24.99 23.41 158.80 \n", + "2 0.2069 ... 23.57 25.53 152.50 \n", + "3 0.2597 ... 14.91 26.50 98.87 \n", + "4 0.1809 ... 22.54 16.67 152.20 \n", + "\n", + " area_worst smoothness_worst compactness_worst concavity_worst \\\n", + "0 2019.0 0.1622 0.6656 0.7119 \n", + "1 1956.0 0.1238 0.1866 0.2416 \n", + "2 1709.0 0.1444 0.4245 0.4504 \n", + "3 567.7 0.2098 0.8663 0.6869 \n", + "4 1575.0 0.1374 0.2050 0.4000 \n", + "\n", + " concave points_worst symmetry_worst fractal_dimension_worst \n", + "0 0.2654 0.4601 0.11890 \n", + "1 0.1860 0.2750 0.08902 \n", + "2 0.2430 0.3613 0.08758 \n", + "3 0.2575 0.6638 0.17300 \n", + "4 0.1625 0.2364 0.07678 \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Now view fixed data\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "x = data.iloc[:, 2:].values # values we want to classify - we only want\n", + "y = data.iloc[:, 0].values # acceptances for each row (ie either benign (0) or malignant (1))\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def fit_and_test(x_train, x_test, y_train, y_test):\n", + " model = MLPClassifier(random_state=0,max_iter=20000)\n", + " model.fit(x_train, y_train)\n", + " print(\"training score:\", model.score(x_train, y_train))\n", + " print(\"testing score:\", model.score(x_test, y_test))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### using unscaled data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1.818e+01, 7.638e+01, 4.088e+02, ..., 1.416e-01, 2.660e-01,\n", + " 9.270e-02],\n", + " [1.518e+01, 9.397e+01, 6.401e+02, ..., 1.599e-01, 2.691e-01,\n", + " 7.683e-02],\n", + " [1.956e+01, 7.854e+01, 4.610e+02, ..., 1.080e-01, 2.668e-01,\n", + " 8.174e-02],\n", + " ...,\n", + " [1.762e+01, 7.079e+01, 3.656e+02, ..., 9.861e-02, 2.289e-01,\n", + " 8.278e-02],\n", + " [1.726e+01, 7.526e+01, 4.319e+02, ..., 5.356e-02, 2.779e-01,\n", + " 8.121e-02],\n", + " [1.658e+01, 6.585e+01, 3.208e+02, ..., 8.333e-02, 2.691e-01,\n", + " 9.479e-02]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Min of unscaled train set: 0.005217\n", + "Max of unscaled train set: 528.1\n", + "Min of unscaled test set: 0.004768\n", + "Max of unscaled test set: 811.3\n" + ] + } + ], + "source": [ + "print(\"Min of unscaled train set:\", min(x_train.tolist()[0]))\n", + "print(\"Max of unscaled train set:\", max(x_train.tolist()[0]))\n", + "print(\"Min of unscaled test set:\", min(x_test.tolist()[0]))\n", + "print(\"Max of unscaled test set:\", max(x_test.tolist()[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training score: 0.9107981220657277\n", + "testing score: 0.9090909090909091\n" + ] + } + ], + "source": [ + "fit_and_test(x_train, x_test, y_train, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Scale copy of model and train (main focus of this notebook)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "sc = StandardScaler()\n", + "x_train_scaled_s = sc.fit_transform(x_train) # get mean and variances, store and \n", + "x_test_scaled_s = sc.transform(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Min of train scaled set: -0.6980307692070779\n", + "Max of train scaled set: 2.454856998389618\n", + "Min of test scaled set: -0.20461336960599075\n", + "Max of test scaled set: 2.941456724450779\n" + ] + } + ], + "source": [ + "print(\"Min of scaled train set:\", min(x_train_scaled_s.tolist()[0]))\n", + "print(\"Max of scaled train set:\", max(x_train_scaled_s.tolist()[0]))\n", + "print(\"Min of scaled test set:\", min(x_test_scaled_s.tolist()[0]))\n", + "print(\"Max of scaled test set:\", max(x_test_scaled_s.tolist()[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training score: 0.9107981220657277\n", + "testing score: 0.9090909090909091\n" + ] + } + ], + "source": [ + "fit_and_test(x_train, x_test, y_train, y_test)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week6/.ipynb_checkpoints/exercise1b-checkpoint.ipynb b/Week6/.ipynb_checkpoints/exercise1b-checkpoint.ipynb new file mode 100644 index 0000000..70e2188 --- /dev/null +++ b/Week6/.ipynb_checkpoints/exercise1b-checkpoint.ipynb @@ -0,0 +1,633 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing neural network classification using multiple variables with a little bit of preprocessing (again)\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import 
pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.preprocessing import OneHotEncoder # method to split dataset into 4\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.neural_network import MLPClassifier # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "cols = [\"class\",\n", + " \"alcohol\",\n", + " \"malic_acid\",\n", + " \"ash\",\n", + " \"ashalcalinity\",\n", + " \"magnesium\",\n", + " \"total_phenols\",\n", + " \"flavanoids\", \n", + " \"nonflavanoid_phenols\",\n", + " \"proanthocyanins\",\n", + " \"color_intensity\",\n", + " \"hue\",\n", + " \"od280/od315\",\n", + " \"proline\"\n", + " ]\n", + "data = pd.read_csv(\"wines.csv\", delimiter=',', names=cols) # import dataset with custom delimeter (ie vals are seperated by ;)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing\n", + "### Check for possible issues" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".sum of 0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... \n", + "173 False\n", + "174 False\n", + "175 False\n", + "176 False\n", + "177 False\n", + "Length: 178, dtype: bool>" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.duplicated().sum" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "class 0\n", + "alcohol 0\n", + "malic_acid 0\n", + "ash 0\n", + "ashalcalinity 0\n", + "magnesium 0\n", + "total_phenols 0\n", + "flavanoids 0\n", + "nonflavanoid_phenols 0\n", + "proanthocyanins 0\n", + "color_intensity 0\n", + "hue 0\n", + "od280/od315 0\n", + "proline 0\n", + "dtype: int64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 178 entries, 0 to 177\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 class 178 non-null int64 \n", + " 1 alcohol 178 non-null float64\n", + " 2 malic_acid 178 non-null float64\n", + " 3 ash 178 non-null float64\n", + " 4 ashalcalinity 178 non-null float64\n", + " 5 magnesium 178 non-null int64 \n", + " 6 total_phenols 178 non-null float64\n", + " 7 flavanoids 178 non-null float64\n", + " 8 nonflavanoid_phenols 178 non-null float64\n", + " 9 proanthocyanins 178 non-null float64\n", + " 10 color_intensity 178 non-null float64\n", + " 11 hue 178 non-null float64\n", + " 12 od280/od315 178 non-null float64\n", + " 13 proline 178 non-null int64 \n", + "dtypes: float64(11), int64(3)\n", + "memory usage: 19.6 KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + 
"metadata": {}, + "outputs": [], + "source": [ + "# no issues found, proceed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "x = data.iloc[:, 1:].values # values we want to use to classify\n", + "y = data.iloc[:, 0].values # independant var - class / quality of wine\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train model" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "model = MLPClassifier(random_state=0,max_iter=20000)\n", + "model.fit(x_train, y_train)\n", + "predictions = model.predict(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test accuracy using testing values" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Predicted values:Actual values
011
133
222
311
422
522
611
733
822
922
1033
1133
1211
1322
1433
1522
1611
1711
1832
1911
2022
2111
2211
2322
2422
2522
2622
2722
2822
2933
3011
3111
3222
3311
3411
3511
3633
3722
3822
3933
4021
4111
4222
4322
4422
\n", + "
" + ], + "text/plain": [ + " Predicted values: Actual values\n", + "0 1 1\n", + "1 3 3\n", + "2 2 2\n", + "3 1 1\n", + "4 2 2\n", + "5 2 2\n", + "6 1 1\n", + "7 3 3\n", + "8 2 2\n", + "9 2 2\n", + "10 3 3\n", + "11 3 3\n", + "12 1 1\n", + "13 2 2\n", + "14 3 3\n", + "15 2 2\n", + "16 1 1\n", + "17 1 1\n", + "18 3 2\n", + "19 1 1\n", + "20 2 2\n", + "21 1 1\n", + "22 1 1\n", + "23 2 2\n", + "24 2 2\n", + "25 2 2\n", + "26 2 2\n", + "27 2 2\n", + "28 2 2\n", + "29 3 3\n", + "30 1 1\n", + "31 1 1\n", + "32 2 2\n", + "33 1 1\n", + "34 1 1\n", + "35 1 1\n", + "36 3 3\n", + "37 2 2\n", + "38 2 2\n", + "39 3 3\n", + "40 2 1\n", + "41 1 1\n", + "42 2 2\n", + "43 2 2\n", + "44 2 2" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with\n", + "pred_vs_actual = pd.DataFrame({\"Predicted values:\": y_test_pred, \"Actual values\": y_test})\n", + "pred_vs_actual" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean squared error: 0.044444444444444446\n", + "Mean absolute error: 0.044444444444444446\n", + "Model accuracy: 0.956\n" + ] + } + ], + "source": [ + "print(\"Mean squared error:\", mean_squared_error(y_test, y_test_pred))\n", + "print(\"Mean absolute error:\", mean_absolute_error(y_test, y_test_pred))\n", + "accuracy = model.score(x_test, y_test) # or simply called score method to use the models inherent predictions vs a dataset / subset we give it \n", + "print(\"Model accuracy: {:.3f}\".format(accuracy)) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week6/.ipynb_checkpoints/exercise2-checkpoint.ipynb b/Week6/.ipynb_checkpoints/exercise2-checkpoint.ipynb new file mode 100644 index 0000000..468e947 --- /dev/null +++ b/Week6/.ipynb_checkpoints/exercise2-checkpoint.ipynb @@ -0,0 +1,566 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing optimum hyperparameter selection (using grid / random search algorithms)\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing\n", + "\n", + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "%matplotlib inline\n", + "\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.neighbors import 
KNeighborsClassifier\n", + "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n", + "\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Import dataset\n", + "cols = [\"sepal length\", \"sepal width\", \"petal length\", \"class\"] # create manual headings\n", + "df = pd.read_csv(\"../Week1/iris.data\", names=cols) # import dataset with custom headers, store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing\n", + "### Check for possible issues" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal lengthsepal widthpetal lengthclass
5.13.51.40.2Iris-setosa
4.93.01.40.2Iris-setosa
4.73.21.30.2Iris-setosa
4.63.11.50.2Iris-setosa
5.03.61.40.2Iris-setosa
...............
6.73.05.22.3Iris-virginica
6.32.55.01.9Iris-virginica
6.53.05.22.0Iris-virginica
6.23.45.42.3Iris-virginica
5.93.05.11.8Iris-virginica
\n", + "

150 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " sepal length sepal width petal length class\n", + "5.1 3.5 1.4 0.2 Iris-setosa\n", + "4.9 3.0 1.4 0.2 Iris-setosa\n", + "4.7 3.2 1.3 0.2 Iris-setosa\n", + "4.6 3.1 1.5 0.2 Iris-setosa\n", + "5.0 3.6 1.4 0.2 Iris-setosa\n", + ".. ... ... ... ...\n", + "6.7 3.0 5.2 2.3 Iris-virginica\n", + "6.3 2.5 5.0 1.9 Iris-virginica\n", + "6.5 3.0 5.2 2.0 Iris-virginica\n", + "6.2 3.4 5.4 2.3 Iris-virginica\n", + "5.9 3.0 5.1 1.8 Iris-virginica\n", + "\n", + "[150 rows x 4 columns]" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df # show dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Float64Index: 150 entries, 5.1 to 5.9\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sepal length 150 non-null float64\n", + " 1 sepal width 150 non-null float64\n", + " 2 petal length 150 non-null float64\n", + " 3 class 150 non-null object \n", + "dtypes: float64(3), object(1)\n", + "memory usage: 5.9+ KB\n" + ] + } + ], + "source": [ + "df.info() # show basic stats" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Remedy issue (conversion of string categorical labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal lengthsepal widthpetal lengthclass
5.13.51.40.20
4.93.01.40.20
4.73.21.30.20
4.63.11.50.20
5.03.61.40.20
\n", + "
" + ], + "text/plain": [ + " sepal length sepal width petal length class\n", + "5.1 3.5 1.4 0.2 0\n", + "4.9 3.0 1.4 0.2 0\n", + "4.7 3.2 1.3 0.2 0\n", + "4.6 3.1 1.5 0.2 0\n", + "5.0 3.6 1.4 0.2 0" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "le = LabelEncoder()\n", + "df[\"class\"] = le.fit_transform(df[\"class\"])\n", + "df.head() # show update table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "x = df.iloc[:,:-1].values\n", + "y = df.iloc[:, -1].values\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training model (without any tuning)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "KNeighborsClassifier()" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model0 = KNeighborsClassifier()\n", + "model0.fit(x_train, y_train) # learning takes place here" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted values: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0\n", + " 2]\n", + "Predicted values: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0\n", + " 1]\n" + ] + } + ], + "source": [ + "y_test_pred = model0.predict(x_test)\n", + "print(\"Predicted values:\", y_test_pred)\n", + "print(\"Predicted values:\", y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model accuracy:0.974\n" + ] + } + ], + "source": [ + "accuracy = model0.score(x_test, y_test)\n", + "print(\"Model accuracy:{:.3f}\".format(accuracy))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training model (using grid search to find best hyper-parameters for KNN)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters we wish to tune: leaf_size, n_neighbors, p\n", + "# we'll pass values to GridSearch to cycle through and select the best from them\n", + "hyperparameters = dict(leaf_size=list(range(1,50)), n_neighbors=list(range(1,30)), p=[1,2])\n", + "model1 = KNeighborsClassifier()\n", + "start = time.time()\n", + "clf1 = GridSearchCV(model1, hyperparameters, cv=4) # tell GridSearch we want the best values from knn, for select hyperparameters, \n", + " # setting the amount of cross validation (subdataset rotation of which is training) they'll do for each versionmodel\n", + " # change our models internal parameters\n", + "end = time.time()\n", + "clf1.fit(x_train, y_train) # learning takes place here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_test_pred = clf1.predict(x_test)\n", + "print(\"Predicted values:\", y_test_pred)\n", + "print(\"Predicted values:\", y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accuracy = clf1.score(x_test, 
y_test)\n", + "print(\"Model accuracy:{:.3f}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Hyperparameters\")\n", + "# for parameter in clf1.get_params():\n", + "# print(parameter)\n", + "print(\"\\t\", \"Tuned Leaf size:\", clf1.get_params()[\"estimator__leaf_size\"])\n", + "print(\"\\t\", \"Tuned N_Neighbour:\", clf1.get_params()[\"estimator__n_neighbors\"])\n", + "print(\"\\t\", \"Tuned P:\", clf1.get_params()[\"estimator__p\"])\n", + "print(\"Time taken to evaluate and find best hyperparameters:\", (end-start))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training model (using random search to find best hyper-parameters for KNN)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# hyperparameters we wish to tune: weights, n_neighbors, p\n", + "# we'll pass values to GridSearch to cycle through and select the best from them\n", + "hyperparameters = dict(leaf_size=list(range(1,50)), n_neighbors=list(range(1,30)), p=[1,2])\n", + "model2 = KNeighborsClassifier()\n", + "start = time.time()\n", + "clf2 = RandomizedSearchCV(model1, hyperparameters, cv=4) # tell GridSearch we want the best values from knn, for select hyperparameters, \n", + " # setting the amount of cross validation (subdataset rotation of which is training) they'll do for each versionmodel\n", + " # change our models internal parameters\n", + "end = time.time()\n", + "clf2.fit(x_train, y_train) # learning takes place here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_test_pred = clf2.predict(x_test)\n", + "print(\"Predicted values:\", y_test_pred)\n", + "print(\"Predicted values:\", y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accuracy = clf2.score(x_test, y_test)\n", + "print(\"Model accuracy:{:.3f}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Hyperparameters\")\n", + "# for parameter in clf2.get_params():\n", + "# print(parameter)\n", + "print(\"\\t\", \"Tuned Leaf size:\", clf2.get_params()[\"estimator__leaf_size\"])\n", + "print(\"\\t\", \"Tuned N_Neighbour:\", clf2.get_params()[\"estimator__n_neighbors\"])\n", + "print(\"\\t\", \"Tuned P:\", clf2.get_params()[\"estimator__p\"])\n", + "print(\"Time taken to evaluate and find best hyperparameters:\", (end-start))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week6/exercise1a.ipynb b/Week6/exercise1a.ipynb new file mode 100644 index 0000000..5aaf980 --- /dev/null +++ b/Week6/exercise1a.ipynb @@ -0,0 +1,837 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing neural network classification using multiple variables with a little bit of preprocessing\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.preprocessing import OneHotEncoder # method to split dataset into 4\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.neural_network import MLPClassifier # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv(\"../Week3/breast_cancer.csv\") # import dataset with custom headers, store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing\n", + "### Check for possible issues" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".sum of 0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... \n", + "564 False\n", + "565 False\n", + "566 False\n", + "567 False\n", + "568 False\n", + "Length: 569, dtype: bool>" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.duplicated().sum" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 569 entries, 0 to 568\n", + "Data columns (total 33 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 569 non-null int64 \n", + " 1 diagnosis 569 non-null object \n", + " 2 radius_mean 569 non-null float64\n", + " 3 texture_mean 569 non-null float64\n", + " 4 perimeter_mean 569 non-null float64\n", + " 5 area_mean 569 non-null float64\n", + " 6 smoothness_mean 569 non-null float64\n", + " 7 compactness_mean 569 non-null float64\n", + " 8 concavity_mean 569 non-null float64\n", + " 9 concave points_mean 569 non-null float64\n", + " 10 symmetry_mean 569 non-null float64\n", + " 11 fractal_dimension_mean 569 non-null float64\n", + " 12 radius_se 569 non-null float64\n", + " 13 texture_se 569 non-null float64\n", + " 14 perimeter_se 569 non-null float64\n", + " 15 area_se 569 non-null float64\n", + " 16 smoothness_se 569 non-null float64\n", + " 17 compactness_se 569 non-null float64\n", + " 18 concavity_se 569 non-null float64\n", + " 19 concave points_se 569 non-null float64\n", + " 20 symmetry_se 569 non-null float64\n", + " 21 fractal_dimension_se 569 non-null float64\n", + " 22 radius_worst 569 non-null float64\n", + " 23 texture_worst 569 non-null float64\n", + " 24 perimeter_worst 569 non-null float64\n", + " 25 area_worst 569 non-null float64\n", + " 26 smoothness_worst 569 non-null float64\n", + " 27 compactness_worst 569 non-null float64\n", + " 28 concavity_worst 569 non-null float64\n", + " 29 concave points_worst 569 non-null float64\n", + " 30 symmetry_worst 569 non-null float64\n", + " 31 fractal_dimension_worst 569 non-null float64\n", + " 32 Unnamed: 32 0 
non-null float64\n", + "dtypes: float64(31), int64(1), object(1)\n", + "memory usage: 146.8+ KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddiagnosisradius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_mean...texture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worstUnnamed: 32
0842302M17.9910.38122.801001.00.118400.277600.30010.14710...17.33184.602019.00.16220.66560.71190.26540.46010.11890NaN
1842517M20.5717.77132.901326.00.084740.078640.08690.07017...23.41158.801956.00.12380.18660.24160.18600.27500.08902NaN
284300903M19.6921.25130.001203.00.109600.159900.19740.12790...25.53152.501709.00.14440.42450.45040.24300.36130.08758NaN
384348301M11.4220.3877.58386.10.142500.283900.24140.10520...26.5098.87567.70.20980.86630.68690.25750.66380.17300NaN
484358402M20.2914.34135.101297.00.100300.132800.19800.10430...16.67152.201575.00.13740.20500.40000.16250.23640.07678NaN
\n", + "

5 rows × 33 columns

\n", + "
" + ], + "text/plain": [ + " id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n", + "0 842302 M 17.99 10.38 122.80 1001.0 \n", + "1 842517 M 20.57 17.77 132.90 1326.0 \n", + "2 84300903 M 19.69 21.25 130.00 1203.0 \n", + "3 84348301 M 11.42 20.38 77.58 386.1 \n", + "4 84358402 M 20.29 14.34 135.10 1297.0 \n", + "\n", + " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", + "0 0.11840 0.27760 0.3001 0.14710 \n", + "1 0.08474 0.07864 0.0869 0.07017 \n", + "2 0.10960 0.15990 0.1974 0.12790 \n", + "3 0.14250 0.28390 0.2414 0.10520 \n", + "4 0.10030 0.13280 0.1980 0.10430 \n", + "\n", + " ... texture_worst perimeter_worst area_worst smoothness_worst \\\n", + "0 ... 17.33 184.60 2019.0 0.1622 \n", + "1 ... 23.41 158.80 1956.0 0.1238 \n", + "2 ... 25.53 152.50 1709.0 0.1444 \n", + "3 ... 26.50 98.87 567.7 0.2098 \n", + "4 ... 16.67 152.20 1575.0 0.1374 \n", + "\n", + " compactness_worst concavity_worst concave points_worst symmetry_worst \\\n", + "0 0.6656 0.7119 0.2654 0.4601 \n", + "1 0.1866 0.2416 0.1860 0.2750 \n", + "2 0.4245 0.4504 0.2430 0.3613 \n", + "3 0.8663 0.6869 0.2575 0.6638 \n", + "4 0.2050 0.4000 0.1625 0.2364 \n", + "\n", + " fractal_dimension_worst Unnamed: 32 \n", + "0 0.11890 NaN \n", + "1 0.08902 NaN \n", + "2 0.08758 NaN \n", + "3 0.17300 NaN \n", + "4 0.07678 NaN \n", + "\n", + "[5 rows x 33 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Resolving issues" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# There are issues with this dataset - 'diagnosis' (our y object) MUST be an integer\n", + "# we can simply just create another column and fill that with the correct versions of values (ie 0, 1) & replace the intial column\n", + "fDiagnosis = pd.get_dummies(data[\"diagnosis\"]) # use 'get_dummies' method converts categorical variable into dummy/indicator variables\n", + "# note: this creates a column for each categorical, where 1 represents in each column whether a row had that value set or not\n", + "data[\"diagnosis\"] = fDiagnosis.iloc[:, -1] # replace old w in new column" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Another issue - column 'id' & 'Unnamed 32' are unusable - delete them\n", + "data.drop(columns=\"id\", inplace=True) # remove old column\n", + "data.drop(columns=\"Unnamed: 32\", inplace=True) # remove old column" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
diagnosisradius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst
0117.9910.38122.801001.00.118400.277600.30010.147100.2419...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
1120.5717.77132.901326.00.084740.078640.08690.070170.1812...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
2119.6921.25130.001203.00.109600.159900.19740.127900.2069...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
3111.4220.3877.58386.10.142500.283900.24140.105200.2597...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
4120.2914.34135.101297.00.100300.132800.19800.104300.1809...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n", + "0 1 17.99 10.38 122.80 1001.0 \n", + "1 1 20.57 17.77 132.90 1326.0 \n", + "2 1 19.69 21.25 130.00 1203.0 \n", + "3 1 11.42 20.38 77.58 386.1 \n", + "4 1 20.29 14.34 135.10 1297.0 \n", + "\n", + " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", + "0 0.11840 0.27760 0.3001 0.14710 \n", + "1 0.08474 0.07864 0.0869 0.07017 \n", + "2 0.10960 0.15990 0.1974 0.12790 \n", + "3 0.14250 0.28390 0.2414 0.10520 \n", + "4 0.10030 0.13280 0.1980 0.10430 \n", + "\n", + " symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n", + "0 0.2419 ... 25.38 17.33 184.60 \n", + "1 0.1812 ... 24.99 23.41 158.80 \n", + "2 0.2069 ... 23.57 25.53 152.50 \n", + "3 0.2597 ... 14.91 26.50 98.87 \n", + "4 0.1809 ... 22.54 16.67 152.20 \n", + "\n", + " area_worst smoothness_worst compactness_worst concavity_worst \\\n", + "0 2019.0 0.1622 0.6656 0.7119 \n", + "1 1956.0 0.1238 0.1866 0.2416 \n", + "2 1709.0 0.1444 0.4245 0.4504 \n", + "3 567.7 0.2098 0.8663 0.6869 \n", + "4 1575.0 0.1374 0.2050 0.4000 \n", + "\n", + " concave points_worst symmetry_worst fractal_dimension_worst \n", + "0 0.2654 0.4601 0.11890 \n", + "1 0.1860 0.2750 0.08902 \n", + "2 0.2430 0.3613 0.08758 \n", + "3 0.2575 0.6638 0.17300 \n", + "4 0.1625 0.2364 0.07678 \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Now view fixed data\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "x = data.iloc[:, 2:].values # values we want to classify - we only want\n", + "y = data.iloc[:, 0].values # acceptances for each row (ie either benign (0) or malignant (1))\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def fit_and_test(x_train, x_test, y_train, y_test):\n", + " model = MLPClassifier(random_state=0,max_iter=20000)\n", + " model.fit(x_train, y_train)\n", + " print(\"training score:\", model.score(x_train, y_train))\n", + " print(\"testing score:\", model.score(x_test, y_test))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### using unscaled data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1.818e+01, 7.638e+01, 4.088e+02, ..., 1.416e-01, 2.660e-01,\n", + " 9.270e-02],\n", + " [1.518e+01, 9.397e+01, 6.401e+02, ..., 1.599e-01, 2.691e-01,\n", + " 7.683e-02],\n", + " [1.956e+01, 7.854e+01, 4.610e+02, ..., 1.080e-01, 2.668e-01,\n", + " 8.174e-02],\n", + " ...,\n", + " [1.762e+01, 7.079e+01, 3.656e+02, ..., 9.861e-02, 2.289e-01,\n", + " 8.278e-02],\n", + " [1.726e+01, 7.526e+01, 4.319e+02, ..., 5.356e-02, 2.779e-01,\n", + " 8.121e-02],\n", + " [1.658e+01, 6.585e+01, 3.208e+02, ..., 8.333e-02, 2.691e-01,\n", + " 9.479e-02]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Min of unscaled train set: 0.005217\n", + "Max of unscaled train set: 528.1\n", + "Min of unscaled test set: 0.004768\n", + "Max of unscaled test set: 811.3\n" + ] + } + ], + "source": [ + "print(\"Min of unscaled train set:\", min(x_train.tolist()[0]))\n", + "print(\"Max of unscaled train set:\", max(x_train.tolist()[0]))\n", + "print(\"Min of unscaled test set:\", min(x_test.tolist()[0]))\n", + "print(\"Max of unscaled test set:\", max(x_test.tolist()[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training score: 0.9107981220657277\n", + "testing score: 0.9090909090909091\n" + ] + } + ], + "source": [ + "fit_and_test(x_train, x_test, y_train, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Scale copy of model and train (main focus of this notebook)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "sc = StandardScaler()\n", + "x_train_scaled_s = sc.fit_transform(x_train) # get mean and variances, store and \n", + "x_test_scaled_s = sc.transform(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Min of train scaled set: -0.6980307692070779\n", + "Max of train scaled set: 2.454856998389618\n", + "Min of test scaled set: -0.20461336960599075\n", + "Max of test scaled set: 2.941456724450779\n" + ] + } + ], + "source": [ + "print(\"Min of scaled train set:\", min(x_train_scaled_s.tolist()[0]))\n", + "print(\"Max of scaled train set:\", max(x_train_scaled_s.tolist()[0]))\n", + "print(\"Min of scaled test set:\", min(x_test_scaled_s.tolist()[0]))\n", + "print(\"Max of scaled test set:\", max(x_test_scaled_s.tolist()[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training score: 0.9107981220657277\n", + "testing score: 0.9090909090909091\n" + ] + } + ], + "source": [ + "fit_and_test(x_train, x_test, y_train, y_test)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week6/exercise1b.ipynb b/Week6/exercise1b.ipynb new file mode 100644 index 0000000..0e23144 --- /dev/null +++ b/Week6/exercise1b.ipynb @@ -0,0 +1,563 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing feature scaling on a dataset\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import statistics # mean, median, etc.\n", + "\n", + "# Data visualisation functionality\n", + "import matplotlib.pyplot as plt\n", 
+ "%matplotlib inline\n", + "import seaborn as sns\n", + "\n", + "from sklearn.preprocessing import OneHotEncoder # method to split dataset into 4\n", + "from sklearn.model_selection import train_test_split # method to split dataset into 4\n", + "from sklearn.neural_network import MLPClassifier # linear regression algorithm\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "cols = [\"class\",\n", + " \"alcohol\",\n", + " \"malic_acid\",\n", + " \"ash\",\n", + " \"ashalcalinity\",\n", + " \"magnesium\",\n", + " \"total_phenols\",\n", + " \"flavanoids\", \n", + " \"nonflavanoid_phenols\",\n", + " \"proanthocyanins\",\n", + " \"color_intensity\",\n", + " \"hue\",\n", + " \"od280/od315\",\n", + " \"proline\"\n", + " ]\n", + "data = pd.read_csv(\"../Week4/wines.csv\", delimiter=',', names=cols) # import dataset with custom delimeter (ie vals are seperated by ;)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing\n", + "### Check for possible issues" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + ".sum of 0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "4 False\n", + " ... \n", + "173 False\n", + "174 False\n", + "175 False\n", + "176 False\n", + "177 False\n", + "Length: 178, dtype: bool>" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.duplicated().sum" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "class 0\n", + "alcohol 0\n", + "malic_acid 0\n", + "ash 0\n", + "ashalcalinity 0\n", + "magnesium 0\n", + "total_phenols 0\n", + "flavanoids 0\n", + "nonflavanoid_phenols 0\n", + "proanthocyanins 0\n", + "color_intensity 0\n", + "hue 0\n", + "od280/od315 0\n", + "proline 0\n", + "dtype: int64" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 178 entries, 0 to 177\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 class 178 non-null int64 \n", + " 1 alcohol 178 non-null float64\n", + " 2 malic_acid 178 non-null float64\n", + " 3 ash 178 non-null float64\n", + " 4 ashalcalinity 178 non-null float64\n", + " 5 magnesium 178 non-null int64 \n", + " 6 total_phenols 178 non-null float64\n", + " 7 flavanoids 178 non-null float64\n", + " 8 nonflavanoid_phenols 178 non-null float64\n", + " 9 proanthocyanins 178 non-null float64\n", + " 10 color_intensity 178 non-null float64\n", + " 11 hue 178 non-null float64\n", + " 12 od280/od315 178 non-null float64\n", + " 13 proline 178 non-null int64 \n", + "dtypes: float64(11), int64(3)\n", + "memory usage: 19.6 KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
classalcoholmalic_acidashashalcalinitymagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315proline
0114.231.712.4315.61272.803.060.282.295.641.043.921065
1113.201.782.1411.21002.652.760.261.284.381.053.401050
2113.162.362.6718.61012.803.240.302.815.681.033.171185
3114.371.952.5016.81133.853.490.242.187.800.863.451480
4113.242.592.8721.01182.802.690.391.824.321.042.93735
\n", + "
" + ], + "text/plain": [ + " class alcohol malic_acid ash ashalcalinity magnesium total_phenols \\\n", + "0 1 14.23 1.71 2.43 15.6 127 2.80 \n", + "1 1 13.20 1.78 2.14 11.2 100 2.65 \n", + "2 1 13.16 2.36 2.67 18.6 101 2.80 \n", + "3 1 14.37 1.95 2.50 16.8 113 3.85 \n", + "4 1 13.24 2.59 2.87 21.0 118 2.80 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + "\n", + " od280/od315 proline \n", + "0 3.92 1065 \n", + "1 3.40 1050 \n", + "2 3.17 1185 \n", + "3 3.45 1480 \n", + "4 2.93 735 " + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# no issues found, proceed\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "x = data.iloc[:, 1:].values # values we want to use to classify\n", + "y = data.iloc[:, 0].values # independant var - class / quality of wine\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25)#, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "def fit_and_test(x_train, x_test, y_train, y_test):\n", + " model = MLPClassifier(random_state=0,max_iter=20000)\n", + " model.fit(x_train, y_train)\n", + " print(\"training score:\", model.score(x_train, y_train))\n", + " print(\"testing score:\", model.score(x_test, y_test))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### using unscaled data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Min of unscaled train set:\", min(x_train.tolist()[0]))\n", + "print(\"Max of unscaled train set:\", max(x_train.tolist()[0]))\n", + "print(\"Min of unscaled test set:\", min(x_test.tolist()[0]))\n", + "print(\"Max of unscaled test set:\", max(x_test.tolist()[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training score: 1.0\n", + "testing score: 0.9777777777777777\n" + ] + } + ], + "source": [ + "# run on original version\n", + "fit_and_test(x_train, x_test, y_train, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Scale copy of model and train (main focus of this notebook)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "sc = StandardScaler()\n", + "x_train_scaled_s = sc.fit_transform(x_train) # get mean and variances, store and \n", + "x_test_scaled_s = sc.transform(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Min of scaled train set:\", min(x_train_scaled_s.tolist()[0]))\n", + "print(\"Max of scaled train set:\", max(x_train_scaled_s.tolist()[0]))\n", + "print(\"Min of scaled test set:\", min(x_test_scaled_s.tolist()[0]))\n", + "print(\"Max of scaled test set:\", 
max(x_test_scaled_s.tolist()[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training score: 1.0\n", + "testing score: 0.9555555555555556\n" + ] + } + ], + "source": [ + "# run on scaled copy\n", + "fit_and_test(x_train_scaled_s, x_test_scaled_s, y_train, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import MinMaxScaler\n", + "mmc = MinMaxScaler()\n", + "x_train_scaled_mm = mmc.fit_transform(x_train) # get mean and variances, store and \n", + "x_test_scaled_mm = mmc.transform(x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Min of scaled train set: 0.19144602851323828\n", + "Max of scaled train set: 0.6973684210526314\n", + "Min of scaled test set: 0.04887983706720975\n", + "Max of scaled test set: 0.6907216494845361\n" + ] + } + ], + "source": [ + "print(\"Min of scaled train set:\", min(x_train_scaled_mm.tolist()[0]))\n", + "print(\"Max of scaled train set:\", max(x_train_scaled_mm.tolist()[0]))\n", + "print(\"Min of scaled test set:\", min(x_test_scaled_mm.tolist()[0]))\n", + "print(\"Max of scaled test set:\", max(x_test_scaled_mm.tolist()[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training score: 1.0\n", + "testing score: 0.9555555555555556\n" + ] + } + ], + "source": [ + "# run on scaled copy\n", + "fit_and_test(x_train_scaled_mm, x_test_scaled_mm, y_train, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Week6/exercise2.ipynb b/Week6/exercise2.ipynb new file mode 100644 index 0000000..ed33196 --- /dev/null +++ b/Week6/exercise2.ipynb @@ -0,0 +1,668 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook showing optimum hyperparameter selection (using grid / random search algorithms)\n", + "#### by Salih MSA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing\n", + "\n", + "### Importing libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "%matplotlib inline\n", + "\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n", + "\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# 
Import dataset\n", + "cols = [\"sepal length\", \"sepal width\", \"petal length\", \"class\"] # create manual headings\n", + "df = pd.read_csv(\"../Week1/iris.data\", names=cols) # import dataset with custom headers, store" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data exploration & Preprocessing\n", + "### Check for possible issues" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal lengthsepal widthpetal lengthclass
5.13.51.40.2Iris-setosa
4.93.01.40.2Iris-setosa
4.73.21.30.2Iris-setosa
4.63.11.50.2Iris-setosa
5.03.61.40.2Iris-setosa
...............
6.73.05.22.3Iris-virginica
6.32.55.01.9Iris-virginica
6.53.05.22.0Iris-virginica
6.23.45.42.3Iris-virginica
5.93.05.11.8Iris-virginica
\n", + "

150 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " sepal length sepal width petal length class\n", + "5.1 3.5 1.4 0.2 Iris-setosa\n", + "4.9 3.0 1.4 0.2 Iris-setosa\n", + "4.7 3.2 1.3 0.2 Iris-setosa\n", + "4.6 3.1 1.5 0.2 Iris-setosa\n", + "5.0 3.6 1.4 0.2 Iris-setosa\n", + ".. ... ... ... ...\n", + "6.7 3.0 5.2 2.3 Iris-virginica\n", + "6.3 2.5 5.0 1.9 Iris-virginica\n", + "6.5 3.0 5.2 2.0 Iris-virginica\n", + "6.2 3.4 5.4 2.3 Iris-virginica\n", + "5.9 3.0 5.1 1.8 Iris-virginica\n", + "\n", + "[150 rows x 4 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df # show dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Float64Index: 150 entries, 5.1 to 5.9\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 sepal length 150 non-null float64\n", + " 1 sepal width 150 non-null float64\n", + " 2 petal length 150 non-null float64\n", + " 3 class 150 non-null object \n", + "dtypes: float64(3), object(1)\n", + "memory usage: 5.9+ KB\n" + ] + } + ], + "source": [ + "df.info() # show basic stats" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Remedy issue (conversion of string categorical labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal lengthsepal widthpetal lengthclass
5.13.51.40.20
4.93.01.40.20
4.73.21.30.20
4.63.11.50.20
5.03.61.40.20
\n", + "
" + ], + "text/plain": [ + " sepal length sepal width petal length class\n", + "5.1 3.5 1.4 0.2 0\n", + "4.9 3.0 1.4 0.2 0\n", + "4.7 3.2 1.3 0.2 0\n", + "4.6 3.1 1.5 0.2 0\n", + "5.0 3.6 1.4 0.2 0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "le = LabelEncoder()\n", + "df[\"class\"] = le.fit_transform(df[\"class\"])\n", + "df.head() # show update table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning itself\n", + "### Split sets" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "x = df.iloc[:,:-1].values\n", + "y = df.iloc[:, -1].values\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training model (without any tuning)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "KNeighborsClassifier()" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model0 = KNeighborsClassifier()\n", + "model0.fit(x_train, y_train) # learning takes place here" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted values: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0\n", + " 2]\n", + "Predicted values: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0\n", + " 1]\n" + ] + } + ], + "source": [ + "y_test_pred = model0.predict(x_test)\n", + "print(\"Predicted values:\", y_test_pred)\n", + "print(\"Predicted values:\", y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model accuracy:0.974\n" + ] + } + ], + "source": [ + "accuracy = model0.score(x_test, y_test)\n", + "print(\"Model accuracy:{:.3f}\".format(accuracy))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training model (using grid search to find best hyper-parameters for KNN)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "GridSearchCV(cv=4, estimator=KNeighborsClassifier(),\n", + " param_grid={'leaf_size': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,\n", + " 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,\n", + " 23, 24, 25, 26, 27, 28, 29, 30, ...],\n", + " 'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,\n", + " 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,\n", + " 23, 24, 25, 26, 27, 28, 29],\n", + " 'p': [1, 2]})" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# hyperparameters we wish to tune: leaf_size, n_neighbors, p\n", + "# we'll pass values to GridSearch to cycle through and select the best from them\n", + "hyperparameters = dict(leaf_size=list(range(1,50)), n_neighbors=list(range(1,30)), p=[1,2])\n", + "model1 = KNeighborsClassifier()\n", + "start = time.time()\n", + "clf1 = GridSearchCV(model1, hyperparameters, cv=4) # tell GridSearch we want the best values from knn, for select hyperparameters, \n", + " # setting the amount of cross validation (subdataset rotation of which is training) they'll do for each versionmodel\n", + " # change our 
models internal parameters\n", + "end = time.time()\n", + "clf1.fit(x_train, y_train) # learning takes place here" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted values: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0\n", + " 2]\n", + "Predicted values: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0\n", + " 1]\n" + ] + } + ], + "source": [ + "y_test_pred = clf1.predict(x_test)\n", + "print(\"Predicted values:\", y_test_pred)\n", + "print(\"Predicted values:\", y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model accuracy:0.974\n" + ] + } + ], + "source": [ + "accuracy = clf1.score(x_test, y_test)\n", + "print(\"Model accuracy:{:.3f}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hyperparameters\n", + "\t Tuned Leaf size: 30\n", + "\t Tuned N_Neighbour: 5\n", + "\t Tuned P: 2\n", + "Time taken to evaluate and find best hyperparameters: 0.00022149085998535156\n" + ] + } + ], + "source": [ + "print(\"Hyperparameters\")\n", + "# for parameter in clf1.get_params():\n", + "# print(parameter)\n", + "print(\"\\t\", \"Tuned Leaf size:\", clf1.get_params()[\"estimator__leaf_size\"])\n", + "print(\"\\t\", \"Tuned N_Neighbour:\", clf1.get_params()[\"estimator__n_neighbors\"])\n", + "print(\"\\t\", \"Tuned P:\", clf1.get_params()[\"estimator__p\"])\n", + "print(\"Time taken to evaluate and find best hyperparameters:\", (end-start))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training model (using random search to find best hyper-parameters for KNN)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RandomizedSearchCV(cv=4, estimator=KNeighborsClassifier(),\n", + " param_distributions={'leaf_size': [1, 2, 3, 4, 5, 6, 7, 8, 9,\n", + " 10, 11, 12, 13, 14, 15,\n", + " 16, 17, 18, 19, 20, 21,\n", + " 22, 23, 24, 25, 26, 27,\n", + " 28, 29, 30, ...],\n", + " 'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8,\n", + " 9, 10, 11, 12, 13, 14,\n", + " 15, 16, 17, 18, 19, 20,\n", + " 21, 22, 23, 24, 25, 26,\n", + " 27, 28, 29],\n", + " 'p': [1, 2]})" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# hyperparameters we wish to tune: weights, n_neighbors, p\n", + "# we'll pass values to GridSearch to cycle through and select the best from them\n", + "hyperparameters = dict(leaf_size=list(range(1,50)), n_neighbors=list(range(1,30)), p=[1,2])\n", + "model2 = KNeighborsClassifier()\n", + "start = time.time()\n", + "clf2 = RandomizedSearchCV(model1, hyperparameters, cv=4) # tell GridSearch we want the best values from knn, for select hyperparameters, \n", + " # setting the amount of cross validation (subdataset rotation of which is training) they'll do for each versionmodel\n", + " # change our models internal parameters\n", + "end = time.time()\n", + "clf2.fit(x_train, y_train) # learning takes place here" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted values: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 
0 2 0 0 1 1 0 2 1 0 2 2 1 0\n", + " 2]\n", + "Predicted values: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0\n", + " 1]\n" + ] + } + ], + "source": [ + "y_test_pred = clf2.predict(x_test)\n", + "print(\"Predicted values:\", y_test_pred)\n", + "print(\"Predicted values:\", y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model accuracy:0.974\n" + ] + } + ], + "source": [ + "accuracy = clf2.score(x_test, y_test)\n", + "print(\"Model accuracy:{:.3f}\".format(accuracy))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hyperparameters\n", + "\t Tuned Leaf size: 30\n", + "\t Tuned N_Neighbour: 5\n", + "\t Tuned P: 2\n", + "Time taken to evaluate and find best hyperparameters: 9.036064147949219e-05\n" + ] + } + ], + "source": [ + "print(\"Hyperparameters\")\n", + "# for parameter in clf2.get_params():\n", + "# print(parameter)\n", + "print(\"\\t\", \"Tuned Leaf size:\", clf2.get_params()[\"estimator__leaf_size\"])\n", + "print(\"\\t\", \"Tuned N_Neighbour:\", clf2.get_params()[\"estimator__n_neighbors\"])\n", + "print(\"\\t\", \"Tuned P:\", clf2.get_params()[\"estimator__p\"])\n", + "print(\"Time taken to evaluate and find best hyperparameters:\", (end-start))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}
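
Note on reading the tuned hyperparameters in the exercise2 notebook above: the "Tuned" values printed there come from
clf.get_params()["estimator__..."], which reports the settings of the KNeighborsClassifier that was passed into the
search (its defaults: leaf_size=30, n_neighbors=5, p=2), not the combination selected by GridSearchCV/RandomizedSearchCV;
the selected combination is exposed via best_params_ after fit(). Likewise, the start/end timestamps are taken around the
construction of the search object, while the actual search work happens inside fit(). Below is a minimal sketch of the
intended measurement, assuming the same search setup and the x_train/y_train variables defined in that notebook
(variable names are illustrative, not part of the original patch):

    import time
    from sklearn.model_selection import GridSearchCV
    from sklearn.neighbors import KNeighborsClassifier

    # same search space as in the notebook
    param_grid = {"leaf_size": list(range(1, 50)),
                  "n_neighbors": list(range(1, 30)),
                  "p": [1, 2]}

    search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=4)

    start = time.time()
    search.fit(x_train, y_train)        # the cross-validated search runs inside fit()
    elapsed = time.time() - start       # so the fit call is what should be timed

    print("Best hyperparameters:", search.best_params_)  # values chosen by the search
    print("Best CV score:", search.best_score_)
    print("Search time (s):", elapsed)

The same pattern (best_params_, best_score_, timing around fit) applies to the RandomizedSearchCV cell.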