Skip to content
Permalink
8328ef52ee
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
1101 lines (1101 sloc) 78.2 KB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notebook showing linear regression using relevant independent variable(s) from a wide-ranging data set to predict a single dependent variable\n",
"#### by Salih MSA"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Importing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Importing libraries"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Data visualisation functionality\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns\n",
"\n",
"from sklearn.model_selection import train_test_split # splits x and y into train/test subsets (4 arrays in total)\n",
"from sklearn.linear_model import LinearRegression # linear regression algorithm\n",
"from sklearn.metrics import mean_squared_error, mean_absolute_error # regression error metrics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Importing data"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv(\"headbrain.csv\") # import dataset (already contains headers)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data exploration & Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 237 entries, 0 to 236\n",
"Data columns (total 4 columns):\n",
" # Column Non-Null Count Dtype\n",
"--- ------ -------------- -----\n",
" 0 Gender 237 non-null int64\n",
" 1 Age Range 237 non-null int64\n",
" 2 Head Size(cm^3) 237 non-null int64\n",
" 3 Brain Weight(grams) 237 non-null int64\n",
"dtypes: int64(4)\n",
"memory usage: 7.5 KB\n"
]
}
],
"source": [
"data.info() # show column dtypes, non-null counts and memory usage"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Gender 0\n",
"Age Range 0\n",
"Head Size(cm^3) 0\n",
"Brain Weight(grams) 0\n",
"dtype: int64"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isnull().sum() # no null values"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.duplicated().any() # no duplicated data\n",
"# no further preprocessing needed"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Gender</th>\n",
" <th>Age Range</th>\n",
" <th>Head Size(cm^3)</th>\n",
" <th>Brain Weight(grams)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4512</td>\n",
" <td>1530</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3738</td>\n",
" <td>1297</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4261</td>\n",
" <td>1335</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3777</td>\n",
" <td>1282</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4177</td>\n",
" <td>1590</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Gender Age Range Head Size(cm^3) Brain Weight(grams)\n",
"0 1 1 4512 1530\n",
"1 1 1 3738 1297\n",
"2 1 1 4261 1335\n",
"3 1 1 3777 1282\n",
"4 1 1 4177 1590"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head() # show first couple values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Variable extraction "
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1530 1297 1335 1282 1590 1300 1400 1255 1355 1375 1340 1380 1355 1522\n",
" 1208 1405 1358 1292 1340 1400 1357 1287 1275 1270 1635 1505 1490 1485\n",
" 1310 1420 1318 1432 1364 1405 1432 1207 1375 1350 1236 1250 1350 1320\n",
" 1525 1570 1340 1422 1506 1215 1311 1300 1224 1350 1335 1390 1400 1225\n",
" 1310 1560 1330 1222 1415 1175 1330 1485 1470 1135 1310 1154 1510 1415\n",
" 1468 1390 1380 1432 1240 1195 1225 1188 1252 1315 1245 1430 1279 1245\n",
" 1309 1412 1120 1220 1280 1440 1370 1192 1230 1346 1290 1165 1240 1132\n",
" 1242 1270 1218 1430 1588 1320 1290 1260 1425 1226 1360 1620 1310 1250\n",
" 1295 1290 1290 1275 1250 1270 1362 1300 1173 1256 1440 1180 1306 1350\n",
" 1125 1165 1312 1300 1270 1335 1450 1310 1027 1235 1260 1165 1080 1127\n",
" 1270 1252 1200 1290 1334 1380 1140 1243 1340 1168 1322 1249 1321 1192\n",
" 1373 1170 1265 1235 1302 1241 1078 1520 1460 1075 1280 1180 1250 1190\n",
" 1374 1306 1202 1240 1316 1280 1350 1180 1210 1127 1324 1210 1290 1100\n",
" 1280 1175 1160 1205 1163 1022 1243 1350 1237 1204 1090 1355 1250 1076\n",
" 1120 1220 1240 1220 1095 1235 1105 1405 1150 1305 1220 1296 1175 955\n",
" 1070 1320 1060 1130 1250 1225 1180 1178 1142 1130 1185 1012 1280 1103\n",
" 1408 1300 1246 1380 1350 1060 1350 1220 1110 1215 1104 1170 1120]\n"
]
}
],
"source": [
"# We want to use the correlation of COMBINED yet only RELEVANT variables to determine our model\n",
"# therefore, we essentially split the data into our single dependent variable...\n",
"y = data.iloc[:,-1].values # dependent variable (last column: Brain Weight(grams))\n",
"print(y)"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Brain Weight(grams) Gender Age Range Head Size(cm^3)\n",
"Brain Weight(grams) 1.000000 -0.465266 -0.169438 0.799570\n",
"Gender -0.465266 1.000000 -0.088652 -0.514050\n",
"Age Range -0.169438 -0.088652 1.000000 -0.105428\n",
"Head Size(cm^3) 0.799570 -0.514050 -0.105428 1.000000\n"
]
},
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# ...and the remaining variables, which are first evaluated for their correlation with the dependent variable before being used as independent variables\n",
"# to do this, we'll create a 'correlation matrix', which holds the pairwise correlation between every pair of variables\n",
"# a 'heatmap' is the visual equivalent of a given correlation matrix\n",
"# then, we'll select the variable(s) that correlate strongly with brain weight (and can therefore help predict it)\n",
"# this process is called 'feature selection'!\n",
"numeric_col = [\"Brain Weight(grams)\", \"Gender\", \"Age Range\", \"Head Size(cm^3)\"] # only interested in the latter 3's relation to the brain weight\n",
"corr_matrix = data.loc[:,numeric_col].corr() # correlation matrix formation\n",
"print(corr_matrix) # print out pure text values\n",
"sns.heatmap(corr_matrix, annot=True) # use heatmap to visualise the correlation matrix"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[4512]\n",
" [3738]\n",
" [4261]\n",
" [3777]\n",
" [4177]\n",
" [3585]\n",
" [3785]\n",
" [3559]\n",
" [3613]\n",
" [3982]\n",
" [3443]\n",
" [3993]\n",
" [3640]\n",
" [4208]\n",
" [3832]\n",
" [3876]\n",
" [3497]\n",
" [3466]\n",
" [3095]\n",
" [4424]\n",
" [3878]\n",
" [4046]\n",
" [3804]\n",
" [3710]\n",
" [4747]\n",
" [4423]\n",
" [4036]\n",
" [4022]\n",
" [3454]\n",
" [4175]\n",
" [3787]\n",
" [3796]\n",
" [4103]\n",
" [4161]\n",
" [4158]\n",
" [3814]\n",
" [3527]\n",
" [3748]\n",
" [3334]\n",
" [3492]\n",
" [3962]\n",
" [3505]\n",
" [4315]\n",
" [3804]\n",
" [3863]\n",
" [4034]\n",
" [4308]\n",
" [3165]\n",
" [3641]\n",
" [3644]\n",
" [3891]\n",
" [3793]\n",
" [4270]\n",
" [4063]\n",
" [4012]\n",
" [3458]\n",
" [3890]\n",
" [4166]\n",
" [3935]\n",
" [3669]\n",
" [3866]\n",
" [3393]\n",
" [4442]\n",
" [4253]\n",
" [3727]\n",
" [3329]\n",
" [3415]\n",
" [3372]\n",
" [4430]\n",
" [4381]\n",
" [4008]\n",
" [3858]\n",
" [4121]\n",
" [4057]\n",
" [3824]\n",
" [3394]\n",
" [3558]\n",
" [3362]\n",
" [3930]\n",
" [3835]\n",
" [3830]\n",
" [3856]\n",
" [3249]\n",
" [3577]\n",
" [3933]\n",
" [3850]\n",
" [3309]\n",
" [3406]\n",
" [3506]\n",
" [3907]\n",
" [4160]\n",
" [3318]\n",
" [3662]\n",
" [3899]\n",
" [3700]\n",
" [3779]\n",
" [3473]\n",
" [3490]\n",
" [3654]\n",
" [3478]\n",
" [3495]\n",
" [3834]\n",
" [3876]\n",
" [3661]\n",
" [3618]\n",
" [3648]\n",
" [4032]\n",
" [3399]\n",
" [3916]\n",
" [4430]\n",
" [3695]\n",
" [3524]\n",
" [3571]\n",
" [3594]\n",
" [3383]\n",
" [3499]\n",
" [3589]\n",
" [3900]\n",
" [4114]\n",
" [3937]\n",
" [3399]\n",
" [4200]\n",
" [4488]\n",
" [3614]\n",
" [4051]\n",
" [3782]\n",
" [3391]\n",
" [3124]\n",
" [4053]\n",
" [3582]\n",
" [3666]\n",
" [3532]\n",
" [4046]\n",
" [3667]\n",
" [2857]\n",
" [3436]\n",
" [3791]\n",
" [3302]\n",
" [3104]\n",
" [3171]\n",
" [3572]\n",
" [3530]\n",
" [3175]\n",
" [3438]\n",
" [3903]\n",
" [3899]\n",
" [3401]\n",
" [3267]\n",
" [3451]\n",
" [3090]\n",
" [3413]\n",
" [3323]\n",
" [3680]\n",
" [3439]\n",
" [3853]\n",
" [3156]\n",
" [3279]\n",
" [3707]\n",
" [4006]\n",
" [3269]\n",
" [3071]\n",
" [3779]\n",
" [3548]\n",
" [3292]\n",
" [3497]\n",
" [3082]\n",
" [3248]\n",
" [3358]\n",
" [3803]\n",
" [3566]\n",
" [3145]\n",
" [3503]\n",
" [3571]\n",
" [3724]\n",
" [3615]\n",
" [3203]\n",
" [3609]\n",
" [3561]\n",
" [3979]\n",
" [3533]\n",
" [3689]\n",
" [3158]\n",
" [4005]\n",
" [3181]\n",
" [3479]\n",
" [3642]\n",
" [3632]\n",
" [3069]\n",
" [3394]\n",
" [3703]\n",
" [3165]\n",
" [3354]\n",
" [3000]\n",
" [3687]\n",
" [3556]\n",
" [2773]\n",
" [3058]\n",
" [3344]\n",
" [3493]\n",
" [3297]\n",
" [3360]\n",
" [3228]\n",
" [3277]\n",
" [3851]\n",
" [3067]\n",
" [3692]\n",
" [3402]\n",
" [3995]\n",
" [3318]\n",
" [2720]\n",
" [2937]\n",
" [3580]\n",
" [2939]\n",
" [2989]\n",
" [3586]\n",
" [3156]\n",
" [3246]\n",
" [3170]\n",
" [3268]\n",
" [3389]\n",
" [3381]\n",
" [2864]\n",
" [3740]\n",
" [3479]\n",
" [3647]\n",
" [3716]\n",
" [3284]\n",
" [4204]\n",
" [3735]\n",
" [3218]\n",
" [3685]\n",
" [3704]\n",
" [3214]\n",
" [3394]\n",
" [3233]\n",
" [3352]\n",
" [3391]]\n"
]
}
],
"source": [
"# relevant variable(s) shown as: [\"Head Size(cm^3)\"]\n",
"x = data.iloc[:, 2:3].values\n",
"print(x)"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Now lets visualise the plot itself\n",
"plt.title('head size x brain weight')\n",
"plt.xlabel('head size')\n",
"plt.ylabel('brain weight')\n",
"plt.scatter(x, y, alpha=0.5) # ...where alpha is the opacity of the points (0 = fully transparent, 1 = fully opaque)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Learning itself\n",
"### Split sets"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [],
"source": [
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train dataset"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
"# Create and fit an ordinary least-squares regressor on the training split only.\n",
"# fit() returns the fitted estimator, so construction and training can be chained.\n",
"# Test-set predictions are computed once, in the evaluation cell (y_test_pred);\n",
"# the `predictions` variable previously assigned here was never read and duplicated that call.\n",
"model = LinearRegression().fit(x_train, y_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Test accuracy using testing values"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Predicted values:</th>\n",
" <th>Actual values</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1306.622675</td>\n",
" <td>1280</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1295.362866</td>\n",
" <td>1321</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1385.441338</td>\n",
" <td>1425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1255.441725</td>\n",
" <td>1250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1321.465150</td>\n",
" <td>1350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1286.918009</td>\n",
" <td>1408</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1502.133904</td>\n",
" <td>1440</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1257.744868</td>\n",
" <td>1210</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1385.953147</td>\n",
" <td>1422</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1339.122578</td>\n",
" <td>1405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>1406.425527</td>\n",
" <td>1362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>1138.493254</td>\n",
" <td>1150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>1202.725346</td>\n",
" <td>1175</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>1334.772197</td>\n",
" <td>1430</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>1221.406393</td>\n",
" <td>1120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>1233.689821</td>\n",
" <td>1192</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>1283.079438</td>\n",
" <td>1163</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>1355.756387</td>\n",
" <td>1360</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>1243.926011</td>\n",
" <td>1160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>1278.217248</td>\n",
" <td>1355</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>1238.552011</td>\n",
" <td>1225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>1418.197146</td>\n",
" <td>1370</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>1353.453244</td>\n",
" <td>1440</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>1271.051915</td>\n",
" <td>1300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>1327.095055</td>\n",
" <td>1275</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>1312.764389</td>\n",
" <td>1350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>1165.107348</td>\n",
" <td>1127</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>1285.126676</td>\n",
" <td>1355</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>1487.291428</td>\n",
" <td>1620</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>1084.753256</td>\n",
" <td>1027</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>1189.930109</td>\n",
" <td>1142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>1272.075534</td>\n",
" <td>1250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>1243.926011</td>\n",
" <td>1103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>1378.531910</td>\n",
" <td>1280</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>1342.193435</td>\n",
" <td>1340</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>1227.036298</td>\n",
" <td>1322</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>1326.839150</td>\n",
" <td>1374</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>1375.972862</td>\n",
" <td>1296</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>1332.213150</td>\n",
" <td>1240</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>1246.740963</td>\n",
" <td>1132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>1340.913911</td>\n",
" <td>1390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>1145.658587</td>\n",
" <td>1340</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>1144.379063</td>\n",
" <td>1168</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>1345.520197</td>\n",
" <td>1405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>1441.996288</td>\n",
" <td>1485</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>1264.398391</td>\n",
" <td>1255</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>1223.965441</td>\n",
" <td>1140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>1158.453825</td>\n",
" <td>1202</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>1147.961730</td>\n",
" <td>1080</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>1139.005063</td>\n",
" <td>1022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>1224.221345</td>\n",
" <td>1220</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>1290.756580</td>\n",
" <td>1230</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>1269.004677</td>\n",
" <td>1245</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>1418.453051</td>\n",
" <td>1405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>1222.174107</td>\n",
" <td>1195</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>1192.745061</td>\n",
" <td>1265</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>1139.516873</td>\n",
" <td>1078</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>1184.812014</td>\n",
" <td>1250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>1307.390389</td>\n",
" <td>1470</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>1105.737446</td>\n",
" <td>1060</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Predicted values: Actual values\n",
"0 1306.622675 1280\n",
"1 1295.362866 1321\n",
"2 1385.441338 1425\n",
"3 1255.441725 1250\n",
"4 1321.465150 1350\n",
"5 1286.918009 1408\n",
"6 1502.133904 1440\n",
"7 1257.744868 1210\n",
"8 1385.953147 1422\n",
"9 1339.122578 1405\n",
"10 1406.425527 1362\n",
"11 1138.493254 1150\n",
"12 1202.725346 1175\n",
"13 1334.772197 1430\n",
"14 1221.406393 1120\n",
"15 1233.689821 1192\n",
"16 1283.079438 1163\n",
"17 1355.756387 1360\n",
"18 1243.926011 1160\n",
"19 1278.217248 1355\n",
"20 1238.552011 1225\n",
"21 1418.197146 1370\n",
"22 1353.453244 1440\n",
"23 1271.051915 1300\n",
"24 1327.095055 1275\n",
"25 1312.764389 1350\n",
"26 1165.107348 1127\n",
"27 1285.126676 1355\n",
"28 1487.291428 1620\n",
"29 1084.753256 1027\n",
"30 1189.930109 1142\n",
"31 1272.075534 1250\n",
"32 1243.926011 1103\n",
"33 1378.531910 1280\n",
"34 1342.193435 1340\n",
"35 1227.036298 1322\n",
"36 1326.839150 1374\n",
"37 1375.972862 1296\n",
"38 1332.213150 1240\n",
"39 1246.740963 1132\n",
"40 1340.913911 1390\n",
"41 1145.658587 1340\n",
"42 1144.379063 1168\n",
"43 1345.520197 1405\n",
"44 1441.996288 1485\n",
"45 1264.398391 1255\n",
"46 1223.965441 1140\n",
"47 1158.453825 1202\n",
"48 1147.961730 1080\n",
"49 1139.005063 1022\n",
"50 1224.221345 1220\n",
"51 1290.756580 1230\n",
"52 1269.004677 1245\n",
"53 1418.453051 1405\n",
"54 1222.174107 1195\n",
"55 1192.745061 1265\n",
"56 1139.516873 1078\n",
"57 1184.812014 1250\n",
"58 1307.390389 1470\n",
"59 1105.737446 1060"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with\n",
"pred_vs_actual = pd.DataFrame({\"Predicted values:\": y_test_pred, \"Actual values\": y_test})\n",
"pred_vs_actual"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean squared error: 5264.955252408745\n",
"Mean absolute error: 59.995743221875\n",
"Model accuracy: 0.669\n"
]
}
],
"source": [
"print(\"Mean squared error:\", mean_squared_error(y_test, y_test_pred))\n",
"print(\"Mean absolute error:\", mean_absolute_error(y_test, y_test_pred))\n",
"accuracy = model.score(x_test, y_test) # score() predicts on x_test internally and returns the R^2 coefficient of determination against y_test (a goodness-of-fit measure, not a classification accuracy)\n",
"print(\"Model accuracy: {:.3f}\".format(accuracy)) "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}