Skip to content
Permalink
8328ef52ee
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
924 lines (924 sloc) 28.1 KB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notebook showing more logisitic regression using multiple variables involving lots of preprocessing\n",
"#### by Salih MSA"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Importing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Importing libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import statistics # mean, median, etc.\n",
"\n",
"# Data visualisation functionality\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns\n",
"\n",
"from sklearn.preprocessing import OneHotEncoder # method to preprocess data (specifically converting columns->categorical datas)\n",
"from sklearn.model_selection import train_test_split # method to split dataset into 4\n",
"from sklearn.linear_model import LogisticRegression # linear regression algorithm\n",
"from sklearn.metrics import mean_squared_error, mean_absolute_error # accuracy testing method"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Importing data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv(\"titanic.csv\") # import dataset with custom headers, store"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data exploration & Preprocessing\n",
"### Check for possible issues"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<bound method NDFrame._add_numeric_operations.<locals>.sum of PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket \\\n",
"0 False False False False False False False False False \n",
"1 False False False False False False False False False \n",
"2 False False False False False False False False False \n",
"3 False False False False False False False False False \n",
"4 False False False False False False False False False \n",
".. ... ... ... ... ... ... ... ... ... \n",
"886 False False False False False False False False False \n",
"887 False False False False False False False False False \n",
"888 False False False False False True False False False \n",
"889 False False False False False False False False False \n",
"890 False False False False False False False False False \n",
"\n",
" Fare Cabin Embarked \n",
"0 False True False \n",
"1 False False False \n",
"2 False True False \n",
"3 False False False \n",
"4 False True False \n",
".. ... ... ... \n",
"886 False True False \n",
"887 False False False \n",
"888 False True False \n",
"889 False False False \n",
"890 False True False \n",
"\n",
"[891 rows x 12 columns]>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.isnull().sum"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<bound method NDFrame._add_numeric_operations.<locals>.sum of 0 False\n",
"1 False\n",
"2 False\n",
"3 False\n",
"4 False\n",
" ... \n",
"886 False\n",
"887 False\n",
"888 False\n",
"889 False\n",
"890 False\n",
"Length: 891, dtype: bool>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.duplicated().sum"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 891 entries, 0 to 890\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 PassengerId 891 non-null int64 \n",
" 1 Survived 891 non-null int64 \n",
" 2 Pclass 891 non-null int64 \n",
" 3 Name 891 non-null object \n",
" 4 Sex 891 non-null object \n",
" 5 Age 714 non-null float64\n",
" 6 SibSp 891 non-null int64 \n",
" 7 Parch 891 non-null int64 \n",
" 8 Ticket 891 non-null object \n",
" 9 Fare 891 non-null float64\n",
" 10 Cabin 204 non-null object \n",
" 11 Embarked 889 non-null object \n",
"dtypes: float64(2), int64(5), object(5)\n",
"memory usage: 83.7+ KB\n"
]
}
],
"source": [
"data.info()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>886</th>\n",
" <td>887</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>Montvila, Rev. Juozas</td>\n",
" <td>male</td>\n",
" <td>27.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>211536</td>\n",
" <td>13.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>887</th>\n",
" <td>888</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Graham, Miss. Margaret Edith</td>\n",
" <td>female</td>\n",
" <td>19.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>112053</td>\n",
" <td>30.0000</td>\n",
" <td>B42</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>888</th>\n",
" <td>889</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Johnston, Miss. Catherine Helen \"Carrie\"</td>\n",
" <td>female</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>W./C. 6607</td>\n",
" <td>23.4500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>889</th>\n",
" <td>890</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Behr, Mr. Karl Howell</td>\n",
" <td>male</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>111369</td>\n",
" <td>30.0000</td>\n",
" <td>C148</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>890</th>\n",
" <td>891</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Dooley, Mr. Patrick</td>\n",
" <td>male</td>\n",
" <td>32.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>370376</td>\n",
" <td>7.7500</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>891 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
".. ... ... ... \n",
"886 887 0 2 \n",
"887 888 1 1 \n",
"888 889 0 3 \n",
"889 890 1 1 \n",
"890 891 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
".. ... ... ... ... \n",
"886 Montvila, Rev. Juozas male 27.0 0 \n",
"887 Graham, Miss. Margaret Edith female 19.0 0 \n",
"888 Johnston, Miss. Catherine Helen \"Carrie\" female NaN 1 \n",
"889 Behr, Mr. Karl Howell male 26.0 0 \n",
"890 Dooley, Mr. Patrick male 32.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S \n",
".. ... ... ... ... ... \n",
"886 0 211536 13.0000 NaN S \n",
"887 0 112053 30.0000 B42 S \n",
"888 2 W./C. 6607 23.4500 NaN S \n",
"889 0 111369 30.0000 C148 C \n",
"890 0 370376 7.7500 NaN Q \n",
"\n",
"[891 rows x 12 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Resolving issues"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# turn any non numerical (be that string or conti. values) categories -> numerical ones\n",
"# we can simply just create another column and fill that with the correct versions of values (ie 0, 1) & replace the intial column\n",
"fSex = pd.get_dummies(data[\"Sex\"]) # use 'get_dummies' method converts categorical variable into dummy/indicator variables\n",
"data[\"Sex\"] = fSex.iloc[:, -1] # replace old w in new column\n",
"fEmbarked = pd.get_dummies(data[\"Embarked\"]) # use 'get_dummies' method converts categorical variable into dummy/indicator variables\n",
"data[\"Embarked\"] = fEmbarked.iloc[:, -1] # replace old w in new column"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Another issue - many columns are irrelevant - delete them\n",
"data.drop(columns=\"PassengerId\", inplace=True) # remove old column\n",
"data.drop(columns=\"Name\", inplace=True) # remove old column\n",
"data.drop(columns=\"Ticket\", inplace=True) # remove old column"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Some columns contain too many NaN's (IEEE NotANumber) to make it non-viable to delete the rows afflicted - delete them too\n",
"data.drop(columns=\"Cabin\", inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# now drop the rows containing NaN's\n",
"data = data.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Fare</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7.2500</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>71.2833</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>53.1000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>885</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>39.0</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>29.1250</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>886</th>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>27.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>13.0000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>887</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>19.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>30.0000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>889</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>30.0000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>890</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>32.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7.7500</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>714 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" Survived Pclass Sex Age SibSp Parch Fare Embarked\n",
"0 0 3 1 22.0 1 0 7.2500 1\n",
"1 1 1 0 38.0 1 0 71.2833 0\n",
"2 1 3 0 26.0 0 0 7.9250 1\n",
"3 1 1 0 35.0 1 0 53.1000 1\n",
"4 0 3 1 35.0 0 0 8.0500 1\n",
".. ... ... ... ... ... ... ... ...\n",
"885 0 3 0 39.0 0 5 29.1250 0\n",
"886 0 2 1 27.0 0 0 13.0000 1\n",
"887 1 1 0 19.0 0 0 30.0000 1\n",
"889 1 1 1 26.0 0 0 30.0000 0\n",
"890 0 3 1 32.0 0 0 7.7500 0\n",
"\n",
"[714 rows x 8 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Now view fixed data\n",
"data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Learning itself\n",
"### Split sets"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"x: [[ 1. 22. 1. 0. 7.25 1. ]\n",
" [ 0. 38. 1. 0. 71.2833 0. ]\n",
" [ 0. 26. 0. 0. 7.925 1. ]\n",
" ...\n",
" [ 0. 19. 0. 0. 30. 1. ]\n",
" [ 1. 26. 0. 0. 30. 0. ]\n",
" [ 1. 32. 0. 0. 7.75 0. ]]\n",
"y: 0 0\n",
"1 1\n",
"2 1\n",
"3 1\n",
"4 0\n",
" ..\n",
"885 0\n",
"886 0\n",
"887 1\n",
"889 1\n",
"890 0\n",
"Name: Survived, Length: 714, dtype: int64\n"
]
}
],
"source": [
"x = data.iloc[:, 2:].values # values we want to classify - we only want\n",
"print(\"x:\", x)\n",
"y = data.iloc[:, 0].values # acceptances for each row (ie either benign (0) or malignant (1))\n",
"print(\"y:\", data.iloc[:, 0])\n",
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0) # split dataset into train, test"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train model"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"model = LogisticRegression(max_iter=20000)\n",
"model.fit(x_train, y_train)\n",
"predictions = model.predict(x_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Test accuracy using testing values"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Predicted values:</th>\n",
" <th>Actual values</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>175</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>176</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>177</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>179 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Predicted values: Actual values\n",
"0 1 0\n",
"1 1 0\n",
"2 1 1\n",
"3 0 0\n",
"4 0 1\n",
".. ... ...\n",
"174 0 0\n",
"175 0 0\n",
"176 1 1\n",
"177 1 1\n",
"178 0 0\n",
"\n",
"[179 rows x 2 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_test_pred = model.predict(x_test) # based on our model, give it values to try to predict with\n",
"pred_vs_actual = pd.DataFrame({\"Predicted values:\": y_test_pred, \"Actual values\": y_test})\n",
"pred_vs_actual"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean squared error: 0.22905027932960895\n",
"Mean absolute error: 0.22905027932960895\n",
"Model accuracy: 0.771\n"
]
}
],
"source": [
"print(\"Mean squared error:\", mean_squared_error(y_test, y_test_pred))\n",
"print(\"Mean absolute error:\", mean_absolute_error(y_test, y_test_pred))\n",
"accuracy = model.score(x_test, y_test) # or simply called score method to use the models inherent predictions vs a dataset / subset we give it \n",
"print(\"Model accuracy: {:.3f}\".format(accuracy)) "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}