Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Importing the libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"%matplotlib inline\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.linear_model import LogisticRegression\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import classification_report"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Importing the data from CSV files"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>891.000000</td>\n",
" <td>891.000000</td>\n",
" <td>891.000000</td>\n",
" <td>891</td>\n",
" <td>891</td>\n",
" <td>714.000000</td>\n",
" <td>891.000000</td>\n",
" <td>891.000000</td>\n",
" <td>891</td>\n",
" <td>891.000000</td>\n",
" <td>204</td>\n",
" <td>889</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>891</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>681</td>\n",
" <td>NaN</td>\n",
" <td>147</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Soholt, Mr. Peter Andreas Lauritz Andersen</td>\n",
" <td>male</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CA. 2343</td>\n",
" <td>NaN</td>\n",
" <td>C23 C25 C27</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>577</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>644</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>446.000000</td>\n",
" <td>0.383838</td>\n",
" <td>2.308642</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>29.699118</td>\n",
" <td>0.523008</td>\n",
" <td>0.381594</td>\n",
" <td>NaN</td>\n",
" <td>32.204208</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>257.353842</td>\n",
" <td>0.486592</td>\n",
" <td>0.836071</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>14.526497</td>\n",
" <td>1.102743</td>\n",
" <td>0.806057</td>\n",
" <td>NaN</td>\n",
" <td>49.693429</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.420000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>223.500000</td>\n",
" <td>0.000000</td>\n",
" <td>2.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>20.125000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>7.910400</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>446.000000</td>\n",
" <td>0.000000</td>\n",
" <td>3.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>28.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>14.454200</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>668.500000</td>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>38.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>31.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>891.000000</td>\n",
" <td>1.000000</td>\n",
" <td>3.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>80.000000</td>\n",
" <td>8.000000</td>\n",
" <td>6.000000</td>\n",
" <td>NaN</td>\n",
" <td>512.329200</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"count 891.000000 891.000000 891.000000 \n",
"unique NaN NaN NaN \n",
"top NaN NaN NaN \n",
"freq NaN NaN NaN \n",
"mean 446.000000 0.383838 2.308642 \n",
"std 257.353842 0.486592 0.836071 \n",
"min 1.000000 0.000000 1.000000 \n",
"25% 223.500000 0.000000 2.000000 \n",
"50% 446.000000 0.000000 3.000000 \n",
"75% 668.500000 1.000000 3.000000 \n",
"max 891.000000 1.000000 3.000000 \n",
"\n",
" Name Sex Age \\\n",
"count 891 891 714.000000 \n",
"unique 891 2 NaN \n",
"top Soholt, Mr. Peter Andreas Lauritz Andersen male NaN \n",
"freq 1 577 NaN \n",
"mean NaN NaN 29.699118 \n",
"std NaN NaN 14.526497 \n",
"min NaN NaN 0.420000 \n",
"25% NaN NaN 20.125000 \n",
"50% NaN NaN 28.000000 \n",
"75% NaN NaN 38.000000 \n",
"max NaN NaN 80.000000 \n",
"\n",
" SibSp Parch Ticket Fare Cabin Embarked \n",
"count 891.000000 891.000000 891 891.000000 204 889 \n",
"unique NaN NaN 681 NaN 147 3 \n",
"top NaN NaN CA. 2343 NaN C23 C25 C27 S \n",
"freq NaN NaN 7 NaN 4 644 \n",
"mean 0.523008 0.381594 NaN 32.204208 NaN NaN \n",
"std 1.102743 0.806057 NaN 49.693429 NaN NaN \n",
"min 0.000000 0.000000 NaN 0.000000 NaN NaN \n",
"25% 0.000000 0.000000 NaN 7.910400 NaN NaN \n",
"50% 0.000000 0.000000 NaN 14.454200 NaN NaN \n",
"75% 1.000000 0.000000 NaN 31.000000 NaN NaN \n",
"max 8.000000 6.000000 NaN 512.329200 NaN NaN "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the training data (only train.csv is read in this cell).\n",
"# Prefer a copy of train.csv next to the notebook so the notebook runs on any machine;\n",
"# fall back to the original author's absolute path when no local copy exists.\n",
"TRAIN_CSV_LOCAL = \"train.csv\"\n",
"TRAIN_CSV_FALLBACK = r\"C:\\Users\\Muejr\\OneDrive\\Desktop\\abdullah\\train.csv\"\n",
"try:\n",
"    df = pd.read_csv(TRAIN_CSV_LOCAL)\n",
"except FileNotFoundError:\n",
"    df = pd.read_csv(TRAIN_CSV_FALLBACK)\n",
"\n",
"# Summary statistics for every column, numeric and non-numeric alike\n",
"df.describe(include=\"all\")\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PassengerId 0\n",
"Survived 0\n",
"Pclass 0\n",
"Name 0\n",
"Sex 0\n",
"Age 177\n",
"SibSp 0\n",
"Parch 0\n",
"Ticket 0\n",
"Fare 0\n",
"Cabin 687\n",
"Embarked 2\n",
"dtype: int64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of missing values in each column\n",
"df.isna().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploring the data (plots)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" survived females: 74.20382165605095\n",
" survived males : 18.890814558058924\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAUK0lEQVR4nO3df5BdZ33f8ffHaxTXxpgSbSMqiVgFgSNS2+BFhA4JpolBpmkFhRQZTx0TEo1aBP1ljNM0TotDU+yWSYjlKCqjOulkUOiYgkiVKIQkDjWh0bqxZctGdCuBtZJVVrgB2WEwa3/7x712r+/eXV3Le3Ylnfdr5o7Oj+ee/cq+0kfnued5nlQVkqT2OmuxC5AkLS6DQJJaziCQpJYzCCSp5QwCSWq5sxe7gGdr6dKldeGFFy52GZJ0Wrn77ruPVdXooHOnXRBceOGFjI+PL3YZknRaSfK12c7ZNSRJLWcQSFLLGQSS1HKNBkGSdUn2J5lIcsOA8xck+WySe5PsS/LuJuuRJM3UWBAkGQG2AFcCa4Crkqzpa/Ze4IGqugS4HPgPSZY0VZMkaaYm7wjWAhNVdaCqHgd2AOv72hRwfpIAzwceAaYbrEmS1KfJIFgOHOrZn+we63Ur8APAEeA+4J9U1ZP9F0qyMcl4kvGpqamm6pWkVmoyCDLgWP+c128G7gH+OnApcGuSF8x4U9W2qhqrqrHR0YHjISRJJ6nJAWWTwMqe/RV0/uXf693Av6vOoggTSQ4CFwF/1mBdkk5x119/PUePHmXZsmXcfPPNi13OGa/JO4I9wOokq7pfAG8Adva1eQj4UYAk3we8AjjQYE2STgNHjx7l8OHDHD16dLFLaYXG7giqajrJZmA3MAJsr6p9STZ1z28FbgJuT3Ifna6kD1bVsaZqkiTN1OhcQ1W1C9jVd2xrz/YR4E1N1iBJmpsjiyWp5QwCSWo5g0CSWs4gkKSWMwgkqeUMAklqOYNAklrOIJCkljMIJKnlGh1ZLOnZeehDf3OxSzglTD/yIuBsph/5mv9NgJfceF+j1/eOQJJaziCQpJYzCCSp5QwCSWo5g0CSWs4gkKSWMwgkqeUaDYIk65LsTzKR5IYB5z+Q5J7u6/4kTyR5UZM1SZKeqbEgSDICbAGuBNYAVyVZ09umqm6pqkur6lLgZ4E7q+qRpmqSJM3U5B3BWmCiqg5U1ePADmD9HO2vAj7RYD2SpAGaDILlwKGe/cnusRmSnAusA+6Y5fzGJONJxqempua9UElqsyaDIAOO1Sxt/y5w12zdQlW1rarGqmpsdHR03gqUJDUbBJPAyp79FcCRWdpuwG4hSVoUTQbBHmB1klVJltD5y35nf6MkFwBvAD7TYC2STiNLz3mS7/sr0yw958nFLqUVGpuGuqqmk2wGdgMjwPaq2pdkU/f81m7TtwG/X1WPNVWLpNPLdRf/xWKX0CqNrkdQVbuAXX3Htvbt3w7c3mQdkqTZObJYklrOIJCkljMIJKnlDAJJajmDQJJaziCQpJYzCCSp5QwCSWo5g0CSWs4gkKSWMwgkqeUMAklqOYNAklrOIJCkljMIJKnlDAJJajmDQJJartEgSLIuyf4kE0lumKXN5UnuSbIvyZ1N1iNJmqmxpSqTjABbgCuASWBPkp1V9UBPmxcCtwHrquqhJH+tqXokSYM1eUewFpioqgNV9TiwA1jf1+ZdwKeq6iGAqvp6g/VIkgZoMgiWA4d69ie7x3q9HPirSf44yd1Jrhl0oSQbk4wnGZ+ammqoXElqpyaDIAOOVd/+2cBlwN8B3gz8fJKXz3hT1baqGquqsdHR0fmvVJJarLHvCOjcAazs2V8BHBnQ5lhVPQY8luRPgEuArzRYlySpR5N3BHuA1UlWJVkCbAB29rX5DPDDSc5Oci7wWuDBBmuSJPVp7I6gqqaTbAZ2AyPA9qral2RT9/zWqnowye8Be4EngY9X1f1N1S
RJmqnJriGqahewq+/Y1r79W4BbmqxDkjQ7RxZLUssZBJLUcgaBJLWcQSBJLWcQSFLLGQSS1HIGgSS1nEEgSS1nEEhSyxkEktRyBoEktZxBIEktZxBIUssZBJLUcgaBJLWcQSBJLWcQSFLLGQSS1HKNBkGSdUn2J5lIcsOA85cn+WaSe7qvG5usR5I0U2NrFicZAbYAVwCTwJ4kO6vqgb6mX6iqH2+qDknS3Jq8I1gLTFTVgap6HNgBrG/w50mSTkKTQbAcONSzP9k91u91Se5N8rtJXjnoQkk2JhlPMj41NdVErZLUWk0GQQYcq779/wl8f1VdAvwq8OlBF6qqbVU1VlVjo6Oj81ymJLVbk0EwCazs2V8BHOltUFXfqqpHu9u7gOclWdpgTZKkPk0GwR5gdZJVSZYAG4CdvQ2SLEuS7vbabj3faLAmSVKfOZ8aSnKcmd05T6uqF8xxbjrJZmA3MAJsr6p9STZ1z28F3gH8oyTTwLeBDVU168+TJM2/OYOgqs4HSPIh4Cjwn+n0/V8NnH+ii3e7e3b1Hdvas30rcOuzrlqSNG+G7Rp6c1XdVlXHu/36vwa8vcnCJEkLY9ggeCLJ1UlGkpyV5GrgiSYLkyQtjGGD4F3APwD+T/f1E91jkqTT3FBTTFTVV3FUsCSdkYa6I0jy8iSfT3J/d//iJP+q2dIkSQth2K6h/wj8LPBdgKraS2dcgCTpNDdsEJxbVX/Wd2x6vouRJC28YYPgWJKX0h1cluQdwMONVSVJWjDDrkfwXmAbcFGSw8BBOoPKJEmnuWGD4GtV9WNJzgPOqqrjTRYlSVo4w3YNHUyyDfgh4NEG65EkLbBhg+AVwB/Q6SI6mOTWJK9vrixJ0kIZKgiq6ttV9cmq+vvAq4AXAHc2WpkkaUEMvR5BkjckuY3OqmLn0JlyQpJ0mhvqy+IkB4F7gE8CH6iqxxqtSpK0YIZ9auiSqvpWo5VIkhbFiVYou76qbgY+nGTGymFV9f7GKpMkLYgTfUfwYPfXceDuAa85JVmXZH+SiSQ3zNHuNUme6I5YliQtoBMtVfnZ7ubeqvrzZ3PhJCPAFuAKYBLYk2RnVT0woN1H6KxtLElaYMM+NfTRJF9OclOSVw75nrXARFUdqKrHgR0MXtPgfcAdwNeHvK4kaR4NO47gjcDlwBSwLcl9Q6xHsBw41LM/2T32tCTLgbcBW5lDko1JxpOMT01NDVOyJGlIQ48jqKqjVfUxYBOdR0lvPMFbMugyffu/DHywquZc/7iqtlXVWFWNjY6ODluyJGkIw44j+AHgncA7gG/Q6eb5Fyd42ySwsmd/BXCkr80YsCMJwFLgLUmmq+rTw9QlSXruhh1H8J+ATwBvqqr+v8xnswdYnWQVcJjOimbPWPC+qlY9tZ3kduB3DAFJWlgnDILuUz3/u6p+5dlcuKqmk2ym8zTQCLC9qvYl2dQ9P+f3ApKkhXHCIKiqJ5J8b5Il3ad/hlZVu4BdfccGBkBVXftsri1Jmh9DL0wD3JVkJ/D0PENV9dFGqpIkLZhhg+BI93UWcH5z5UiSFtpQQVBV/6bpQiRJi2PYx0f/iJljAKiqvz3vFUmSFtSwXUPX9WyfA7wdmJ7/ciRJC23YrqH+mUbvSuJSlZJ0Bhi2a+hFPbtn0RkRvKyRiiRJC2rYrqG7+f/fEUwDXwXe00RBkqSFdaIVyl4DHHpqKogkP0nn+4GvAg/M8VZJ0mniRLOP/jrwOECSHwF+CfgN4JvAtmZLkyQthBN1DY1U1SPd7XcC26rqDuCOJPc0W5okaSGc6I5gJMlTYfGjwB/2nBv2+wVJ0insRH+ZfwK4M8kx4NvAFwCSvIxO95Ak6TR3osXrP5zk88CLgd+vqqeeHDqLzlrDkqTT3DDTUH9pwLGvNFOOJGmhDb1msSTpzGQQSFLLNRoESdYl2Z9kIskNA86vT7I3yT1JxpO8vsl6JEkzNfYIaHet4y3AFcAksCfJzqrqHZH8eWBnVVWSi4
FPAhc1VZMkaaYm7wjWAhNVdaC71vEOYH1vg6p6tOdJpPMYsOaBJKlZTQbBcuBQz/5k99gzJHlbki8D/w34qUEXSrKx23U0PjU11UixktRWTQZBBhwbtMrZf62qi4C3AjcNulBVbauqsaoaGx0dnecyJandmgyCSWBlz/4K4MhsjavqT4CXJlnaYE2SpD5NBsEeYHWSVUmWABuAnb0NkrwsSbrbrwaWAN9osCZJUp/Gnhqqqukkm4HdwAiwvar2JdnUPb+VztoG1yT5Lp25jN7Z8+WxJGkBNDqDaFXtAnb1Hdvas/0R4CNN1iBJmpsjiyWp5QwCSWo5g0CSWs4gkKSWMwgkqeUMAklqOYNAklrOIJCkljMIJKnlDAJJajmDQJJaziCQpJYzCCSp5QwCSWq5Rqeh1qnt+uuv5+jRoyxbtoybb755scuRtEgMghY7evQohw8fXuwyJC0yu4YkqeUaDYIk65LsTzKR5IYB569Osrf7+mKSS5qsR5I0U2NBkGQE2AJcCawBrkqypq/ZQeANVXUxcBOwral6JEmDNXlHsBaYqKoDVfU4sANY39ugqr5YVf+3u/slYEWD9UiSBmgyCJYDh3r2J7vHZvMe4HcHnUiyMcl4kvGpqal5LFGS1GQQZMCxGtgweSOdIPjgoPNVta2qxqpqbHR0dB5LlCQ1+fjoJLCyZ38FcKS/UZKLgY8DV1bVNxqsR5I0QJNBsAdYnWQVcBjYALyrt0GSlwCfAv5hVX2lwVqe4bIP/OZC/ahT2vnHjjMCPHTsuP9NgLtvuWaxS5AWRWNBUFXTSTYDu4ERYHtV7UuyqXt+K3Aj8L3AbUkApqtqrKmaJEkzNTqyuKp2Abv6jm3t2f5p4KebrEGSNDdHFktSyxkEktRyBoEktZxBIEktZxBIUssZBJLUci5M02JPLjnvGb9KaieDoMUeW/2mxS5B0inAriFJajmDQJJaziCQpJYzCCSp5QwCSWo5g0CSWs4gkKSWMwgkqeUMAklquUaDIMm6JPuTTCS5YcD5i5L8aZLvJLmuyVokSYM1NsVEkhFgC3AFMAnsSbKzqh7oafYI8H7grU3VIUmaW5N3BGuBiao6UFWPAzuA9b0NqurrVbUH+G6DdUiS5tBkECwHDvXsT3aPSZJOIU0GQQYcq5O6ULIxyXiS8ampqedYliSpV5NBMAms7NlfARw5mQtV1baqGquqsdHR0XkpTpLU0WQQ7AFWJ1mVZAmwAdjZ4M+TJJ2Exp4aqqrpJJuB3cAIsL2q9iXZ1D2/NckyYBx4AfBkkn8KrKmqbzVVlyTpmRpdoayqdgG7+o5t7dk+SqfLSJK0SBxZLEktZxBIUssZBJLUcgaBJLWcQSBJLWcQSFLLGQSS1HIGgSS1nEEgSS1nEEhSyxkEktRyBoEktZxBIEktZxBIUssZBJLUcgaBJLWcQSBJLWcQSFLLNRoESdYl2Z9kIskNA84nyce65/cmeXWT9UiSZmosCJKMAFuAK4E1wFVJ1vQ1uxJY3X1tBH6tqXokSYM1eUewFpioqgNV9TiwA1jf12Y98JvV8SXghUle3GBNkqQ+Zzd47eXAoZ79SeC1Q7RZDjzc2yjJRjp3DACPJtk/v6W22lLg2GIXcSrIv//JxS5Bz+Rn8ym/kPm4yvfPdqLJIBhUeZ1EG6pqG7BtPorSMyUZr6qxxa5D6udnc+E02TU0Cazs2V8BHDmJNpKkBjUZBHuA1UlWJVkCbAB29rXZCVzTfXroh4BvVtXD/ReSJDWnsa6hqppOshnYDYwA26tqX5JN3fNbgV3AW4AJ4C+BdzdVj2Zll5tOVX42F0iqZnTJS5JaxJHFktRyBoEktZxBoKcluTzJ7yx2HTozJHl/kgeT/FZD1//XSa5r4tpt0+Q4Aknt9o+BK6vq4GIXorl5R3CGSXJhki8n+XiS+5P8VpIfS3JXkv+VZG339cUkf9799RUDrnNeku1J9nTb9U8PIs0qyVbgbwA7k/zcoM9Skm
uTfDrJZ5McTLI5yT/vtvlSkhd12/1M9733JrkjybkDft5Lk/xekruTfCHJRQv7Oz69GQRnppcBvwJcDFwEvAt4PXAd8C+BLwM/UlWvAm4E/u2Aa/wc8IdV9RrgjcAtSc5bgNp1BqiqTXQGh74ROI/ZP0s/SOfzuRb4MPCX3c/lnwLXdNt8qqpeU1WXAA8C7xnwI7cB76uqy+h8zm9r5nd2ZrJr6Mx0sKruA0iyD/h8VVWS+4ALgQuA30iyms6UHs8bcI03AX+vpw/2HOAldP4gSs/GbJ8lgD+qquPA8STfBD7bPX4fnX/IAPxgkl8EXgg8n87YpKcleT7wt4D/kjw9a833NPEbOVMZBGem7/RsP9mz/ySd/+c30fkD+LYkFwJ/POAaAd5eVU7wp+dq4GcpyWs58WcV4HbgrVV1b5Jrgcv7rn8W8BdVden8lt0edg210wXA4e72tbO02Q28L91/YiV51QLUpTPTc/0snQ88nOR5wNX9J6vqW8DBJD/RvX6SXPIca24Vg6CdbgZ+KclddKb/GOQmOl1Ge5Pc392XTsZz/Sz9PPA/gM/R+X5rkKuB9yS5F9jHzLVPNAenmJCklvOOQJJaziCQpJYzCCSp5QwCSWo5g0CSWs4gkJ6F7rw5+5LsTXJPd1CUdFpzZLE0pCSvA34ceHVVfSfJUmDJIpclPWfeEUjDezFwrKq+A1BVx6rqSJLLktzZnflyd5IXJ7kgyf6nZnZN8okkP7Oo1UuzcECZNKTu5Gb/HTgX+APgt4EvAncC66tqKsk7gTdX1U8luQL4EJ2ZYK+tqnWLVLo0J7uGpCFV1aNJLgN+mM50yr8N/CKdqZQ/151KZwR4uNv+c935b7YAzn2jU5Z3BNJJSvIO4L3AOVX1ugHnz6Jzt7AKeEtV7V3gEqWh+B2BNKQkr+iu4fCUS+mszzDa/SKZJM9L8sru+X/WPX8VsL07e6Z0yvGOQBpSt1voV+kskDINTAAbgRXAx+hM73028Mt07gQ+A6ytquNJPgocr6pfWIzapbkYBJLUcnYNSVLLGQSS1HIGgSS1nEEgSS1nEEhSyxkEktRyBoEktdz/A4OplDtdXm1OAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Survival by sex: bar plot plus the survival percentage of each group\n",
"sns.barplot(x=\"Sex\", y=\"Survived\", data=df)\n",
"\n",
"# Survived is treated as a 0/1 flag here, so the group mean is the share of survivors.\n",
"# Unlike value_counts(normalize=True)[1], the mean does not raise KeyError\n",
"# for a group that has no survivors.\n",
"female_survival_pct = df.loc[df[\"Sex\"] == \"female\", \"Survived\"].mean() * 100\n",
"male_survival_pct = df.loc[df[\"Sex\"] == \"male\", \"Survived\"].mean() * 100\n",
"\n",
"print(\" survived females:\", female_survival_pct)\n",
"print(\" survived males :\", male_survival_pct)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"first class: 62.96296296296296\n",
"second class: 47.28260869565217\n",
"third class: 24.236252545824847\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAATB0lEQVR4nO3dfZBdd33f8ffHazTEjikxVkeMJGMFRKhJjZNsRDumgZA4kUk7guZJxA2lhWjUqSH5AxSnU1weynSQM0wLmKhq4rrJpLhpTYOaKjgpEJuah2hNbGPZmFHlYK3sjVe4xg/1VJb97R97TK+vrrRX8p69kn/v18wd3XPO7579rO/Mfnx+955zUlVIktp1xqQDSJImyyKQpMZZBJLUOItAkhpnEUhS486cdIATdd5559UFF1ww6RiSdFq59dZbD1XVylHbTrsiuOCCC5iZmZl0DEk6rST51rG2OTUkSY2zCCSpcRaBJDXOIpCkxvVaBEk2Jrknyb4kV47Y/t4kt3WPO5M8leTcPjNJkp6ttyJIMgVcA1wGXAi8NcmFg2Oq6uqquriqLgZ+A7ipqh7qK5Mk6Wh9HhFsAPZV1f6qOgxcD2w6zvi3Ap/qMY8kaYQ+i2A1cGBgebZbd5QkZwEbgRuOsX1LkpkkM/Pz80seVJJa1ucJZRmx7lg3P/h7wC3Hmhaqqp3AToDp6enn7Q0Utm3bxtzcHKtWrWL79u2TjiOpEX0WwSywdmB5DXD/McZuxmkh5ubmOHjw4KRjSGpMn1NDe4D1SdYlWcHCH/tdw4OS/DXg9cBneswiSTqG3o4IqupIkiuAG4Ep4Nqq2ptka7d9Rzf0LcCfVNXjfWWRJB1brxedq6rdwO6hdTuGlq8DruszhyTp2DyzWJIaZxFIUuMsAklqnEUgSY2zCCSpcRaBJDXOIpCkxlkEktQ4i0CSGmcRSFLjLAJJapxFIEmNswgkqXEWgSQ1ziKQpMb1ej+CSfuR9/7upCOckHMOPcoUcN+hR0+r7Lde/bZJR5D0HHhEIEmNswgkqXEWgSQ1ziKQpMZZBJLUuF6LIMnGJPck2ZfkymOMeUOS25LsTXJTn3kkSUfr7eujSaaAa4BLgVlgT5JdVXXXwJgXA58ENlbVfUn+el95JEmj9XlEsAHYV1X7q+owcD2waWjMLwGfrqr7AKrqwR7zSJJG6LMIVgMHBpZnu3WDXgl8X5I/S3JrkpFnJiXZkmQmycz8/HxPcSWpTX0WQUasq6HlM4EfAX4G+GngfUleedSLqnZW1XRVTa9cuXLpk0pSw/q8xMQssHZgeQ1w/4gxh6rqceDxJDcDrwG+2WMuSdKAPo8I9gDrk6xLsgLYDOwaGvMZ4O8kOTPJWcBrgbt7zCRJGtLbEUFVHUlyBXAjMAVcW1V7k2zttu+oqruTfBa4A3ga+O2qurOvTJKko/V69dGq2g3sHlq3Y2j5auDqPnNIko7NM4slqXEWgSQ1ziKQpMZZBJLUuOf1rSpPN0+vOPtZ/0rScrAITiGPr/+pSUeQ1CCnhiSpcRaBJDXOIpCkxlkEktQ4PyyWlsi2bduYm5tj1apVbN++fdJxpLFZBNISmZub4+DBg5OOIZ0wp4YkqXEWgSQ1ziKQpMZZBJLUOItAkhpnEUhS4ywCSWqcRSBJjbMIJKlxvRZBko1J7kmyL8mVI7a/Icl3ktzWPa7qM48k6Wi9XWIiyRRwDXApMAvsSbKrqu4aGvrFqvq7feWQJB1fn0cEG4B9VbW/qg4D1wObevx5kqST0GcRrAYODCzPduuG/e0ktyf54ySvHrWjJFuSzCSZmZ+f7yOrJDWrzyLIiHU1tPw14GVV9Rrg48AfjtpRVe2squmqml65cuUSx5SktvVZBLPA2oHlNcD9gwOq6pGqeqx7vht4QZLzeswkSRrSZxHsAdYnWZdkBbAZ2DU4IMmqJOmeb+jyfLvHTJKkIb19a6iqji
S5ArgRmAKuraq9SbZ223cAPwf8kyRHgCeAzVU1PH0kSepRr3co66Z7dg+t2zHw/BPAJ/rMIEk6Ps8slqTGWQSS1DhvXq9T1n0f/JuTjnBCjjx0LnAmRx761mmT/fyrvj7pCDoFeEQgSY2zCCSpcRaBJDXOIpCkxlkEktQ4i0CSGmcRSFLjLAJJapxFIEmNswgkqXEWgSQ1ziKQpMZZBJLUuONefTTJoxx9w/nvqqoXLXkiSdKyOm4RVNU5AEk+CMwBvwcEuBw4p/d0kqTejTs19NNV9cmqerSqHqmq3wJ+ts9gkqTlMW4RPJXk8iRTSc5IcjnwVJ/BJEnLY9wi+CXgF4C/6h4/3607riQbk9yTZF+SK48z7keTPJXk58bMI0laImPdqrKq/hLYdCI7TjIFXANcCswCe5Lsqqq7Roz7CHDjiexfkrQ0xjoiSPLKJJ9Lcme3fFGSf77IyzYA+6pqf1UdBq5ndJm8C7gBePAEckuSlsi4U0P/DvgN4EmAqroD2LzIa1YDBwaWZ7t135VkNfAWYMeYOSRJS2ysqSHgrKr68ySD644s8pqMWDd8TsK/Bn69qp4a2vezd5RsAbYAnH/++YunlSbgvBc+DRzp/pVOH+MWwaEkL6f7Q959qPvAIq+ZBdYOLK8B7h8aMw1c35XAecCbkhypqj8cHFRVO4GdANPT08c8wU2apPdc9PCkI0gnZdwi+Kcs/CF+VZKDwL0snFR2PHuA9UnWAQdZmEp61jeNqmrdM8+TXAf80XAJSJL6NW4RfKuqfjLJ2cAZVfXoYi+oqiNJrmDh20BTwLVVtTfJ1m67nwtI0ilg3CK4N8lngf8EfH7cnVfVbmD30LqRBVBVbx93v5KkpTPut4Z+APgfLEwR3ZvkE0le118sSdJyGasIquqJqvqDqvr7wA8BLwJu6jWZJGlZjH0/giSvT/JJ4GvAC1m45IQk6TQ31mcESe4FbgP+AHhvVT3eaypJ0rIZ98Pi11TVI70mkSRNxGJ3KNtWVduBDyc56kSuqnp3b8kkSctisSOCu7t/Z/oOIkmajMVuVfnfuqd3VNVfLEMeSdIyG/dbQx9N8o0kH0ry6l4TSZKW1bjnEfw48AZgHtiZ5Otj3I9AknQaGPs8gqqaq6qPAVtZ+CrpVb2lkiQtm3HvUPY3kry/u0PZJ4AvsXBZaUnSaW7c8wj+PfAp4KeqavieApKk09iiRdDdXP5/VdW/WYY8kqRltujUUFU9BbwkyYplyCNJWmZj35gGuCXJLuC71xmqqo/2kkqStGzGLYL7u8cZwDn9xZEkLbexiqCqPtB3EEnSZIx7GeovAKMuOvfGJU8kSVpW404NvWfg+QuBnwWOLH0cSdJyG3dq6NahVbck8VaVkvQ8MO6ZxecOPM5LshFYNcbrNia5J8m+JFeO2L4pyR1Jbksyk+R1J/E7SJKeg3Gnhm7l/39GcAT4S+Adx3tBdyLaNcClwCywJ8muqrprYNjngF1VVUkuYuFWmK8aP74k6bk67hFBkh9Nsqqq1lXV9wMfAL7RPe463muBDcC+qtpfVYeB64FNgwOq6rGqeqZgzmbEB9KSpH4tNjX0b4HDAEl+DPhXwH8AvgPsXOS1q4EDA8uz3bpnSfKWJN8A/jvwj0ftKMmWbupoZn5+fpEfK0k6EYsVwVRVPdQ9/0VgZ1XdUFXvA16xyGszYt2or6D+16p6FfBm4EOjdlRVO6tquqqmV65cuciPlSSdiEWLIMkznyP8BPD5gW2Lfb4wC6wdWF7DwtnJI1XVzcDLk5y3yH4lSUtosSL4FHBTks8ATwBfBEjyChamh45nD7A+ybrugnWbgV2DA5K8Ikm65z8MrAC+fcK/hSTppC128/oPJ/kc8FLgTwY+2D0DeNcirz2S5ArgRmAKuLaq9ibZ2m3fwcKJaW9L8iQLRfOLAz9DkrQMFv36aFV9ZcS6b46z86raDeweWrdj4PlHgI+Msy9JUj/GPY9Akp63tm3bxtzcHK
tWrWL79u2TjrPsLAJJzZubm+PgwYOTjjExY11iQpL0/GURSFLjLAJJapxFIEmNswgkqXEWgSQ1ziKQpMZZBJLUOItAkhpnEUhS47zEhKReXPLxSyYdYWwrHl7BGZzBgYcPnFa5b3nXLUuyH48IJKlxFoEkNc4ikKTGWQSS1DiLQJIaZxFIUuMsAklqnEUgSY3rtQiSbExyT5J9Sa4csf3yJHd0jy8leU2feSRJR+utCJJMAdcAlwEXAm9NcuHQsHuB11fVRcCHgJ195ZEkjdbnEcEGYF9V7a+qw8D1wKbBAVX1par6393iV4A1PeaRJI3QZxGsBg4MLM92647lHcAfj9qQZEuSmSQz8/PzSxhRkqDOKp4++2nqrJp0lIno86JzGbFu5H/lJD/OQhG8btT2qtpJN200PT3d5jslqTdPXvLkpCNMVJ9FMAusHVheA9w/PCjJRcBvA5dV1bd7zCNJGqHPqaE9wPok65KsADYDuwYHJDkf+DTwy1X1zR6zSJKOobcjgqo6kuQK4EZgCri2qvYm2dpt3wFcBbwE+GQSgCNVNd1XJknS0Xq9MU1V7QZ2D63bMfD8ncA7+8wgSTo+zyyWpMZZBJLUOItAkhpnEUhS4ywCSWqcRSBJjbMIJKlxFoEkNc4ikKTGWQSS1DiLQJIaZxFIUuMsAklqnEUgSY2zCCSpcRaBJDXOIpCkxlkEktQ4i0CSGmcRSFLjLAJJalyvRZBkY5J7kuxLcuWI7a9K8uUk/zfJe/rMIkka7cy+dpxkCrgGuBSYBfYk2VVVdw0Mewh4N/DmvnJIko6vzyOCDcC+qtpfVYeB64FNgwOq6sGq2gM82WMOSdJx9FkEq4EDA8uz3boTlmRLkpkkM/Pz80sSTpK0oM8iyIh1dTI7qqqdVTVdVdMrV658jrEkSYP6LIJZYO3A8hrg/h5/niTpJPRZBHuA9UnWJVkBbAZ29fjzJEknobdvDVXVkSRXADcCU8C1VbU3ydZu+44kq4AZ4EXA00l+Dbiwqh7pK5ck6dl6KwKAqtoN7B5at2Pg+RwLU0aSpAnxzGJJapxFIEmNswgkqXEWgSQ1ziKQpMZZBJLUOItAkhpnEUhS4ywCSWqcRSBJjbMIJKlxFoEkNc4ikKTGWQSS1DiLQJIaZxFIUuMsAklqnEUgSY2zCCSpcRaBJDXOIpCkxvVaBEk2Jrknyb4kV47YniQf67bfkeSH+8wjSTpab0WQZAq4BrgMuBB4a5ILh4ZdBqzvHluA3+orjyRptD6PCDYA+6pqf1UdBq4HNg2N2QT8bi34CvDiJC/tMZMkaciZPe57NXBgYHkWeO0YY1YDDwwOSrKFhSMGgMeS3LO0UU8p5wGHJh3iROQ3/+GkI5xKTq/3719k0glOJafXewfk3Sf0/r3sWBv6LIJRCeskxlBVO4GdSxHqVJdkpqqmJ51DJ8f37/TV8nvX59TQLLB2YHkNcP9JjJEk9ajPItgDrE+yLskKYDOwa2jMLuBt3beH/hbwnap6YHhHkqT+9DY1VFVHklwB3AhMAddW1d4kW7vtO4DdwJuAfcD/Af5RX3lOI01MgT2P+f6dvpp971J11JS8JKkhnlksSY2zCCSpcRbBKSLJtUkeTHLnpLPoxCRZm+QLSe5OsjfJr046k8aX5IVJ/jzJ7d3794FJZ1pufkZwikjyY8BjLJxp/YOTzqPxdWfDv7SqvpbkHOBW4M1VddeEo2kMSQKcXVWPJXkB8D+BX+2udtAEjwhOEVV1M/DQpHPoxFXVA1X1te75o8DdLJwhr9NAd4mbx7rFF3SPpv4P2SKQllCSC4AfAr462SQ6EUmmktwGPAj8aVU19f5ZBNISSfK9wA3Ar1XVI5POo/FV1VNVdTELVzfYkKSp6VmLQFoC3dzyDcDvV9WnJ51HJ6eqHgb+DNg44SjLyiKQnqPuw8bfAe6uqo9OOo9OTJKVSV7cPf8e4CeBb0w21fKyCE4RST4FfBn4gSSzSd4x6Uwa2y
XALwNvTHJb93jTpENpbC8FvpDkDhaukfanVfVHE860rPz6qCQ1ziMCSWqcRSBJjbMIJKlxFoEkNc4ikKTGWQTSkCRPdV8BvTPJf05y1nHGvj/Je5Yzn7TULALpaE9U1cXdVWAPA1snHUjqk0UgHd8XgVcAJHlbkju669b/3vDAJL+SZE+3/YZnjiSS/Hx3dHF7kpu7da/uroF/W7fP9cv6W0kDPKFMGpLksar63iRnsnD9oM8CNwOfBi6pqkNJzq2qh5K8H3isqn4zyUuq6tvdPv4l8FdV9fEkXwc2VtXBJC+uqoeTfBz4SlX9fpIVwFRVPTGRX1jN84hAOtr3dJckngHuY+E6Qm8E/ktVHQKoqlH3jvjBJF/s/vBfDry6W38LcF2SXwGmunVfBv5Zkl8HXmYJaJLOnHQA6RT0RHdJ4u/qLiy32OHzdSzcmez2JG8H3gBQVVuTvBb4GeC2JBdX1X9M8tVu3Y1J3llVn1/i30Mai0cE0ng+B/xCkpcAJDl3xJhzgAe6S1Jf/szKJC+vqq9W1VXAIWBtku8H9lfVx4BdwEW9/wbSMXhEII2hqvYm+TBwU5KngL8A3j407H0s3JnsW8DXWSgGgKu7D4PDQqHcDlwJ/IMkTwJzwAd7/yWkY/DDYklqnFNDktQ4i0CSGmcRSFLjLAJJapxFIEmNswgkqXEWgSQ17v8BFZZJgwoRyUUAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Survival by passenger class: bar plot plus the survival percentage of each class\n",
"sns.barplot(x=\"Pclass\", y=\"Survived\", data=df)\n",
"\n",
"# The computation is the same for every class, so loop instead of repeating it.\n",
"# Survived is treated as a 0/1 flag, so the group mean is the share of survivors;\n",
"# unlike value_counts(normalize=True)[1] it does not raise KeyError for a class with no survivors.\n",
"for label, pclass in ((\"first class:\", 1), (\"second class:\", 2), (\"third class:\", 3)):\n",
"    print(label, df.loc[df[\"Pclass\"] == pclass, \"Survived\"].mean() * 100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAASjUlEQVR4nO3df7DddX3n8eeLxIxCodQmaywhG9qmtrgr/kjRlh1FrRbsD3a37i6o1XXaybAj1k5bI/0xaus6O41dt7WiNEWqtNasFd1N3ay421qluiiJRX6KkwUkN3AXshQKlFkaeO8f54scb+6Pc5P7PYfD5/mYuXPP98f93tfNJHnd7+d8v59vqgpJUruOmXQASdJkWQSS1DiLQJIaZxFIUuMsAklq3OpJB1iutWvX1qZNmyYdQ5Kmyt69ew9W1br5tk1dEWzatIk9e/ZMOoYkTZUk31xom0NDktQ4i0CSGmcRSFLjLAJJapxFIEmNswgkqXG9FUGSS5PcleT6BbYnyfuS7EtybZLn95VFkrSwPs8IPgyctcj2s4HN3cdW4IM9ZpEkLaC3G8qq6gtJNi2yyznAZTV4IMJVSU5M8syqurOvTFKftm3bxuzsLOvXr2f79u2TjiONbJJ3Fp8E7B9anunWHVYESbYyOGtg48aNYwknLdfs7CwHDhyYdAxp2Sb5ZnHmWTfv49KqakdVbamqLevWzTtVhiTpCE2yCGaAk4eWNwB3TCiLJDVrkkWwC3h9d/XQi4D7fH9Aksavt/cIknwMOBNYm2QGeAfwFICquhjYDbwK2Af8PfDGvrJIkhbW51VD5y2xvYA39fX9JUmj8c5iSWqcRSBJjbMIJKlxFoEkNc4ikKTGWQSS1DiLQJIaZxFIUuMsAklqnEUgSY2zCCSpcRaBJDXOIpCkxlkEktQ4i0CSGmcRSFLjLAJJapxFIEmNswgkqXEWgSQ1ziKQpMZZBJLUOItAkhpnEUhS4ywCSWqcRSBJjbMIJKlxFoEkNc4ikKTGWQSS1DiLQJIaZxFIUuN6LYIkZyW5Ocm+JBfOs/07k/x5kq8luSHJG/vMI0k6XG9FkGQVcBFwNnAqcF6SU+fs9ibgxqo6DTgT+I9J1vSVSZJ0uD7PCE4H9lXVLVX1MLATOGfOPgUcnyTAdwD3AId6zCRJmqPPIjgJ2D+0PNOtG/Z+4IeAO4DrgLdU1aNzD5Rka5I9SfbcfffdfeWVpCb1WQSZZ13NWf5x4Brge4DnAu9PcsJhX1S1o6q2VNWWdevWrXxSSWpYn0UwA5w8tLyBwW/+w94IfLIG9gG3Aj/YYyZJ0hx9FsHVwOYkp3RvAJ8L7Jqzz+3AywGSPAN4FnBLj5kkSXOs7uvAVXUoyQXAFcAq4NKquiHJ+d32i4F3AR9Och2DoaS3VdXBvjJJkg7XWxEAVNVuYPecdRcPvb4DeGWfGSRJi/POYklqnEUgSY2zCCSpcRaBJDXOIpCkxlkEktQ4i0CSGmcRSFLjLAJJapxFIEmNswgkqXEWgSQ1ziKQpMZZBJLUOItAkhpnEUhS4ywCSWqcRSBJjbMIJKlxFoEkNc4ikKTGWQSS1DiLQJIaZxFIUuMsAklq3OpJB9DK2bZtG7Ozs6xfv57t27dPOo6kKWERPInMzs5y4MCBSceQNGUcGpKkxlkEktQ4i0CSGmcRSFLjLAJJalyvRZDkrCQ3J9mX5MIF9jkzyTVJbkjy+T7zSJIOt+jlo0nuB2qh7VV1wiJfuwq4CHgFMANcnWRXVd04tM+JwAeAs6rq9iT/aJn5JUlHadEiqKrjAZL8FjAL/DEQ4LXA8Usc+3RgX1Xd0h1jJ3AOcOPQPq8BPllVt3ff764j+Bn0JOENcdJkjHpD2Y9X1QuHlj+Y5MvAYv9aTwL2Dy3PAC+cs88PAE9J8lcMiuX3quqyETPpScYb4nSk/CXi6IxaBI8keS2wk8FQ0XnAI0t8TeZZN3
eYaTXwAuDlwNOA/5Xkqqr6xrcdKNkKbAXYuHHjiJGlpX3+xS9ZsWM9tHoVJDw0M7Mix33JF3zLbFT+EnF0Rn2z+DXAvwb+T/fxr7p1i5kBTh5a3gDcMc8+n6mqB6vqIPAF4LS5B6qqHVW1paq2rFu3bsTIkqRRjHRGUFW3MRjfX46rgc1JTgEOAOdyeHn8V+D9SVYDaxgMHf2nZX4fSdJRGOmMIMkPJPmLJNd3y89J8huLfU1VHQIuAK4AbgI+XlU3JDk/yfndPjcBnwGuBb4CXFJV1x/5jyNJWq5R3yP4Q+CtwB8AVNW1Sf4U+PeLfVFV7QZ2z1l38Zzl9wDvGTWwJGlljfoewbFV9ZU56w6tdBhJ0viNWgQHk3wf3VU/SV4N3NlbKknS2Iw6NPQmYAfwg0kOALcyuKlMkjTlRi2Cb1bVjyU5Djimqu7vM5QkaXxGHRq6NckO4EXAAz3mkSSN2ahnBM8CforBENGHknwa2FlVf91bsobc/lv/dEWOc+iepwOrOXTPN1fkmBvfft3Rh5L0hDfSGUFVPVRVH6+qfwk8DzgB8P53SXoSGPl5BElekuQDwFeBpzKYckKSNOVGGhpKcitwDfBx4K1V9WCvqSRJYzPqewSnVdXf9ZpEkjQRSz2hbFtVbQfeneSwJ5VV1S/0lkySNBZLnRHc1H3e03cQSdJkLPWoyj/vXl5bVX8zhjySpDEb9aqh9yb5epJ3JXl2r4kkSWM16n0ELwXOBO4GdiS5bqnnEUiSpsOoVw1RVbPA+5J8DtgGvJ0lnkegNpzx+2esyHHW3LuGYziG/ffuX5FjfvHNX1yBVNKT36hPKPuhJO/snlD2fuBLDJ5BLEmacqOeEfwR8DHglVU19wH0kqQptmQRJFkF/O+q+r0x5JEkjdmSQ0NV9Qjw3UnWjCGPJGnMRn4wDfDFJLuAb80zVFXv7SWVJGlsRi2CO7qPY4Dj+4sjSRq3kYqgqn6z7yCSpMkYdRrqzwHzTTr3shVPJEkaq1GHhn5l6PVTgZ8BDq18HEnSuI06NLR3zqovJvFRlU8wa5/6KHCo+yxJoxl1aOjpQ4vHAFuA9b0k0hH7lefcO+kIkqbQqENDe3n8PYJDwG3Az/URSJI0Xks9oeyHgf1VdUq3/AYG7w/cBtzYezpJUu+WurP4D4CHAZK8GPgPwEeA+4Ad/UaTJI3DUkNDq6rqnu71vwF2VNXlwOVJruk3miRpHJY6I1iV5LGyeDnwl0PbRn6WgSTpiWup/8w/Bnw+yUHgIeBKgCTfz2B4SJI05RY9I6iqdwO/DHwY+GdV9diVQ8cAb17q4EnOSnJzkn1JLlxkvx9O8kiSV48eXZK0EpYc3qmqq+ZZ942lvq57jsFFwCuAGeDqJLuq6sZ59vtt4IpRQ0uSVs5Ij6o8QqcD+6rqlqp6GNgJnDPPfm8GLgfu6jGLJGkBfRbBScD+oeWZbt23JDkJ+BfAxYsdKMnWJHuS7Ln77rtXPKgktazPIsg86+bOYPq7wNu6p6AtqKp2VNWWqtqybt26FQsoSer3EtAZ4OSh5Q0MHm4zbAuwMwnAWuBVSQ5V1X/pMZckaUifRXA1sDnJKcAB4FzgNcM7PDZ1BUCSDwOfnmQJbNu2jdnZWdavX8/27dsnFUOSxqq3IqiqQ0kuYHA10Crg0qq6Icn53fZF3xeYhNnZWQ4cODDpGM2qY4tHeZQ69rBnIEnqUa93B1fVbmD3nHXzFkBV/ds+s+iJ7x/O+IdJR5Ca1OebxZKkKWARSFLjLAJJapxFIEmNswgkqXEWgSQ17knxcJkXvPWyFTnO8QfvZxVw+8H7V+SYe9/z+qMPJUk984xAkhpnEUhS4ywCSWqcRSBJjbMIJKlxFoEkNc4ikKTGPSnuI1gpj6457ts+S1ILLIIhD25+5aQjSNLYOTQkSY2zCCSpcRaBJDXOIpCkxlkEktQ4i0CSGmcRSFLjLAJJapxFIEmNswgkqXEWgSQ1ziKQpMZZBJLUOI
tAkhpnEUhS4ywCSWpcr0WQ5KwkNyfZl+TCeba/Nsm13ceXkpzWZx5J0uF6K4Ikq4CLgLOBU4Hzkpw6Z7dbgZdU1XOAdwE7+sojSZpfn2cEpwP7quqWqnoY2AmcM7xDVX2pqv62W7wK2NBjHknSPPp8ZvFJwP6h5RnghYvs/3PAf59vQ5KtwFaAjRs3rlQ+SRP27te9ekWOc89d9w0+z965Isf89T/5xFEfY5r0eUaQedbVvDsmL2VQBG+bb3tV7aiqLVW1Zd26dSsYUVo5J1bx9CpOrHn/mktPWH2eEcwAJw8tbwDumLtTkucAlwBnV9X/7TGP1KvXPfLopCNIR6TPM4Krgc1JTkmyBjgX2DW8Q5KNwCeBn62qb/SYRZK0gN7OCKrqUJILgCuAVcClVXVDkvO77RcDbwe+G/hAEoBDVbWlr0ySpMP1OTREVe0Gds9Zd/HQ658Hfr7PDJKkxXlnsSQ1ziKQpMZZBJLUOItAkhpnEUhS4ywCSWqcRSBJjbMIJKlxFoEkNc4ikKTGWQSS1DiLQJIaZxFIUuMsAklqnEUgSY2zCCSpcRaBJDXOIpCkxlkEktQ4i0CSGmcRSFLjLAJJapxFIEmNswgkqXEWgSQ1ziKQpMZZBJLUOItAkhpnEUhS4ywCSWqcRSBJjbMIJKlxFoEkNa7XIkhyVpKbk+xLcuE825Pkfd32a5M8v888kqTD9VYESVYBFwFnA6cC5yU5dc5uZwObu4+twAf7yiNJml+fZwSnA/uq6paqehjYCZwzZ59zgMtq4CrgxCTP7DGTJGmO1T0e+yRg/9DyDPDCEfY5CbhzeKckWxmcMQA8kOTmlY36bdYCB1fiQPmdN6zEYZZrxfLzjqzIYZZp5f78f2G685Ox51+57JOxFjj431bgQL/x0Sn/uzO/f7zQhj6LYL4/yTqCfaiqHcCOlQi1lCR7qmrLOL5XH8w/WdOcf5qzg/mPRp9DQzPAyUPLG4A7jmAfSVKP+iyCq4HNSU5JsgY4F9g1Z59dwOu7q4deBNxXVXfOPZAkqT+9DQ1V1aEkFwBXAKuAS6vqhiTnd9svBnYDrwL2AX8PvLGvPMswliGoHpl/sqY5/zRnB/MfsVQdNiQvSWqIdxZLUuMsAklqnEUwZKkpMZ7Iklya5K4k1086y3IlOTnJ55LclOSGJG+ZdKblSPLUJF9J8rUu/29OOtORSLIqyd8k+fSksyxXktuSXJfkmiR7Jp1nuZKcmOQTSb7e/Tv4kbF+f98jGOimxPgG8AoGl7VeDZxXVTdONNiIkrwYeIDBndr/ZNJ5lqO7m/yZVfXVJMcDe4F/PkV/9gGOq6oHkjwF+GvgLd3d8lMjyS8BW4ATquonJ51nOZLcBmypqqm8IS7JR4Arq+qS7irLY6vq3nF9f88IHjfKlBhPWFX1BeCeSec4ElV1Z1V9tXt9P3ATgzvMp0I3RcoD3eJTuo+p+g0ryQbgJ4BLJp2lNUlOAF4MfAigqh4eZwmARTBsoekuNEZJNgHPA7482STL0w2rXAPcBfyPqpqq/MDvAtuARycd5AgV8Nkke7spaabJ9wJ3A3/UDc1dkuS4cQawCB430nQX6k+S7wAuB36xqv5u0nmWo6oeqarnMrg7/vQkUzM8l+Qngbuqau+ksxyFM6rq+QxmNH5TN1Q6LVYDzwc+WFXPAx4ExvoepUXwOKe7mKBubP1y4KNV9clJ5zlS3Sn9XwFnTTjKcpwB/HQ3zr4TeFmSP5lspOWpqju6z3cBn2Iw1DstZoCZobPITzAohrGxCB43ypQY6kH3ZuuHgJuq6r2TzrNcSdYlObF7/TTgx4CvTzbV6KrqV6tqQ1VtYvD3/i+r6nUTjjWyJMd1FxnQDam8Epiaq+eqahbYn+RZ3aqXA2O9UKLP2UenykJTYkw41siSfAw4E1ibZAZ4R1V9aLKpRnYG8LPAdd04O8CvVdXuCWZajmcCH+muPDsG+HhVTd0lmFPsGcCnBr9PsBr406r6zG
QjLdubgY92v4Tewpin2/HyUUlqnENDktQ4i0CSGmcRSFLjLAJJapxFIEmNswikBSR5pJvN8vokf5bk2KM83qZpnB1WT34WgbSwh6rqud1srg8D54/yRUm8P0dTxSKQRnMl8P1JfirJl7vJwf5nkmcAJHlnkh1JPgtcluQZST7VPaPga0l+tDvOqiR/2D234LPdncjSRFkE0hK63/DPBq5j8KyBF3WTg+1kMGPnY14AnFNVrwHeB3y+qk5jMG/MY3epbwYuqqpnA/cCPzOen0JamKew0sKeNjTlxZUM5kN6FvCfu4fprAFuHdp/V1U91L1+GfB6GMxMCtyX5LuAW6vqsWPuBTb1+yNIS7MIpIU91E0t/S1Jfh94b1XtSnIm8M6hzQ+OcMz/N/T6EcChIU2cQ0PS8nwncKB7/YZF9vsL4N/Btx5ac0LfwaQjZRFIy/NO4M+SXAks9nzctwAvTXIdgyGgZ48hm3REnH1UkhrnGYEkNc4ikKTGWQSS1DiLQJIaZxFIUuMsAklqnEUgSY37/+oB/Kqr3fnLAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar plot of Survived against each Parch value, drawn on the current axes\n",
"sns.barplot(data=df, x=\"Parch\", y=\"Survived\", ax=plt.gca())\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Bucket passengers into labelled age groups.\n",
"# Missing ages are replaced by the sentinel -0.5 so that they fall into the first ('Unknown') bin.\n",
"# NOTE(review): the sentinel stays in df[\"Age\"] after this cell - confirm later cells expect that.\n",
"df[\"Age\"] = df[\"Age\"].fillna(-0.5)\n",
"\n",
"# Bin edges for pd.cut; intervals are right-closed, so (-1, 0] holds only the -0.5 sentinel.\n",
"# The first inner edge used to be 1, which labelled real ages up to 1\n",
"# (the minimum age in the data is 0.42) as 'Unknown' instead of 'Baby'.\n",
"bluk = [-1, 0, 6, 13, 18, 21, 35, 50, np.inf]\n",
"labels = ['Unknown', 'Baby', 'Child', 'Teenager', 'Student', 'Young Adult', 'Adult', 'Senior']\n",
"df['peopleCategory'] = pd.cut(df[\"Age\"], bluk, labels = labels)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar plot of Survived against each peopleCategory group, drawn on the current axes\n",
"sns.barplot(data=df, x=\"peopleCategory\", y=\"Survived\", ax=plt.gca())\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cleaning the data"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Drop the columns that are not used in the rest of the analysis.\n",
"# errors=\"ignore\" keeps the cell re-runnable: a second run no longer raises KeyError\n",
"# when the columns have already been removed.\n",
"df = df.drop(columns=[\"Cabin\", \"Ticket\"], errors=\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"queenstown = 77 southampton = 644 cherbourg = 168\n"
]
}
],
"source": [
"# Fill the missing Embarked values with the most common port.\n",
"# Count every port BEFORE imputing so that the three numbers are comparable\n",
"# (previously only Southampton was counted before the fill, the other two after it).\n",
"southampton = df[df[\"Embarked\"] == \"S\"].shape[0]\n",
"cherbourg = df[df[\"Embarked\"] == \"C\"].shape[0]\n",
"queenstown = df[df[\"Embarked\"] == \"Q\"].shape[0]\n",
"print(\"queenstown = \", queenstown , \"southampton = \", southampton,\"cherbourg = \", cherbourg )\n",
"\n",
"# \"S\" has the highest count above, so it is used for the rows with no Embarked value.\n",
"df = df.fillna({\"Embarked\": \"S\"})"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Fare</th>\n",
" <th>Embarked</th>\n",
" <th>peopleCategory</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7.2500</td>\n",
" <td>1</td>\n",
" <td>Young Adult</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>71.2833</td>\n",
" <td>2</td>\n",
" <td>Adult</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" <td>1</td>\n",
" <td>Young Adult</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>53.1000</td>\n",
" <td>1</td>\n",
" <td>Young Adult</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" <td>1</td>\n",
" <td>Young Adult</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Fare Embarked peopleCategory \n",
"0 0 7.2500 1 Young Adult \n",
"1 0 71.2833 2 Adult \n",
"2 0 7.9250 1 Young Adult \n",
"3 0 53.1000 1 Young Adult \n",
"4 0 8.0500 1 Young Adult "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Encode the port of embarkation as an integer code (S=1, C=2, Q=3).\n",
"Emapping = dict(S=1, C=2, Q=3)\n",
"df[\"Embarked\"] = df[\"Embarked\"].map(Emapping)\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Fare</th>\n",
" <th>Embarked</th>\n",
" <th>peopleCategory</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7.2500</td>\n",
" <td>1</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>71.2833</td>\n",
" <td>2</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" <td>1</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>53.1000</td>\n",
" <td>1</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" <td>1</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Fare Embarked peopleCategory \n",
"0 0 7.2500 1 5.0 \n",
"1 0 71.2833 2 6.0 \n",
"2 0 7.9250 1 5.0 \n",
"3 0 53.1000 1 5.0 \n",
"4 0 8.0500 1 5.0 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Encode the age category as an integer code.\n",
"# 'Unknown' (the bin holding the missing-age sentinel) gets its own code 0;\n",
"# leaving it out of the mapping silently turned those passengers into NaN and\n",
"# upcast the whole column to float.\n",
"Amapping = {'Unknown': 0, 'Baby': 1, 'Child': 2, 'Teenager': 3, 'Student': 4, 'Young Adult': 5, 'Adult': 6, 'Senior': 7}\n",
"df['peopleCategory'] = df['peopleCategory'].map(Amapping)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Remove the columns that are not passed on to the models.\n",
"df = df.drop(columns=['Age', 'Name', 'peopleCategory'])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Fare</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7.2500</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>71.2833</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>53.1000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass Sex SibSp Parch Fare Embarked\n",
"0 1 0 3 0 1 0 7.2500 1\n",
"1 2 1 1 1 1 0 71.2833 2\n",
"2 3 1 3 1 0 0 7.9250 1\n",
"3 4 1 1 1 1 0 53.1000 1\n",
"4 5 0 3 0 0 0 8.0500 1"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Encode sex as a binary integer: male -> 0, female -> 1.\n",
"Smapping = dict(male=0, female=1)\n",
"df[\"Sex\"] = df[\"Sex\"].map(Smapping)\n",
"\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index([], dtype='object')"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List the columns that still contain missing values (empty Index = none).\n",
"# DataFrame.isna() is used instead of np.isnan(df) because np.isnan raises a\n",
"# TypeError as soon as any column is non-numeric.\n",
"df.columns[df.isna().any()]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# correlations"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Sex</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Fare</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>PassengerId</th>\n",
" <td>1.000000</td>\n",
" <td>-0.005007</td>\n",
" <td>-0.035144</td>\n",
" <td>-0.042939</td>\n",
" <td>-0.057527</td>\n",
" <td>-0.001652</td>\n",
" <td>0.012658</td>\n",
" <td>-0.030467</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Survived</th>\n",
" <td>-0.005007</td>\n",
" <td>1.000000</td>\n",
" <td>-0.338481</td>\n",
" <td>0.543351</td>\n",
" <td>-0.035322</td>\n",
" <td>0.081629</td>\n",
" <td>0.257307</td>\n",
" <td>0.106811</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Pclass</th>\n",
" <td>-0.035144</td>\n",
" <td>-0.338481</td>\n",
" <td>1.000000</td>\n",
" <td>-0.131900</td>\n",
" <td>0.083081</td>\n",
" <td>0.018443</td>\n",
" <td>-0.549500</td>\n",
" <td>0.045702</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Sex</th>\n",
" <td>-0.042939</td>\n",
" <td>0.543351</td>\n",
" <td>-0.131900</td>\n",
" <td>1.000000</td>\n",
" <td>0.114631</td>\n",
" <td>0.245489</td>\n",
" <td>0.182333</td>\n",
" <td>0.116569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SibSp</th>\n",
" <td>-0.057527</td>\n",
" <td>-0.035322</td>\n",
" <td>0.083081</td>\n",
" <td>0.114631</td>\n",
" <td>1.000000</td>\n",
" <td>0.414838</td>\n",
" <td>0.159651</td>\n",
" <td>-0.059961</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Parch</th>\n",
" <td>-0.001652</td>\n",
" <td>0.081629</td>\n",
" <td>0.018443</td>\n",
" <td>0.245489</td>\n",
" <td>0.414838</td>\n",
" <td>1.000000</td>\n",
" <td>0.216225</td>\n",
" <td>-0.078665</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Fare</th>\n",
" <td>0.012658</td>\n",
" <td>0.257307</td>\n",
" <td>-0.549500</td>\n",
" <td>0.182333</td>\n",
" <td>0.159651</td>\n",
" <td>0.216225</td>\n",
" <td>1.000000</td>\n",
" <td>0.062142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Embarked</th>\n",
" <td>-0.030467</td>\n",
" <td>0.106811</td>\n",
" <td>0.045702</td>\n",
" <td>0.116569</td>\n",
" <td>-0.059961</td>\n",
" <td>-0.078665</td>\n",
" <td>0.062142</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass Sex SibSp Parch \\\n",
"PassengerId 1.000000 -0.005007 -0.035144 -0.042939 -0.057527 -0.001652 \n",
"Survived -0.005007 1.000000 -0.338481 0.543351 -0.035322 0.081629 \n",
"Pclass -0.035144 -0.338481 1.000000 -0.131900 0.083081 0.018443 \n",
"Sex -0.042939 0.543351 -0.131900 1.000000 0.114631 0.245489 \n",
"SibSp -0.057527 -0.035322 0.083081 0.114631 1.000000 0.414838 \n",
"Parch -0.001652 0.081629 0.018443 0.245489 0.414838 1.000000 \n",
"Fare 0.012658 0.257307 -0.549500 0.182333 0.159651 0.216225 \n",
"Embarked -0.030467 0.106811 0.045702 0.116569 -0.059961 -0.078665 \n",
"\n",
" Fare Embarked \n",
"PassengerId 0.012658 -0.030467 \n",
"Survived 0.257307 0.106811 \n",
"Pclass -0.549500 0.045702 \n",
"Sex 0.182333 0.116569 \n",
"SibSp 0.159651 -0.059961 \n",
"Parch 0.216225 -0.078665 \n",
"Fare 1.000000 0.062142 \n",
"Embarked 0.062142 1.000000 "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pairwise Pearson correlation between every pair of columns.\n",
"DfCorr = df.corr(method='pearson')\n",
"DfCorr"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1. , 0.92, 0.94, ..., -0.08, -0.08, -0.1 ],\n",
" [ 0.92, 1. , 0.9 , ..., -0.1 , -0.1 , -0.12],\n",
" [ 0.94, 0.9 , 1. , ..., 0.16, 0.17, 0.14],\n",
" ...,\n",
" [-0.08, -0.1 , 0.16, ..., 1. , 1. , 1. ],\n",
" [-0.08, -0.1 , 0.17, ..., 1. , 1. , 1. ],\n",
" [-0.1 , -0.12, 0.14, ..., 1. , 1. , 1. ]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Feature-by-feature correlation matrix, rounded to two decimals.\n",
"# np.corrcoef treats each ROW as a variable by default, which on this frame\n",
"# produced a passenger-by-passenger matrix; rowvar=False makes each COLUMN\n",
"# (feature) the variable instead.\n",
"corr_matrix = np.corrcoef(df, rowvar=False).round(decimals=2)\n",
"corr_matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# data splitting"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Separate the label from the features, then hold out 22% of rows for testing.\n",
"target = df[\"Survived\"]\n",
"pred = df.drop(columns=['Survived', 'PassengerId'])\n",
"x_train, x_test, y_train, y_test = train_test_split(\n",
"    pred, target, test_size=0.22, random_state=0\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Apply Support Vector Machines Model"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"72.59\n"
]
}
],
"source": [
"# Support Vector Machines: fit on the training split, score on the test split.\n",
"svc = SVC()\n",
"svc.fit(x_train, y_train)\n",
"y_pred = svc.predict(x_test)\n",
"\n",
"# accuracy_score takes (y_true, y_pred). Accuracy is symmetric, so the printed\n",
"# value is unchanged; the order now matches the confusion_matrix and\n",
"# classification_report calls in this notebook.\n",
"acc_svc = round(accuracy_score(y_test, y_pred) * 100, 2)\n",
"print(acc_svc)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# confusion matrix for Support Vector Machines Model"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Confusion matrix : \n",
" [[ 26 47]\n",
" [ 7 117]]\n",
"Outcome values : \n",
" 26 47 7 117\n",
"Classification report : \n",
" precision recall f1-score support\n",
"\n",
" 1 0.79 0.36 0.49 73\n",
" 0 0.71 0.94 0.81 124\n",
"\n",
" accuracy 0.73 197\n",
" macro avg 0.75 0.65 0.65 197\n",
"weighted avg 0.74 0.73 0.69 197\n",
"\n"
]
}
],
"source": [
"# Confusion matrix for the current y_pred, with class 1 listed first so the\n",
"# flattened layout is [tp, fn, fp, tn].\n",
"matrix = confusion_matrix(y_test, y_pred, labels=[1, 0])\n",
"print('Confusion matrix : \\n',matrix)\n",
"\n",
"# Reuse the matrix computed above instead of recomputing it.\n",
"tp, fn, fp, tn = matrix.reshape(-1)\n",
"print('Outcome values : \\n', tp, fn, fp, tn)\n",
"\n",
"# Classification report, kept under its own name so `matrix` stays the array.\n",
"report = classification_report(y_test, y_pred, labels=[1, 0])\n",
"print('Classification report : \\n',report)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Apply Logistic Regression Model"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"80.71\n"
]
}
],
"source": [
"# Logistic Regression: fit on the training split, score on the test split.\n",
"logreg = LogisticRegression()\n",
"logreg.fit(x_train, y_train)\n",
"y_pred = logreg.predict(x_test)\n",
"\n",
"# accuracy_score takes (y_true, y_pred). Accuracy is symmetric, so the printed\n",
"# value is unchanged; the order now matches the confusion_matrix and\n",
"# classification_report calls in this notebook.\n",
"acc_logreg = round(accuracy_score(y_test, y_pred) * 100, 2)\n",
"print(acc_logreg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# confusion matrix for Logistic Regression Model"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Confusion matrix : \n",
" [[ 52 21]\n",
" [ 17 107]]\n",
"Outcome values : \n",
" 52 21 17 107\n",
"Classification report : \n",
" precision recall f1-score support\n",
"\n",
" 1 0.75 0.71 0.73 73\n",
" 0 0.84 0.86 0.85 124\n",
"\n",
" accuracy 0.81 197\n",
" macro avg 0.79 0.79 0.79 197\n",
"weighted avg 0.81 0.81 0.81 197\n",
"\n"
]
}
],
"source": [
"# Confusion matrix for the current y_pred, with class 1 listed first so the\n",
"# flattened layout is [tp, fn, fp, tn].\n",
"matrix = confusion_matrix(y_test, y_pred, labels=[1, 0])\n",
"print('Confusion matrix : \\n',matrix)\n",
"\n",
"# Reuse the matrix computed above instead of recomputing it.\n",
"tp, fn, fp, tn = matrix.reshape(-1)\n",
"print('Outcome values : \\n', tp, fn, fp, tn)\n",
"\n",
"# Classification report, kept under its own name so `matrix` stays the array.\n",
"report = classification_report(y_test, y_pred, labels=[1, 0])\n",
"print('Classification report : \\n',report)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}