Skip to content
Permalink
0020ffc1d1
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
553 lines (553 sloc) 93.4 KB
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# ALE Prediction with MLP Regression models\n"
],
"metadata": {
"id": "-f9nAiuTzs6P"
}
},
{
"cell_type": "markdown",
"source": [
"## Load Dataset"
],
"metadata": {
"id": "K3Fx3gFJ0DjR"
}
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "Hnx1f1SbtCWF",
"outputId": "b00cdea1-4dff-45f8-807c-5821d44e9938"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" anchor_ratio trans_range node_density iterations ale sd_ale\n",
"0 30 15 200 40 0.773546 0.250555\n",
"1 15 15 100 70 0.911941 0.498329\n",
"2 30 15 100 50 0.814867 0.255546\n",
"3 15 20 100 20 1.435332 0.394603\n",
"4 30 15 100 40 1.265909 0.302943"
],
"text/html": [
"\n",
" <div id=\"df-ad4387bd-4aa1-47c1-9ff0-5a2cbab2241f\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>anchor_ratio</th>\n",
" <th>trans_range</th>\n",
" <th>node_density</th>\n",
" <th>iterations</th>\n",
" <th>ale</th>\n",
" <th>sd_ale</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>15</td>\n",
" <td>200</td>\n",
" <td>40</td>\n",
" <td>0.773546</td>\n",
" <td>0.250555</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>100</td>\n",
" <td>70</td>\n",
" <td>0.911941</td>\n",
" <td>0.498329</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>30</td>\n",
" <td>15</td>\n",
" <td>100</td>\n",
" <td>50</td>\n",
" <td>0.814867</td>\n",
" <td>0.255546</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>15</td>\n",
" <td>20</td>\n",
" <td>100</td>\n",
" <td>20</td>\n",
" <td>1.435332</td>\n",
" <td>0.394603</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>30</td>\n",
" <td>15</td>\n",
" <td>100</td>\n",
" <td>40</td>\n",
" <td>1.265909</td>\n",
" <td>0.302943</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ad4387bd-4aa1-47c1-9ff0-5a2cbab2241f')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-ad4387bd-4aa1-47c1-9ff0-5a2cbab2241f button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-ad4387bd-4aa1-47c1-9ff0-5a2cbab2241f');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 134
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn import metrics\n",
"from sklearn.model_selection import train_test_split\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"%matplotlib inline\n",
"\n",
"# Load the dataset CSV into a DataFrame\n",
"filename = \"/content/mcs_ds_edited_iter_shuffled.csv\"\n",
"df = pd.read_csv(filename)\n",
"\n",
"# Preview the first rows (head() shows 5 rows by default)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"source": [
"### Analyse features and shape of dataframe"
],
"metadata": {
"id": "FMn6W9ib0YEd"
}
},
{
"cell_type": "code",
"source": [
"df.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mT6-MdhHVV7u",
"outputId": "02294e1f-ff95-40a7-9af5-249d5fd70965"
},
"execution_count": 135,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<bound method DataFrame.info of anchor_ratio trans_range node_density iterations ale sd_ale\n",
"0 30 15 200 40 0.773546 0.250555\n",
"1 15 15 100 70 0.911941 0.498329\n",
"2 30 15 100 50 0.814867 0.255546\n",
"3 15 20 100 20 1.435332 0.394603\n",
"4 30 15 100 40 1.265909 0.302943\n",
".. ... ... ... ... ... ...\n",
"102 18 23 100 20 1.287472 0.375227\n",
"103 14 17 200 60 0.899102 0.231822\n",
"104 15 20 100 50 1.171140 0.372001\n",
"105 20 20 100 40 1.234493 0.599834\n",
"106 15 15 200 40 0.635426 0.139791\n",
"\n",
"[107 rows x 6 columns]>"
]
},
"metadata": {},
"execution_count": 135
}
]
},
{
"cell_type": "markdown",
"source": [
"### Check dataframe for null values"
],
"metadata": {
"id": "yieJ3Dzw0QfJ"
}
},
{
"cell_type": "code",
"source": [
"df.isna().any()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1gKtnpBGyTRH",
"outputId": "f773732e-1d3d-4dec-d0d3-966ff300bf56"
},
"execution_count": 136,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"anchor_ratio False\n",
"trans_range False\n",
"node_density False\n",
"iterations False\n",
"ale False\n",
"sd_ale False\n",
"dtype: bool"
]
},
"metadata": {},
"execution_count": 136
}
]
},
{
"cell_type": "markdown",
"source": [
"## Split dataset into training data and testing data (80% training, 20% testing)"
],
"metadata": {
"id": "MN3wne520hsp"
}
},
{
"cell_type": "code",
"source": [
"# Separate the input features from the predictand (target)\n",
"X = df[[\n",
"    'anchor_ratio',\n",
"    'trans_range',\n",
"    'node_density',\n",
"    'iterations',\n",
"]]\n",
"y = df['ale']\n",
"\n",
"# Hold out 20% of the rows as the test set; the fixed random_state keeps the split repeatable\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=1\n",
")"
],
"metadata": {
"id": "C-A2F-_OunCN"
},
"execution_count": 137,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Define function for Method 1 MLP Regression model"
],
"metadata": {
"id": "42DvP0FD01AZ"
}
},
{
"cell_type": "code",
"source": [
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.neural_network import MLPRegressor\n",
"\n",
"# Method 1: StandardScaler + MLP regression pipeline, evaluation metrics and scatter plot\n",
"def mlppipe(X_train,X_test,y_train,y_test):\n",
"    \"\"\"Fit the Method 1 pipeline (StandardScaler + MLPRegressor) and report test results.\n",
"\n",
"    Prints the R2 score, mean squared log error and RMSE of the predictions\n",
"    for X_test, then draws a regression scatter plot of predicted against\n",
"    expected values on a new 10x10 figure.\n",
"\n",
"    Args:\n",
"        X_train: features used to fit the pipeline.\n",
"        X_test: features to predict on.\n",
"        y_train: target values used to fit the pipeline.\n",
"        y_test: expected target values for X_test.\n",
"\n",
"    Returns:\n",
"        None; results are printed and plotted.\n",
"    \"\"\"\n",
"    mlp = make_pipeline(\n",
"        StandardScaler(),\n",
"        MLPRegressor(hidden_layer_sizes=(12, 6), random_state=15,\n",
"                     activation='tanh', solver='lbfgs', max_iter=204),\n",
"    )\n",
"    mlp.fit(X_train, y_train)\n",
"    exp_y = y_test\n",
"    pred_y = mlp.predict(X_test)\n",
"    print(\"r2 score= \", metrics.r2_score(exp_y, pred_y))\n",
"    # mean_squared_log_error rejects negative values, so this assumes non-negative predictions\n",
"    print(\"mean sqrd log err= \", metrics.mean_squared_log_error(exp_y, pred_y))\n",
"    # Take the root explicitly: the squared=False argument is deprecated in newer scikit-learn\n",
"    print(\"rmse =\", np.sqrt(metrics.mean_squared_error(exp_y, pred_y)))\n",
"    plt.figure(figsize=(10,10))\n",
"    # x/y must be keywords: positional data arguments are rejected from seaborn 0.12\n",
"    ax = sns.regplot(x=exp_y, y=pred_y, fit_reg=True, scatter_kws={\"s\": 125})\n",
"    ax.set(xlabel=\"Expected\", ylabel=\"Predicted\")\n"
],
"metadata": {
"id": "wvUAlwT13NDV"
},
"execution_count": 138,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Define function for Method 2 MLP Regression model"
],
"metadata": {
"id": "0dxJsSn-1R-X"
}
},
{
"cell_type": "code",
"source": [
"# Method 2: MaxAbsScaler + MLP regression pipeline, evaluation metrics and scatter plot\n",
"def mamlppipe(X_train,X_test,y_train,y_test):\n",
"    \"\"\"Fit the Method 2 pipeline (MaxAbsScaler + MLPRegressor) and report test results.\n",
"\n",
"    Prints the R2 score, mean squared log error and RMSE of the predictions\n",
"    for X_test, then draws a regression scatter plot of predicted against\n",
"    expected values on a new 10x10 figure.\n",
"\n",
"    Args:\n",
"        X_train: features used to fit the pipeline.\n",
"        X_test: features to predict on.\n",
"        y_train: target values used to fit the pipeline.\n",
"        y_test: expected target values for X_test.\n",
"\n",
"    Returns:\n",
"        None; results are printed and plotted.\n",
"    \"\"\"\n",
"    mamlp = make_pipeline(\n",
"        MaxAbsScaler(),\n",
"        MLPRegressor(hidden_layer_sizes=(12, 6), random_state=15,\n",
"                     activation='tanh', solver='lbfgs', max_iter=204),\n",
"    )\n",
"    mamlp.fit(X_train, y_train)\n",
"    exp_y = y_test\n",
"    pred_y = mamlp.predict(X_test)\n",
"    print(\"r2 score= \", metrics.r2_score(exp_y, pred_y))\n",
"    # mean_squared_log_error rejects negative values, so this assumes non-negative predictions\n",
"    print(\"mean sqrd log err= \", metrics.mean_squared_log_error(exp_y, pred_y))\n",
"    # Take the root explicitly: the squared=False argument is deprecated in newer scikit-learn\n",
"    print(\"rmse =\", np.sqrt(metrics.mean_squared_error(exp_y, pred_y)))\n",
"    plt.figure(figsize=(10,10))\n",
"    # x/y must be keywords: positional data arguments are rejected from seaborn 0.12\n",
"    ax = sns.regplot(x=exp_y, y=pred_y, fit_reg=True, scatter_kws={\"s\": 125})\n",
"    ax.set(xlabel=\"Expected\", ylabel=\"Predicted\")"
],
"metadata": {
"id": "mrRtSOblJFjU"
},
"execution_count": 139,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### Method 1 results and Scatter Plot"
],
"metadata": {
"id": "LxpNz02WLb3_"
}
},
{
"cell_type": "code",
"source": [
"# Run Method 1 (StandardScaler pipeline) on the train/test split and show its metrics and plot\n",
"print('Results for Standard Scaled data: \\n')\n",
"mlppipe(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 852
},
"id": "pzERcBgR5e5R",
"outputId": "c956092d-6cc6-443e-eb3d-2206a8c6c1f4"
},
"execution_count": 140,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Results for Standard Scaled data: \n",
"\n",
"r2 score= 0.5618865477413895\n",
"mean sqrd log err= 0.009957664776347337\n",
"rmse = 0.20084428776208\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:541: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
" self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n",
"/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n",
" warnings.warn(\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 720x720 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"source": [
"### Method 2 results and Scatter Plot"
],
"metadata": {
"id": "mrRWbjKPLT_v"
}
},
{
"cell_type": "code",
"source": [
"# Run Method 2 (MaxAbsScaler pipeline) on the train/test split and show its metrics and plot\n",
"print('Results for Max Absolute Scaled data: \\n')\n",
"mamlppipe(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 852
},
"id": "NINM_w7SLKJw",
"outputId": "c349a31e-fafa-48c1-fd1e-2fee3ea60aec"
},
"execution_count": 141,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Results for Max Absolute Scaled data: \n",
"\n",
"r2 score= 0.8063502154220884\n",
"mean sqrd log err= 0.004067404844870628\n",
"rmse = 0.13352872413004094\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.8/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:541: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
" self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n",
"/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n",
" warnings.warn(\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 720x720 with 1 Axes>"
],
"image/png": "\n"
},
"metadata": {
"needs_background": "light"
}
}
]
}
]
}