Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Feature columns intended for a forward-only model. No other cell visible in this\n",
"# notebook references this list.\n",
"# NOTE(review): names ending in ' 3' look like aggregates over the last 3 matches -- confirm against the data-cleaning step.\n",
"forward_statistics =['value', 'was_home', 'last_season_position', 'percent_value',\n",
" 'position rank', 'goals_scored_ex', 'assists_ex', 'total_points_ex',\n",
" 'minutes_ex', 'goals_conceded_ex', 'creativity_ex', 'influence_ex',\n",
" 'threat_ex', 'bonus_ex', 'bps_ex', 'ict_index_ex', 'now_cost_ex', 'GW', 'opponent_last_season_position',\n",
" 'mean assists 3','mean bonus 3', 'mean bps 3','mean creativity 3', 'mean goals_scored 3',\n",
" 'mean ict_index 3', 'mean influence 3', 'mean minutes 3', 'mean penalties_missed 3', 'mean threat 3',\n",
" 'mean total_points 3','mean value 3', 'mean match_result 3', 'std bps 3', 'std creativity 3',\n",
" 'std ict_index 3', 'std influence 3', 'std minutes 3',\n",
" 'std threat 3', 'std total_points 3', 'std value 3']\n",
"\n",
"# Identifier columns: selected into train/test alongside the features, but dropped from\n",
"# the feature matrix before fitting (train.drop(leak_columns, axis=1)).\n",
"leak_columns = [\n",
" \"name\",\n",
" \"team\",\n",
"] \n",
"\n",
"\n",
"# Per-match columns to exclude from the model inputs.\n",
"# NOTE(review): the data-preparation cells reassign dropped_columns to a two-element list\n",
"# before any drop() call, so this longer list is never applied as written -- confirm which is intended.\n",
"dropped_columns = [\n",
" \"season\",\n",
" \"opponent\",\n",
" \"match_result\",\n",
" \n",
" \"assists\",\n",
" \"penalties_missed\",\n",
" \"bonus\",\n",
" \"bps\",\n",
" \"clean_sheets\",\n",
" \"creativity\",\n",
" \"goals_conceded\",\n",
" \"goals_scored\",\n",
" \"ict_index\",\n",
" \"influence\",\n",
" \"own_goals\",\n",
" \"penalties_saved\",\n",
" \"red_cards\",\n",
" \"saves\",\n",
" \"selected\",\n",
" \"threat\",\n",
" \"transfers_balance\",\n",
" \"transfers_in\",\n",
" \"transfers_out\",\n",
" \"yellow_cards\",\n",
" \"team Goal scored\",\n",
" \"team Goal conceded\"\n",
"] \n",
"\n",
"# Feature columns intended for a midfielder-only model; the entries are the same as in\n",
"# forward_statistics. No other cell visible in this notebook references this list.\n",
"midfielder_statistics =['value', 'was_home', 'last_season_position', 'percent_value',\n",
" 'position rank', 'goals_scored_ex', 'assists_ex', 'total_points_ex',\n",
" 'minutes_ex', 'goals_conceded_ex', 'creativity_ex', 'influence_ex',\n",
" 'threat_ex', 'bonus_ex', 'bps_ex', 'ict_index_ex', 'now_cost_ex', 'GW', 'opponent_last_season_position',\n",
" 'mean assists 3','mean bonus 3', 'mean bps 3','mean creativity 3', 'mean goals_scored 3',\n",
" 'mean ict_index 3', 'mean influence 3', 'mean minutes 3', 'mean penalties_missed 3', 'mean threat 3',\n",
" 'mean total_points 3','mean value 3', 'mean match_result 3', 'std bps 3', 'std creativity 3',\n",
" 'std ict_index 3', 'std influence 3', 'std minutes 3',\n",
" 'std threat 3', 'std total_points 3', 'std value 3']\n",
"\n",
"# Feature columns intended for a goalkeeper-only model (includes clean-sheet, saves and\n",
"# penalties-saved columns). No other cell visible in this notebook references this list.\n",
"goalkeeper_statistics = ['value', 'was_home', 'last_season_position', 'percent_value',\n",
" 'position rank', 'total_points_ex', 'minutes_ex', 'goals_conceded_ex',\n",
" 'bonus_ex', 'bps_ex', 'ict_index_ex', 'clean_sheets_ex',\n",
" 'red_cards_ex', 'now_cost_ex', 'GW', 'opponent_last_season_position',\n",
" 'mean bonus 3', 'mean bps 3', 'mean clean_sheets 3', 'mean goals_conceded 3',\n",
" 'mean ict_index 3', 'mean minutes 3',\n",
" 'mean own_goals 3', 'mean penalties_saved 3',\n",
" 'mean saves 3', 'mean threat 3',\n",
" 'mean total_points 3',\n",
" 'mean value 3', 'mean match_result 3', 'std bps 3',\n",
" 'std ict_index 3', 'std influence 3', 'std minutes 3',\n",
" 'std threat 3', 'std total_points 3', 'std value 3']\n",
"\n",
"# Position-agnostic feature list. This is the list the data-preparation cells actually use:\n",
"# train/test are reduced to statistics + leak_columns + date_cols before modelling.\n",
"statistics =['value', 'position','was_home', 'last_season_position', 'percent_value',\n",
" 'position rank', 'goals_scored_ex', 'assists_ex', 'total_points_ex',\n",
" 'minutes_ex', 'goals_conceded_ex', 'creativity_ex', 'influence_ex',\n",
" 'threat_ex', 'bonus_ex', 'bps_ex', 'ict_index_ex', 'clean_sheets_ex',\n",
" 'yellow_cards_ex','now_cost_ex', 'GW', 'opponent_last_season_position', 'mean assists 3',\n",
" 'mean bonus 3', 'mean bps 3', 'mean clean_sheets 3',\n",
" 'mean creativity 3', 'mean goals_conceded 3', 'mean goals_scored 3',\n",
" 'mean ict_index 3', 'mean influence 3', 'mean minutes 3',\n",
" 'mean own_goals 3',\n",
" 'mean red_cards 3', 'mean threat 3','mean total_points 3',\n",
" 'mean value 3', 'mean match_result 3', 'std bps 3', 'std creativity 3',\n",
" 'std ict_index 3', 'std influence 3', 'std minutes 3',\n",
" 'std threat 3', 'std total_points 3', 'std value 3','mean saves 3','mean assists all',\n",
" 'mean bonus all', 'mean bps all', 'mean clean_sheets all',\n",
" 'mean creativity all', 'mean goals_conceded all', 'mean goals_scored all',\n",
" 'mean ict_index all', 'mean influence all', 'mean minutes all',\n",
" 'mean own_goals all',\n",
" 'mean red_cards all', 'mean threat all','mean total_points all',\n",
" 'mean value all', 'mean match_result all',\n",
" 'mean team Goal scored 3','mean team Goal scored all','mean team Goal conceded 3','mean team Goal conceded all',\"ratio_goal_scored all\",\"ratio_goal_scored 3\",\n",
" 'opp mean team Goal scored 3','opp mean team Goal conceded 3','opp mean team Goal scored all','opp mean team Goal conceded all',\"opp mean match_result all\"]\n",
"\n",
"\n",
"# Calendar feature columns selected together with `statistics`.\n",
"# NOTE(review): preprocess_and_feature_engineer creates \"day_of_week\" (underscore), but this\n",
"# list asks for \"day_of week\" (space). add_missing_columns therefore adds an all-zero\n",
"# \"day_of week\" column and the real weekday is never selected. Renaming the entry would pass\n",
"# weekday-name strings to the models, so it needs an encoding step as well -- confirm intent.\n",
"date_cols=[\"day_of week\",\"month\",\"hour\",\"week\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def convert_minutes(val):\n",
"    \"\"\"Flag an appearance: return 1 when ``val`` is strictly greater than 10, otherwise 0.\"\"\"\n",
"    return 1 if val > 10 else 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use the %pip magic explicitly: it installs into the environment of the running kernel\n",
"# and does not depend on IPython's automagic being enabled.\n",
"%pip install catboost"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from hyperopt import tpe,hp,fmin,STATUS_OK,Trials\n",
"from hyperopt.pyll.base import scope"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import warnings\n",
"import os\n",
"\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.ensemble import (\n",
" RandomForestClassifier,\n",
" RandomForestRegressor,\n",
" GradientBoostingRegressor,\n",
")\n",
"from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
"from sklearn.metrics import (\n",
" mean_squared_error,\n",
" mean_absolute_error,\n",
" confusion_matrix,\n",
" accuracy_score,\n",
" f1_score,\n",
")\n",
"from lightgbm import LGBMRegressor, LGBMClassifier\n",
"from catboost import CatBoostClassifier, CatBoostRegressor\n",
"from sklearn.model_selection import StratifiedKFold,KFold"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Gameweek to predict: files GW1 .. GW(gameweek-1) are appended to the training data and\n",
"# GW{gameweek}.csv is loaded as the prediction set.\n",
"gameweek=26"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Folder holding the cleaned CSVs, defined once so that relocating the data needs a single edit.\n",
"cleaned_data_dir = \"C:\\\\Users\\\\prane\\\\Downloads\\\\FPL\\\\GW_PointsPredictor\\\\Cleaned_Data\"\n",
"current_season_dir = f\"{cleaned_data_dir}\\\\2023-24\"\n",
"\n",
"# Historical seasons form the base of the training set\n",
"train = pd.read_csv(f\"{cleaned_data_dir}\\\\cleaned_previous_seasons.csv\", index_col=0)\n",
"\n",
"# Every gameweek of the current season that precedes the one being predicted\n",
"old_gameweek_paths = [f\"{current_season_dir}\\\\GW{i}.csv\" for i in range(1, gameweek)]\n",
"old_gameweek_cleaned = [pd.read_csv(path) for path in old_gameweek_paths]\n",
"old_gameweeks = pd.concat(old_gameweek_cleaned, ignore_index=True)\n",
"\n",
"# Ensure old gameweeks data has the same columns as the initial training data\n",
"old_gameweeks = old_gameweeks[train.columns]\n",
"\n",
"# Combine the initial training data with the old gameweeks data\n",
"train = pd.concat([train, old_gameweeks], ignore_index=True)\n",
"\n",
"# Load data for the current gameweek to be used for prediction\n",
"test = pd.read_csv(f\"{current_season_dir}\\\\GW{gameweek}.csv\", index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"directory_path = \"C:\\\\Users\\\\prane\\\\Downloads\\\\FPL\\\\GW_PointsPredictor\\\\Predicted_outcomes\"\n",
"# The final export cell writes into a per-gameweek subfolder; to_csv does not create\n",
"# missing folders, so make sure that subfolder exists as well.\n",
"gameweek_directory_path = f\"{directory_path}\\\\GW{gameweek}\"\n",
"\n",
"for output_dir in (directory_path, gameweek_directory_path):\n",
"    if not os.path.exists(output_dir):\n",
"        os.makedirs(output_dir, exist_ok=True)\n",
"        print(f\"Directory {output_dir} created!\")\n",
"    else:\n",
"        print(f\"Directory {output_dir} already exists.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Number of training rows per position label.\n",
"train[\"position\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rename the \"GKP\" position label to \"GK\" in the training data.\n",
"# NOTE(review): test is not relabelled here; later cells filter the validation frame on \"GK\"\n",
"# but filter test/test_copy on \"GKP\" -- confirm which label each frame is meant to carry.\n",
"train[\"position\"]=train[\"position\"].replace({\"GKP\":\"GK\" })"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Function to preprocess and feature engineer the dataset\n",
"def preprocess_and_feature_engineer(df):\n",
"    \"\"\"Deduplicate player-match rows and derive calendar features from the kickoff time.\n",
"\n",
"    Rows are keyed by ``name + kickoff_time``; for duplicated keys the last row wins and\n",
"    the key becomes the index. ``kickoff_time`` is parsed as a UTC ISO 8601 timestamp and\n",
"    replaced by ``day_of_week`` (day name), ``month``, ``hour`` and ISO ``week``.\n",
"\n",
"    The input frame is not modified. A frame without a ``kickoff_time`` column is treated\n",
"    as already processed and returned unchanged, so the function can be re-applied safely.\n",
"    \"\"\"\n",
"    if \"kickoff_time\" not in df.columns:\n",
"        return df\n",
"\n",
"    # assign() builds a new frame, leaving the caller's frame without the helper column\n",
"    keyed = df.assign(index=df[\"name\"] + df[\"kickoff_time\"].astype(\"str\"))\n",
"    keyed = keyed.drop_duplicates(\"index\", keep=\"last\").set_index(\"index\")\n",
"\n",
"    kickoff = pd.to_datetime(keyed[\"kickoff_time\"], utc=True, format='ISO8601')\n",
"    keyed[\"day_of_week\"] = kickoff.dt.day_name()\n",
"    keyed[\"month\"] = kickoff.dt.month\n",
"    keyed[\"hour\"] = kickoff.dt.hour\n",
"    keyed[\"week\"] = kickoff.dt.isocalendar().week\n",
"\n",
"    # errors=\"ignore\": \"date\" is only present when the raw data already carried such a column\n",
"    return keyed.drop(columns=[\"kickoff_time\", \"date\"], errors=\"ignore\")\n",
"\n",
"# Function to convert categorical columns to numerical\n",
"def convert_categorical_to_numerical(df, categorical_columns):\n",
"    \"\"\"Replace the listed object-dtype columns of ``df`` with integer codes, in place.\n",
"\n",
"    Only columns that are both named in ``categorical_columns`` and of object dtype are\n",
"    touched. Codes come from ``pd.factorize``, i.e. they follow the order in which labels\n",
"    first appear in this frame, so two frames encoded separately do not share a mapping.\n",
"    Returns the same frame object.\n",
"    \"\"\"\n",
"    to_encode = [\n",
"        column\n",
"        for column in df.columns\n",
"        if column in categorical_columns and df[column].dtype == \"object\"\n",
"    ]\n",
"    for column in to_encode:\n",
"        codes, _labels = pd.factorize(df[column])\n",
"        df[column] = codes\n",
"    return df\n",
"\n",
"# Function to handle missing columns\n",
"def add_missing_columns(df, expected_columns):\n",
"    \"\"\"Add every column of ``expected_columns`` that ``df`` lacks, filled with 0.\n",
"\n",
"    The names that had to be added are printed first. ``df`` is modified in place and\n",
"    also returned.\n",
"    \"\"\"\n",
"    absent = []\n",
"    for name in expected_columns:\n",
"        if name not in df.columns:\n",
"            absent.append(name)\n",
"    print(\"Missing columns:\", absent)\n",
"    for name in absent:\n",
"        df[name] = 0\n",
"    return df\n",
"\n",
"# Applying the preprocessing steps\n",
"train = preprocess_and_feature_engineer(train)\n",
"test = preprocess_and_feature_engineer(test)\n",
"\n",
"# Ensure test data has the same columns as train data\n",
"test = test[train.columns]\n",
"\n",
"# Snapshot both frames before encoding and dropping columns; later cells rebuild the\n",
"# points-model inputs from these copies.\n",
"train_copy = train.copy()\n",
"test_copy = test.copy()\n",
"\n",
"# Binarise the classification target and integer-encode the identifier columns.\n",
"# NOTE(review): train and test are factorized independently, so the same label can get a\n",
"# different code in each frame -- confirm this is acceptable for \"position\".\n",
"train[\"minutes\"] = train[\"minutes\"].apply(convert_minutes)\n",
"train = convert_categorical_to_numerical(train, [\"team\", \"name\", \"position\"])\n",
"test = convert_categorical_to_numerical(test, [\"team\", \"name\", \"position\"])\n",
"\n",
"# Keep the target aside, then remove it (and total_points) from the inputs\n",
"target = train[[\"minutes\", \"GW\", \"position\"]]\n",
"train.drop([\"total_points\", \"minutes\"], axis=1, inplace=True)\n",
"test.drop([\"total_points\", \"minutes\"], axis=1, inplace=True)\n",
"\n",
"# Handle dropped columns\n",
"dropped_columns = ['team Goal scored', 'team Goal conceded']\n",
"train.drop(dropped_columns, axis=1, inplace=True, errors='ignore')\n",
"test.drop(dropped_columns, axis=1, inplace=True, errors='ignore')\n",
"\n",
"# Convert 'position' to categorical\n",
"train[\"position\"] = train[\"position\"].astype(\"category\")\n",
"test[\"position\"] = test[\"position\"].astype(\"category\")\n",
"\n",
"# Replace values in 'was_home' (True -> 0, False -> 1)\n",
"train[\"was_home\"] = train[\"was_home\"].replace({True: 0, False: 1})\n",
"test[\"was_home\"] = test[\"was_home\"].replace({True: 0, False: 1})\n",
"\n",
"# Columns the model inputs are reduced to. Assigned here because this is the first cell\n",
"# that needs the name; without it the cell fails with NameError on a fresh kernel.\n",
"expected_columns = statistics + leak_columns + date_cols\n",
"\n",
"# Ensure all expected columns are present\n",
"train = add_missing_columns(train, expected_columns)\n",
"test = add_missing_columns(test, expected_columns)\n",
"\n",
"# Select relevant columns\n",
"train = train[expected_columns]\n",
"test = test[expected_columns]\n",
"\n",
"# Hold out 10% of the rows for validating the minutes classifier\n",
"x, val, y, y_val = train_test_split(\n",
"    train.drop(leak_columns, axis=1),\n",
"    target[\"minutes\"],\n",
"    test_size=0.1,\n",
"    random_state=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fixed CatBoost hyper-parameters for the minutes classifier; \"position\" is passed as a\n",
"# categorical feature and training may stop early after 500 rounds.\n",
"# NOTE(review): the fractional values look like the output of a tuning run (hyperopt is imported above) -- confirm their origin.\n",
"params={'colsample_bylevel': 0.8070621518153563, 'learning_rate': 0.04765984972709895, 'max_depth': 7, 'reg_lambda': 5, 'scale_pos_weight': 2.5,'subsample': 0.6794390204583894}\n",
"model=CatBoostClassifier(**params,cat_features=[\"position\"],random_state=0,early_stopping_rounds=500,use_best_model=True,verbose=500,n_estimators=10000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fit on the training split; the hold-out split is passed as the evaluation set that\n",
"# early stopping and use_best_model (set on the classifier) rely on.\n",
"model.fit(x, y,eval_set=[(val,y_val)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Predict once and reuse the result. scikit-learn metrics take (y_true, y_pred) in that\n",
"# order, so rows of the confusion matrix are actual classes and columns predicted ones,\n",
"# matching the per-position cells further down.\n",
"val_predictions = model.predict(val)\n",
"\n",
"print(confusion_matrix(y_val, val_predictions))\n",
"print(f\"starting Accuracy score {accuracy_score(y_val, val_predictions)}\")\n",
"\n",
"print(f\" starting f1 score: {f1_score(y_val, val_predictions)}\")\n",
"\n",
"# Feature importances of the fitted classifier, most important first\n",
"feature_importance = pd.DataFrame(\n",
"    {\"column\": x.columns, \"imp\": model.feature_importances_}\n",
").sort_values(\"imp\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The 50 features with the highest importance.\n",
"feature_importance.head(50)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The 20 features with the lowest importance.\n",
"feature_importance.tail(20)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hold-out target values (the binarised minutes flag), shown for inspection.\n",
"y_val"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): `data_for_prediction` is not defined in any cell visible in this notebook;\n",
"# it has to exist in the kernel before this cell runs -- confirm where it comes from.\n",
"\n",
"# Force the price columns to numeric and fill unparsable/missing entries with the column mean\n",
"for col in ['value', 'percent_value']:\n",
"    if col in data_for_prediction.columns:\n",
"        numeric_col = pd.to_numeric(data_for_prediction[col], errors='coerce')\n",
"        # Assign the filled column back: an in-place fillna on a column selection may act on\n",
"        # a temporary copy and leave the frame unchanged.\n",
"        data_for_prediction[col] = numeric_col.fillna(numeric_col.mean())\n",
"\n",
"# Calendar columns are cast to float\n",
"for col in ['month', 'hour', 'week']:\n",
"    if col in data_for_prediction.columns:\n",
"        data_for_prediction[col] = data_for_prediction[col].astype(float)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hold-out predictions side by side with the true labels; position is kept so the\n",
"# following cells can report metrics per position.\n",
"predicted_minutes = model.predict(val)\n",
"val_ = pd.DataFrame(\n",
"    {\n",
"        \"ind\": val.index,\n",
"        \"actul_minutes\": y_val,\n",
"        \"predicted_minutes\": predicted_minutes,\n",
"        \"position\": val[\"position\"],\n",
"    }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Validation metrics restricted to rows whose position equals \"GK\".\n",
"# NOTE(review): \"position\" went through convert_categorical_to_numerical earlier; if it was\n",
"# integer-encoded there, this string comparison selects no rows -- verify.\n",
"print(\"STARTING GOALKEEPERS PERFORMANCE!!!\")\n",
"goalkeeper_rows = val_[val_[\"position\"] == \"GK\"]\n",
"a = goalkeeper_rows[\"actul_minutes\"]\n",
"b = goalkeeper_rows[\"predicted_minutes\"]\n",
"print(confusion_matrix(a, b))\n",
"print(f\"starting Accuracy score {accuracy_score(a, b)}\")\n",
"\n",
"print(f\" starting f1 score: {f1_score(a, b)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Validation metrics restricted to rows whose position equals \"DEF\".\n",
"# NOTE(review): \"position\" went through convert_categorical_to_numerical earlier; if it was\n",
"# integer-encoded there, this string comparison selects no rows -- verify.\n",
"print(\"STARTING DEFENDERS PERFORMANCE!!!\")\n",
"defender_rows = val_[val_[\"position\"] == \"DEF\"]\n",
"a = defender_rows[\"actul_minutes\"]\n",
"b = defender_rows[\"predicted_minutes\"]\n",
"print(confusion_matrix(a, b))\n",
"print(f\"starting Accuracy score {accuracy_score(a, b)}\")\n",
"\n",
"print(f\" starting f1 score: {f1_score(a, b)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Validation metrics restricted to rows whose position equals \"MID\".\n",
"# NOTE(review): \"position\" went through convert_categorical_to_numerical earlier; if it was\n",
"# integer-encoded there, this string comparison selects no rows -- verify.\n",
"print(\"STARTING MIDFIELDERS PERFORMANCE!!!\")\n",
"midfielder_rows = val_[val_[\"position\"] == \"MID\"]\n",
"a = midfielder_rows[\"actul_minutes\"]\n",
"b = midfielder_rows[\"predicted_minutes\"]\n",
"print(confusion_matrix(a, b))\n",
"print(f\"starting Accuracy score {accuracy_score(a, b)}\")\n",
"\n",
"print(f\" starting f1 score: {f1_score(a, b)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Validation metrics restricted to rows whose position equals \"FWD\".\n",
"# NOTE(review): \"position\" went through convert_categorical_to_numerical earlier; if it was\n",
"# integer-encoded there, this string comparison selects no rows -- verify.\n",
"print(\"STARTING FORWARDS PERFORMANCE!!!\")\n",
"forward_rows = val_[val_[\"position\"] == \"FWD\"]\n",
"a = forward_rows[\"actul_minutes\"]\n",
"b = forward_rows[\"predicted_minutes\"]\n",
"print(confusion_matrix(a, b))\n",
"print(f\"starting Accuracy score {accuracy_score(a, b)}\")\n",
"\n",
"print(f\" starting f1 score: {f1_score(a, b)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Position counts of test rows whose minutes value equals exactly 1.\n",
"# NOTE(review): test_copy was copied before any minutes conversion, and convert_minutes is\n",
"# only ever applied to train, so this compares the raw minutes column with 1 -- confirm intent.\n",
"test_copy[test_copy[\"minutes\"]==1][\"position\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Keep only rows with playing time for the points model. .copy() yields independent\n",
"# frames, so the column assignments and in-place drops that follow do not operate on a\n",
"# slice of train_copy / test_copy.\n",
"train = train_copy[train_copy[\"minutes\"] > 0].copy()\n",
"test = test_copy[test_copy[\"minutes\"] > 0].copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Ensure test data has the same columns as train data\n",
"test = test[train.columns]\n",
"\n",
"# train/test were rebuilt from train_copy/test_copy, which are already deduplicated,\n",
"# indexed and stripped of \"kickoff_time\". The preprocessing step is therefore not\n",
"# repeated here: it reads \"kickoff_time\" and would fail with a KeyError.\n",
"\n",
"# Integer-encode the identifier columns\n",
"train = convert_categorical_to_numerical(train, [\"team\", \"name\", \"position\"])\n",
"test = convert_categorical_to_numerical(test, [\"team\", \"name\", \"position\"])\n",
"\n",
"# Keep the regression target aside, then remove it and the minutes column from the inputs.\n",
"# (minutes is not binarised here: the column is dropped without being used.)\n",
"target = train[[\"total_points\", \"GW\", \"position\"]]\n",
"train.drop([\"total_points\", \"minutes\"], axis=1, inplace=True)\n",
"test.drop([\"total_points\", \"minutes\"], axis=1, inplace=True)\n",
"\n",
"# Dropping columns not needed in the model and handling categorical data\n",
"dropped_columns = ['team Goal scored', 'team Goal conceded']\n",
"train.drop(dropped_columns, axis=1, inplace=True, errors='ignore')\n",
"test.drop(dropped_columns, axis=1, inplace=True, errors='ignore')\n",
"\n",
"# Handle 'position' as categorical and 'was_home' replacement (True -> 0, False -> 1)\n",
"train[\"position\"] = train[\"position\"].astype(\"category\")\n",
"test[\"position\"] = test[\"position\"].astype(\"category\")\n",
"train[\"was_home\"] = train[\"was_home\"].replace({True: 0, False: 1})\n",
"test[\"was_home\"] = test[\"was_home\"].replace({True: 0, False: 1})\n",
"\n",
"expected_columns = statistics + leak_columns + date_cols\n",
"\n",
"# Ensure all expected columns are present and select relevant columns for modeling\n",
"train = add_missing_columns(train, expected_columns)\n",
"test = add_missing_columns(test, expected_columns)\n",
"train = train[expected_columns]\n",
"test = test[expected_columns]\n",
"\n",
"# Splitting the dataset for training with \"total_points\" as the target\n",
"x, val, y, y_val = train_test_split(\n",
"    train.drop(leak_columns, axis=1),\n",
"    target[\"total_points\"],\n",
"    test_size=0.1,\n",
"    random_state=0,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import KFold\n",
"\n",
"# Cross-validator that splits the training rows into 8 shuffled folds\n",
"folds = KFold(n_splits=8, shuffle=True, random_state=0)\n",
"\n",
"# One column of test-set predictions per fold\n",
"predictions_df = pd.DataFrame()\n",
"\n",
"# Root-mean-squared error per fold: on the held-out fold and on the rows used for fitting\n",
"rmse_val = []\n",
"rmse_X = []\n",
"\n",
"# LightGBM regressor with fixed hyper-parameters; early stopping uses the eval_set given to fit()\n",
"model = LGBMRegressor(**{'colsample_bytree': 0.4199299182268318, 'learning_rate': 0.0032874466037521254, 'max_depth': 9, 'min_split_gain': 0.5685369160138952, 'num_leaves': 99, 'reg_alpha': 0.5621526419488447, 'reg_lambda': 0, 'subsample': 0.6534153111773866}, verbose=-50, random_state=0, early_stopping_rounds=200, n_estimators=10000)\n",
"\n",
"# Remove the identifier columns once rather than on every fold\n",
"train_features = train.drop(leak_columns, axis=1)\n",
"test_features = test.drop(leak_columns, axis=1)\n",
"points_target = target[\"total_points\"]\n",
"\n",
"# Train, predict the test set and record train/validation RMSE on each fold\n",
"for i, (train_index, test_index) in enumerate(folds.split(train_features, points_target)):\n",
"    train_fold = train_features.iloc[train_index]\n",
"    val_fold = train_features.iloc[test_index]\n",
"    y_fold = points_target.iloc[train_index]\n",
"    y_val_fold = points_target.iloc[test_index]\n",
"\n",
"    model.fit(train_fold, y_fold, eval_set=[(val_fold, y_val_fold)])\n",
"    print(i + 1)\n",
"    prediction = model.predict(test_features)\n",
"    predictions_df[i] = prediction\n",
"    # np.sqrt(MSE) instead of squared=False, which recent scikit-learn releases no longer accept\n",
"    rmse_val.append(np.sqrt(mean_squared_error(y_val_fold, model.predict(val_fold))))\n",
"    rmse_X.append(np.sqrt(mean_squared_error(y_fold, model.predict(train_fold))))\n",
"print(rmse_val)\n",
"print(rmse_X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Mean RMSE across folds: held-out folds first, then the rows used for fitting.\n",
"print(np.mean(rmse_val))\n",
"print(np.mean(rmse_X))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Test-set predictions produced in the first fold.\n",
"predictions_df[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Average the per-fold predictions into one predicted-points value per test row\n",
"test[\"points\"] = predictions_df.mean(axis=1).values\n",
"\n",
"# Identifier columns plus prediction and price, highest predicted points first\n",
"ranked_points = test[leak_columns + [\"points\", \"value\"]].sort_values(\"points\", ascending=False)\n",
"ranked_points.to_csv(\"points.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Five highest predicted-points rows among test rows whose position equals \"MID\".\n",
"test[test[\"position\"]==\"MID\"].sort_values(by=\"points\",ascending=False).head(5)[[\"name\",\"points\",\"team\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Five highest predicted-points rows among test rows whose position equals \"DEF\".\n",
"test[test[\"position\"]==\"DEF\"].sort_values(by=\"points\",ascending=False).head(5)[[\"name\",\"points\",\"team\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Five highest predicted-points rows among test rows whose position equals \"GKP\".\n",
"test[test[\"position\"]==\"GKP\"].sort_values(by=\"points\",ascending=False).head(5)[[\"name\",\"points\",\"team\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Five highest predicted-points rows among test rows whose position equals \"FWD\".\n",
"test[test[\"position\"]==\"FWD\"].sort_values(by=\"points\",ascending=False).head(5)[[\"name\",\"points\",\"team\"]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The 50 highest predicted-points values across all test rows.\n",
"test[\"points\"].sort_values(ascending=False).head(50)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Feature importances of the regressor, most important first. `model` is refit on every\n",
"# fold of the cross-validation loop, so these values come from the last fold's fit.\n",
"feature_importance = pd.DataFrame(\n",
" {\"column\": x.columns, \"imp\": model.feature_importances_}\n",
").sort_values(\n",
" \"imp\", ascending=False\n",
") "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Position label as filtered in the data -> file-name prefix of the exported CSVs\n",
"position_file_prefixes = {\n",
"    \"DEF\": \"defenders\",\n",
"    \"GKP\": \"goalkeepers\",\n",
"    \"MID\": \"midfielders\",\n",
"    \"FWD\": \"forwards\",\n",
"}\n",
"\n",
"gameweek_output_dir = f\"C:\\\\Users\\\\prane\\\\Downloads\\\\FPL\\\\GW_PointsPredictor\\\\Predicted_outcomes\\\\GW{gameweek}\"\n",
"# to_csv does not create missing folders, so make sure the per-gameweek folder exists\n",
"os.makedirs(gameweek_output_dir, exist_ok=True)\n",
"\n",
"# For every position write two files: minutes (from test_copy) and predicted points (from test)\n",
"for position_label, file_prefix in position_file_prefixes.items():\n",
"    test_copy[test_copy[\"position\"] == position_label][[\"name\", \"team\", \"minutes\"]].to_csv(\n",
"        f\"{gameweek_output_dir}\\\\{file_prefix}_minutes.csv\"\n",
"    )\n",
"    test[test[\"position\"] == position_label][[\"name\", \"team\", \"points\", \"value\"]].to_csv(\n",
"        f\"{gameweek_output_dir}\\\\{file_prefix}_points.csv\"\n",
"    )"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "4.4.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}