Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
from GW_PointsPredictor.Utility_functions import *
import numpy as np
import pandas as pd
from scipy import stats
# Final league-table position (1 = champions, 20 = bottom) of every club,
# keyed first by season ("YYYY-YY") and then by the club's short name.
# Club keys must be spelled exactly as they appear in the match data: a
# misspelled key is silently treated as "unknown club" by get_season_pos,
# which then falls back to position 20.
season_positions = {
    "2019-20": {
        "Liverpool": 1, "Man City": 2, "Man Utd": 3, "Chelsea": 4,
        "Leicester": 5, "Spurs": 6, "Wolves": 7, "Arsenal": 8,
        "Sheffield Utd": 9, "Burnley": 10, "Southampton": 11, "Everton": 12,
        "Newcastle": 13, "Crystal Palace": 14, "Brighton": 15, "West Ham": 16,
        "Aston Villa": 17, "AFC Bournemouth": 18, "Watford": 19,
        "Norwich City": 20,
    },
    "2020-21": {
        "Man City": 1, "Man Utd": 2, "Liverpool": 3, "Chelsea": 4,
        "Leicester": 5, "West Ham": 6, "Spurs": 7, "Arsenal": 8,
        "Leeds Utd": 9, "Everton": 10, "Aston Villa": 11, "Newcastle": 12,
        "Wolves": 13, "Crystal Palace": 14, "Southampton": 15, "Brighton": 16,
        "Burnley": 17, "Fulham": 18, "West Brom": 19, "Sheffield Utd": 20,
    },
    "2021-22": {
        "Man City": 1, "Liverpool": 2, "Chelsea": 3, "Spurs": 4,
        "Arsenal": 5, "Man Utd": 6, "West Ham": 7, "Leicester": 8,
        "Brighton": 9, "Wolves": 10, "Newcastle": 11, "Crystal Palace": 12,
        "Brentford": 13, "Aston Villa": 14, "Southampton": 15, "Everton": 16,
        "Leeds Utd": 17, "Burnley": 18, "Watford": 19, "Norwich City": 20,
    },
    "2022-23": {
        "Man City": 1, "Arsenal": 2, "Man Utd": 3, "Newcastle": 4,
        "Liverpool": 5, "Brighton": 6, "Aston Villa": 7, "Spurs": 8,
        "Brentford": 9, "Fulham": 10, "Crystal Palace": 11, "Chelsea": 12,
        "Wolves": 13, "West Ham": 14, "AFC Bournemouth": 15,
        "Nottingham Forest": 16, "Everton": 17,
        # Leicester finished 18th and Leeds 19th in the final 2022-23 table.
        "Leicester": 18, "Leeds Utd": 19, "Southampton": 20,
    },
}
# Partition the rows by gameweek with a single boolean mask (no row loop).
def split_test(data, gameweek):
    """
    Separate one gameweek's rows from all the others.

    Rows whose "GW" column equals `gameweek` form the first frame; every
    remaining row forms the second. The original row index is kept in both.

    Returns a tuple (data_gw, data_other_gw).
    """
    in_gameweek = data["GW"] == gameweek
    return data[in_gameweek], data[~in_gameweek]
# Whole-column (vectorised) computation of the points earned in each fixture.
def check_win(df):
    """
    Work out the league points earned by the side each row describes.

    Reads the "team_h_score", "team_a_score" and "was_home" columns. A level
    score is worth 1 point; a home win on a home row, or an away win on an
    away row, is worth 3; every other row scores 0.

    Returns a plain Python list with one integer per row.
    """
    # Positive when the away side scored more, negative when the home side did.
    away_margin = df["team_a_score"] - df["team_h_score"]
    level = away_margin == 0
    won_at_home = (away_margin < 0) & df["was_home"]
    won_away = (away_margin > 0) & ~df["was_home"]

    # Decide win/loss first, then overwrite the level games with 1 point.
    win_or_loss = np.where(won_at_home | won_away, 3, 0)
    return np.where(level, 1, win_or_loss).tolist()
# Look up a club's finishing position in the season_positions table.
def get_season_pos(club, year):
    """
    Return the position recorded for `club` in season `year`.

    `year` is a key of season_positions (for example "2019-20") and `club`
    a key of that season's table. If either the season or the club is not
    present, 20 is returned.
    """
    season_table = season_positions.get(year)
    if season_table is None:
        return 20
    return season_table.get(club, 20)
# Most frequent value, delegated to the pandas .mode() method.
def find_mode(vals):
    """
    Return the most common value in `vals`, or NaN when there is none.

    `vals` is expected to be a pandas Series. If several values tie, the
    first entry reported by .mode() is returned.
    """
    modes = vals.mode()
    if modes.empty:
        return np.nan
    return modes.iloc[0]
def find_mean(vals):
    """
    Average the entries of `vals`, skipping every entry equal to -1.

    `vals` may be any iterable of numbers. NaN is returned when nothing is
    left after the -1 entries are removed (including for empty input).
    """
    # NOTE(review): -1 is presumably a placeholder for "no data" - confirm.
    kept = [entry for entry in vals if entry != -1]
    if not kept:
        return np.nan
    return np.mean(kept)
def find_max(vals):
    """
    Return the largest value in `vals`.

    `vals` must provide a .max() method (for example a pandas Series or a
    NumPy array); the result is whatever that method returns.
    """
    largest = vals.max()
    return largest
# Standard deviation, delegated to the object's own .std() method.
def find_std(vals):
    """
    Return the standard deviation of `vals` as computed by vals.std().

    For a pandas Series this is the sample standard deviation (pandas'
    default of ddof=1), so a single-element Series yields NaN.
    """
    spread = vals.std()
    return spread
# How many times one particular value occurs.
def find_value_count(vals, to_count):
    """
    Count the occurrences of `to_count` in `vals`.

    `vals` must provide .value_counts() (for example a pandas Series).
    Returns 0 when `to_count` does not occur at all.
    """
    return vals.value_counts().get(to_count, 0)
def get_opp_team(data):
    """
    Work out the opposing team for every row of `data`.

    Reads the "team", "home_team" and "away_team" columns. Where "team"
    equals "home_team" the opponent is "away_team"; otherwise it is
    "home_team".

    Returns a NumPy array with one opponent per row.
    """
    playing_at_home = data["team"] == data["home_team"]
    return np.where(playing_at_home, data["away_team"], data["home_team"])
def create_features(data, mean_features, std_features, no_last_stats):
    """
    Add per-row mean and standard-deviation columns built from history columns.

    For each name in `mean_features`, the column "last_{N}_{name}" is reduced
    cell by cell with find_mean and stored as "mean_{name}_{N}". For each name
    in `std_features`, the same source column is reduced with np.std and
    stored as "std_{name}_{N}". N is `no_last_stats`.

    The new columns are written onto `data` itself, which is also returned.
    """
    window = no_last_stats

    for name in mean_features:
        history = data[f"last_{window}_{name}"]
        data[f"mean_{name}_{window}"] = history.apply(find_mean)

    # NOTE(review): unlike find_mean, np.std does not skip -1 entries -
    # confirm that this difference is intended.
    for name in std_features:
        history = data[f"last_{window}_{name}"]
        data[f"std_{name}_{window}"] = history.apply(np.std)

    return data
def calculate_rolling_stats(data, stat, n=3):
    """
    Add rolling mean and rolling sum columns over each player's previous games.

    Parameters:
    data (pd.DataFrame): Player stats with "name" and "date" columns plus the
        `stat` column.
    stat (str): Name of the column to aggregate.
    n (int): Maximum number of earlier games in each window.

    Returns:
    pd.DataFrame: A copy of `data` sorted by name then date, with two extra
        columns, "{stat}_rolling_mean_last_{n}" and
        "{stat}_rolling_sum_last_{n}". A player's first game has NaN in both
        because there is no earlier game to aggregate.
    """
    ordered = data.sort_values(by=['name', 'date'])
    per_player = ordered.groupby('name')[stat]

    # Each helper shifts its result down one row so that a game's value is
    # built only from the games before it, never from the game itself.
    def earlier_mean(series):
        return series.rolling(window=n, min_periods=1).mean().shift()

    def earlier_sum(series):
        return series.rolling(window=n, min_periods=1).sum().shift()

    rolling_mean = per_player.transform(earlier_mean)
    rolling_sum = per_player.transform(earlier_sum)

    ordered[f'{stat}_rolling_mean_last_{n}'] = rolling_mean
    ordered[f'{stat}_rolling_sum_last_{n}'] = rolling_sum
    return ordered