Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
import pandas as pd
from Feature_engineering import check_win, get_last_season_pos
# Seasons to collect. previous_years is aligned by position with years:
# previous_years[k] is the season immediately before years[k].
years = ["2020-21", "2021-22", "2022-23"]
previous_years = ["2019-20", "2020-21", "2021-22"]

# Gameweek labels "gw1" through "gw38".
gws = ["gw" + str(week_number) for week_number in range(1, 39)]

# All per-gameweek CSV URLs, built once up front. The list is season-major:
# every gameweek of years[0] comes first, then every gameweek of years[1], ...
gw_urls = [
    (
        "https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/"
        "master/data/{}/gws/{}.csv"
    ).format(season, week_label)
    for season in years
    for week_label in gws
]
def calculate_ratio_team_value(df: pd.DataFrame, name: str):
    """Return a player's value as a percentage of their team's total value.

    The player is located by exact match on the ``name`` column; if several
    rows share that name, the first one is used. The team total is the sum of
    ``value`` over every row of ``df`` whose ``team`` equals the player's team.

    Args:
        df: Frame with at least ``name``, ``team`` and ``value`` columns.
        name: Value of the ``name`` column identifying the player.

    Returns:
        ``player_value * 100 / team_value``.

    Raises:
        IndexError: If no row of ``df`` has the given ``name``.
    """
    player_rows = df[df["name"] == name]
    if player_rows.empty:
        # Same exception type the bare ``.iloc[0]`` would raise, but with a
        # message that says which player was missing.
        raise IndexError(f"no row found for player {name!r}")

    player_team = player_rows["team"].iloc[0]
    player_value = player_rows["value"].iloc[0]
    team_value = df.loc[df["team"] == player_team, "value"].sum()
    return player_value * 100 / team_value
def calculate_position_rank(df: pd.DataFrame, name: str):
    """Return a player's value rank among same-position teammates.

    The rank is the number of rows in ``df`` that share the player's ``team``
    and ``position`` and have a strictly greater ``value``. It is therefore
    0-based: the most valuable player in a position group gets 0, and players
    tied on value share the same rank.

    The player is located by exact match on the ``name`` column; if several
    rows share that name, the first one is used.

    Args:
        df: Frame with at least ``name``, ``team``, ``position`` and ``value``
            columns.
        name: Value of the ``name`` column identifying the player.

    Returns:
        Integer count of same-team, same-position rows with a higher value.

    Raises:
        IndexError: If no row of ``df`` has the given ``name``.
    """
    player_rows = df[df["name"] == name]
    if player_rows.empty:
        # Same exception type the bare ``.iloc[0]`` would raise, but with a
        # message that says which player was missing.
        raise IndexError(f"no row found for player {name!r}")

    player_position = player_rows["position"].iloc[0]
    player_team = player_rows["team"].iloc[0]
    player_value = player_rows["value"].iloc[0]

    same_position = df[
        (df["position"] == player_position) & (df["team"] == player_team)
    ]
    return (same_position["value"] > player_value).sum()
# Download every gameweek of every season, enrich it with derived features,
# and accumulate the per-gameweek frames for a single combined export.
list_dfs = []
for i, year in enumerate(years):
    print(year)

    # Previous-season player stats: build a single "name" key from the two
    # name parts, drop the parts, then suffix every column with "_ex" so the
    # merged columns cannot clash with the current-season gameweek columns.
    player_prev_stats_url = f"https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/{previous_years[i]}/cleaned_players.csv"
    player_prev_stats = pd.read_csv(player_prev_stats_url)
    player_prev_stats["name"] = player_prev_stats["first_name"] + " " + player_prev_stats["second_name"]
    player_prev_stats.drop(["first_name", "second_name"], axis=1, inplace=True)
    player_prev_stats.columns = player_prev_stats.columns + "_ex"

    # Team id -> team name lookup for this season, renamed so it can be
    # merged onto each gameweek frame through its "opponent_team" column.
    teams_url = f"https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/{year}/teams.csv"
    teams = pd.read_csv(teams_url, encoding="latin-1")[["id", "name"]]
    teams.columns = ["opponent_team", "opponent"]
    teams["opponent_last_season_position"] = teams["opponent"].apply(get_last_season_pos, year=year)

    for gw_index, gameweek in enumerate(gws):
        print(gameweek)
        # gw_urls is season-major, so season i starts at offset i * len(gws).
        df = pd.read_csv(gw_urls[i * len(gws) + gw_index], encoding="latin-1")

        # Enhance DataFrame with additional calculations and merges
        df["last_season_position"] = df["team"].apply(get_last_season_pos, year=year)
        # Both helpers take (df, name). Series.apply passes each value as the
        # FIRST positional argument, so passing df as a keyword would collide
        # with it ("got multiple values for argument 'df'"); call explicitly.
        df["percent_value"] = df["name"].apply(
            lambda player_name: calculate_ratio_team_value(df, player_name)
        )
        df["position_rank"] = df["name"].apply(
            lambda player_name: calculate_position_rank(df, player_name)
        )
        df["match_result"] = check_win(df)
        df = pd.merge(df, player_prev_stats, left_on="name", right_on="name_ex", how="left").drop("name_ex", axis=1)
        df["season"] = year
        # "gw12" -> 12
        df["GW"] = int(gameweek[2:])
        df = pd.merge(df, teams, on="opponent_team", how="left")
        list_dfs.append(df)

# Combine all gathered data and save to CSV.
# NOTE: the "Default_data" directory must already exist; to_csv does not
# create missing parent directories.
all_data = pd.concat(list_dfs)
all_data.to_csv("Default_data/previous_seasons.csv", index=False)