Skip to content
Permalink
Browse files
Encapsulated the preprocessing into useful functions
  • Loading branch information
Boyan-Yordanov committed Mar 4, 2023
1 parent 6b18801 commit 181075c056b42c0fa3c3866f4d61247fba85229b
Showing 1 changed file with 90 additions and 0 deletions.
@@ -0,0 +1,90 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE: importing this module silences every warning process-wide, including
# pandas deprecation and chained-assignment warnings raised by the code below.
warnings.filterwarnings('ignore')


def extract_games(df):
    """Return the items of *df* as a plain list, in iteration order.

    Args:
        df: Any iterable. Callers in this file pass the ``moves`` column of
            the games frame, so each item is one game's move string.

    Returns:
        A new list holding every item yielded by iterating *df*.
    """
    # list() copies the iterable directly; no manual append loop is needed.
    return list(df)

def extract_moves(games):
    """Split move strings into per-colour move lists.

    Each game string is first split on ``","`` and every resulting segment is
    split on single spaces. Tokens at even positions (0, 2, 4, ...) of a
    segment are collected as white's moves, tokens at odd positions as
    black's moves.

    Args:
        games: Iterable of move strings, e.g. ``"e4 e5 Nf3"``.

    Returns:
        A ``(all_white_moves, all_black_moves)`` tuple of lists of lists.
        One inner list is appended per comma-separated segment, so the outer
        lists have one entry per game only when the strings contain no commas.
    """
    all_white_moves = []
    all_black_moves = []
    for game in games:
        for segment in game.split(","):
            # Distinct names keep the loop variables from being rebound to
            # their own split results.
            plies = segment.split(" ")
            all_white_moves.append(plies[0::2])
            all_black_moves.append(plies[1::2])
    return all_white_moves, all_black_moves

def number_of_takes(player_games):
    """Count the ``"x"`` characters in each game's moves.

    Args:
        player_games: Iterable of games, each an iterable of move strings
            (as produced by ``extract_moves`` for one colour).

    Returns:
        A list with one integer per game: the total number of ``"x"``
        occurrences across that game's move strings (0 for an empty game).
    """
    return [
        sum(move.count("x") for move in game)
        for game in player_games
    ]

def _build_features(chess_data, keep_victory_status):
    """Shared preprocessing behind ``mate_games`` and ``draw_and_mate_games``.

    Steps, in order:
      1. Keep rows whose ``rated`` is not False and whose ``turns`` > 4.
      2. Drop duplicate ``id`` values and renumber the rows 0..n-1.
      3. Split ``opening_name`` on the first ``": "`` into ``whites_opening``
         and ``blacks_opening``; split ``increment_code`` on the first ``"+"``
         into integer ``time_limit`` and ``increment``.
      4. Drop the identifier and raw source columns.
      5. Label-encode the categorical columns.
      6. Add per-game ``white_took`` / ``black_took`` capture counts.
      7. Drop ``moves`` and the timestamp columns, keep ``turns`` < 200.

    Args:
        chess_data: Games frame, already filtered on ``victory_status`` by
            the caller.
        keep_victory_status: When True, ``victory_status`` is label-encoded
            and kept as a feature; when False it is dropped.

    Returns:
        ``(x, y)`` where ``y`` is the encoded ``winner`` column and ``x`` is
        every other remaining column.
    """
    from sklearn.preprocessing import LabelEncoder

    # ``!= False`` is an elementwise comparison on the column, not a
    # truthiness test, so it must not be rewritten as ``is not False``.
    eligible = (chess_data.rated != False) & (chess_data.turns > 4)  # noqa: E712
    # drop_duplicates returns a new frame, so later column assignments never
    # write into a slice of the caller's data. Resetting the index here keeps
    # the rows numbered 0..n-1 for the positional assignments below.
    games = (
        chess_data[eligible]
        .drop_duplicates(subset=['id'])
        .reset_index(drop=True)
    )

    # ``n`` must be passed by keyword: it is keyword-only in pandas 2.x.
    games[['whites_opening', 'blacks_opening']] = (
        games.opening_name.str.split(": ", n=1, expand=True)
    )
    games[['time_limit', 'increment']] = (
        games.increment_code.str.split("+", n=1, expand=True).astype('int')
    )

    unused = ['id', 'rated', 'white_id', 'black_id', 'opening_name', 'increment_code']
    categorical = ['winner', 'whites_opening', 'blacks_opening', 'opening_eco']
    if keep_victory_status:
        categorical.append('victory_status')
    else:
        unused.append('victory_status')
    games = games.drop(columns=unused)

    for column_name in categorical:
        games[column_name] = LabelEncoder().fit_transform(games[column_name])

    white_moves, black_moves = extract_moves(extract_games(games.moves))
    # Assign the counts by position. Concatenating a fresh 0..n-1 frame
    # against a filtered index attaches counts to the wrong games and adds
    # NaN rows. A length mismatch (e.g. a move string containing a comma)
    # now raises instead of silently misaligning.
    games['white_took'] = number_of_takes(white_moves)
    games['black_took'] = number_of_takes(black_moves)

    games = games.drop(columns=['moves', 'last_move_at', 'created_at'])
    games = games[games.turns < 200].reset_index(drop=True)

    # Select the target by name. A hard-coded column position only matches
    # ``winner`` for one particular column layout.
    x = games.drop(columns='winner')
    y = games['winner']
    return x, y


def mate_games(chess_data):
    """Build features and target from rated games that ended in mate.

    Keeps rows whose ``victory_status`` equals ``"mate"``, then applies the
    shared preprocessing with ``victory_status`` dropped from the features.

    Args:
        chess_data: DataFrame with the columns ``id``, ``rated``,
            ``victory_status``, ``turns``, ``winner``, ``opening_name``,
            ``opening_eco``, ``increment_code``, ``white_id``, ``black_id``,
            ``moves``, ``created_at`` and ``last_move_at``. It is not
            modified.

    Returns:
        ``(x, y)``: the feature frame without ``winner``, and the
        label-encoded ``winner`` column.
    """
    mates_only = chess_data[chess_data.victory_status == "mate"]
    return _build_features(mates_only, keep_victory_status=False)


def draw_and_mate_games(chess_data):
    """Build features and target from rated games not ended by time or resignation.

    Drops rows whose ``victory_status`` is ``"outoftime"`` or ``"resign"``,
    then applies the shared preprocessing with ``victory_status``
    label-encoded and kept as a feature.

    Args:
        chess_data: DataFrame with the same columns ``mate_games`` expects.
            It is not modified.

    Returns:
        ``(x, y)``: the feature frame without ``winner``, and the
        label-encoded ``winner`` column.
    """
    excluded = chess_data.victory_status.isin(["outoftime", "resign"])
    return _build_features(chess_data[~excluded], keep_victory_status=True)

0 comments on commit 181075c

Please sign in to comment.