From d287c982f670b97f0f2688874bbe6733957b94b6 Mon Sep 17 00:00:00 2001
From: Tereso del Rio
Date: Sat, 23 Sep 2023 16:58:03 +0100
Subject: [PATCH] Some unused code removed

---
 .github/workflows/tests.yaml    |   2 +-
 create_clean_dataset.py         |  29 ++-
 from_poly_set_to_features.py    | 125 -----------
 .../dataset_manipulation.py     |  11 -
 .../exploit_symmetries.py       |  88 --------
 replicating_Dorians_features.py |   6 +-
 test_models.py                  | 111 ++--------
 train_models.py                 | 204 ++++----------------
 8 files changed, 74 insertions(+), 502 deletions(-)
 delete mode 100644 from_poly_set_to_features.py
 delete mode 100644 packages/dataset_manipulation/exploit_symmetries.py

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 29d872a..784068b 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -4,7 +4,7 @@ on: [push]
 
 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: windows-latest
 
     steps:
     - name: Checkout code
diff --git a/create_clean_dataset.py b/create_clean_dataset.py
index 0e73222..83581f6 100644
--- a/create_clean_dataset.py
+++ b/create_clean_dataset.py
@@ -11,25 +11,24 @@
     from dataset_manipulation import remove_notunique_features
 else:
     from packages.dataset_manipulation import remove_notunique_features
-from from_poly_set_to_features import poly_set_feature_extractor
 from find_filename import find_dataset_filename
 from find_filename import find_other_filename
 
 
-def create_dataframe(dataset):
-    all_features = []
-    all_labels = dataset[1][:]
-    all_timings = dataset[2][:]
-    all_original_polynomials = []
-    for index, all_projections in enumerate(dataset[0]):
-        original_polynomials = all_projections[0][0]
-        all_original_polynomials.append(original_polynomials)
-    names, all_features =\
-        poly_set_feature_extractor(all_original_polynomials,
-                                   determine_standarization=True,
-                                   determine_unique_features=True)
-    return np.array(all_original_polynomials), np.array(names),\
-        np.array(all_features), np.array(all_labels), np.array(all_timings)
+# def create_dataframe(dataset):
+#     all_features = []
+#     all_labels = dataset[1][:]
+#     all_timings = dataset[2][:]
+#     all_original_polynomials = []
+#     for index, all_projections in enumerate(dataset[0]):
+#         original_polynomials = all_projections[0][0]
+#         all_original_polynomials.append(original_polynomials)
+#     names, all_features =\
+#         poly_set_feature_extractor(all_original_polynomials,
+#                                    determine_standarization=True,
+#                                    determine_unique_features=True)
+#     return np.array(all_original_polynomials), np.array(names),\
+#         np.array(all_features), np.array(all_labels), np.array(all_timings)
 
 
 # dataset_filename = os.path.join(os.path.dirname(__file__),
diff --git a/from_poly_set_to_features.py b/from_poly_set_to_features.py
deleted file mode 100644
index 86d7482..0000000
--- a/from_poly_set_to_features.py
+++ /dev/null
@@ -1,125 +0,0 @@
-"""This file will contain the functions necessary to convert
-a list of sets of polynomials to a list of their features.
-This features will be unique and standarised"""
-import numpy as np
-import pickle
-from packages.dataset_manipulation import augmentate_dataset
-from find_filename import find_other_filename
-from replicating_Dorians_features import features_from_set_of_polys
-
-
-def poly_set_feature_extractor(sets_of_polys, determine_unique_features=False,
-                               determine_standarization=False):
-    """Given a list of polynomial sets will return a list of its features"""
-    features_list = []
-    for set_of_polys in sets_of_polys:
-        names, features = features_from_set_of_polys(set_of_polys)
-        features_list.append(features)
-    if determine_unique_features:
-        # if we want to find unique feature names
-        find_unique_features(names, features_list)
-    unique_names, unique_features = get_unique_features(names, features_list)
-    if determine_standarization:
-        find_standarizing_values(unique_names, unique_features)
-    standarized_features = get_standarized_features(unique_names, unique_features)
-    return names, standarized_features
-
-
-# def features_set_of_polys(original_polynomials):
-#     instance_features = []
-#     names = []
-#     nvar = len(original_polynomials[0][0]) - 1
-#     for var in range(nvar):
-#         degrees = [[monomial[var] for monomial in poly]
-#                    for poly in original_polynomials]
-#         var_features, var_features_names = create_features(degrees,
-#                                                             variable=var)
-#         instance_features += var_features
-#         names += var_features_names
-#         sdegrees = [[sum(monomial) for monomial in poly
-#                      if monomial[var]!=0]+[0]
-#                     for poly in original_polynomials]
-#         svar_features, svar_features_names = create_features(sdegrees,
-#                                                               variable=var,
-#                                                               sv=True)
-#         instance_features += svar_features
-#         names += svar_features_names
-#     return names, instance_features
-
-
-def find_unique_features(names, features):
-    """
-    Saves the name of unique features in the assigned file.
-
-    When two features share the same value for all the instances,
-    or they are the same after adition or multiplication,
-    one of them is not considered unique.
-    """
-    # we want to look for uniqueness after augmenting to discard
-    # some that might look equal
-    # creating labels and timing for the augmentate_dataset function
-    labels = [0]*len(features)
-    timings = [[0, 0]]*len(features)
-    augmented_features, _, _ = augmentate_dataset(features, labels, timings)
-    # now we look for the unique features
-    unique_features = []
-    unique_names = []
-    for index, feature in enumerate(zip(*augmented_features)):
-        if (any([np.array_equal(feature, ex_feature)
-                 for ex_feature in unique_features])
-                or np.std(feature) == 0):
-            # check if this feature has been already recorded
-            pass
-        elif feature.count(feature[0]) == len(feature):
-            # check if it is a constant list
-            pass
-        else:
-            # if none of the previous conditions then
-            unique_features.append(feature)
-            unique_names.append(names[index])
-    unique_names_filename = find_other_filename('unique_names')
-    with open(unique_names_filename, 'wb') as unique_names_file:
-        pickle.dump(unique_names, unique_names_file)
-
-
-def get_unique_features(names, features):
-    """Return the features corresponding to a name in 'unique_names'."""
-    # We recover the list of unique feature names
-    unique_names_filename = find_other_filename('unique_names')
-    with open(unique_names_filename, 'rb') as unique_names_file:
-        unique_names = pickle.load(unique_names_file)
-    # we keep only the features that are unique
-    unique_features = []
-    index = 0
-    for feature in zip(*features):
-        if names[index] in unique_names:
-            unique_features.append(feature)
-        index += 1
-    return unique_names, np.transpose(unique_features)
-
-
-def find_standarizing_values(names, features_list):
-    """Finds and saves the mean and std of the different features
-    so that features can be standarised in a consistent way
-    before giving them to the machine learning models"""
-    standarizing_values = dict()
-    for name, features in zip(names, features_list):
-        standarizing_values[name] = (np.mean(features), np.std(features))
-    standarizing_values_filename = find_other_filename('standarizing_values')
-    with open(standarizing_values_filename, 'wb') as standarizing_values_file:
-        pickle.dump(standarizing_values, standarizing_values_file)
-
-
-def get_standarized_features(names, features):
-    """Returns the standarised features."""
-    # We recover the list of unique feature names
-    standarizing_values_filename = find_other_filename('standarizing_values')
-    with open(standarizing_values_filename, 'rb') as standarizing_values_file:
-        standarizing_values = pickle.load(standarizing_values_file)
-    # we keep only the features that are unique
-    standarized_features = []
-    index = 0
-    for index, feature in enumerate(zip(*features)):
-        mean, std = standarizing_values[names[index]]
-        standarized_features.append((feature-mean)/std)
-    return np.transpose(standarized_features)
diff --git a/packages/dataset_manipulation/dataset_manipulation.py b/packages/dataset_manipulation/dataset_manipulation.py
index 8e685e9..fcba1bf 100644
--- a/packages/dataset_manipulation/dataset_manipulation.py
+++ b/packages/dataset_manipulation/dataset_manipulation.py
@@ -2,8 +2,6 @@
 import numpy as np
 import math
 import random
-from .exploit_symmetries import give_all_symmetries
-from .exploit_symmetries import augmentate_timings
 from itertools import permutations
 # from sklearn.preprocessing import normalize
 
@@ -106,15 +104,6 @@ def name_unique_features(names, features):
     return new_names
 
 
-def get_unique_feature_names(unique_names, names, features):
-    """Return the features corresponding to a name in 'unique_names'."""
-    unique_features = []
-    for index, feature in enumerate(zip(*features)):
-        if names[index] in unique_names:
-            unique_features.append(feature)
-    return np.transpose(unique_features)
-
-
 def remove_notunique_features(names, features, nvar=3):
     # creating some targets and timing because the function requires them
     timings = [list(range(math.factorial(nvar)))]*len(features)
diff --git a/packages/dataset_manipulation/exploit_symmetries.py b/packages/dataset_manipulation/exploit_symmetries.py
deleted file mode 100644
index 755b747..0000000
--- a/packages/dataset_manipulation/exploit_symmetries.py
+++ /dev/null
@@ -1,88 +0,0 @@
-"""
-Exploit symmetries in three variable polynomials to generate up to six
-instances out of each existing one.
-
-The task at hand consists in classify this features.
-We will take advantage of the fact that we can change the target by
-reordering the features.
-
-This file will contain:
-- features_to_canonical_ordering: a function able to reorder the features so
-that the target becomes '1', this ordering is called the canonical order.
-- give_all_symmetries: a function that given the canonical order returns the
-reorderings for each of the possible targets '1','2',...,'6'.
-"""
-from itertools import permutations
-
-
-def get_perms(variables):
-    perms = [list(elem) for elem in permutations(variables)]
-    return perms
-
-
-def features_to_canonical_target(features, optimal_ordering, nvar=3):
-    """
-    Reorder the features for the target to be '1'.
-
-    This is done by reordering the features according to the optimal variable
-    ordering of the set of polynomials.
-    """
-    perms = get_perms(list(range(nvar)))
-    best_variable_ordering = perms[optimal_ordering]
-    nfeatures = len(features)
-    split_features = [features[int(var*nfeatures/nvar):
-                               int((var+1)*nfeatures/nvar)]
-                      for var in range(nvar)]
-    ordered_features = [split_features[best_variable_ordering[i]]
-                        for i in range(nvar)]
-    return ordered_features
-
-
-def give_all_symmetries(features, optimal_ordering=0, nvar=3):
-    """Reorder the features for all possible targets.
-    Returns a list of of all symmetries, the first one
-    corresponding to the optimal ordering"""
-    perms = get_perms(list(range(nvar)))
-    ordered_features = features_to_canonical_target(features,
-                                                    optimal_ordering)
-    all_symmetries = []
-    for perm in perms:
-        new_order_features = [0]*nvar
-        for index, var in enumerate(perm):
-            new_order_features[var] = ordered_features[index]
-        flatten_new_order_features = [elem for lst in new_order_features
-                                      for elem in lst]
-        all_symmetries.append(flatten_new_order_features)
-    return all_symmetries
-
-
-def augmentate_timings(timings, optimal_ordering, nvar=3):
-    """Given all the timings returns a list of all the possible reorderings
-    so that the first reordering corresponds to the optimal ordering and
-    the others follow that"""
-    perms = get_perms(list(range(nvar)))
-    best_variable_ordering = perms[optimal_ordering]
-    new_perms = get_perms(best_variable_ordering)
-    all_timings = []
-    for perm in new_perms:
-        # compute in which index this perm used to be
-        perm_index = perms.index(perm)
-        # find associated timing and append
-        all_timings.append(reorder_timings(timings, perm_index, nvar=3))
-    return all_timings
-
-
-def reorder_timings(timings, first_ordering, nvar=3):
-    """Given all the timings reorder them so that the first one
-    corresponds to first_ordering and the rest from the usual
-    permutations done from it"""
-    perms = get_perms(list(range(nvar)))
-    first_variable_ordering = perms[first_ordering]
-    new_perms = get_perms(first_variable_ordering)
-    new_timings = []
-    for perm in new_perms:
-        # compute in which index this perm used to be
-        perm_index = perms.index(perm)
-        # find associated timing and append
-        new_timings.append(timings[perm_index])
-    return new_timings
diff --git a/replicating_Dorians_features.py b/replicating_Dorians_features.py
index 688858d..4453afd 100644
--- a/replicating_Dorians_features.py
+++ b/replicating_Dorians_features.py
@@ -1,11 +1,7 @@
-"""
-IS THIS BEING USED?
-YES, IT IS!
-""" - import itertools # from xml.sax.handler import all_features import numpy as np + from config.general_values import operations diff --git a/test_models.py b/test_models.py index 662c82a..6f60264 100644 --- a/test_models.py +++ b/test_models.py @@ -1,16 +1,11 @@ -import csv import math import pickle import importlib.util -import numpy as np -from sklearn import metrics -from config.general_values import dataset_qualities -from config.ml_models import all_models + + from config.ml_models import regressors from config.ml_models import classifiers from config.ml_models import heuristics -from find_filename import find_output_filename -from find_filename import find_dataset_filename from find_filename import find_model_filename from main_heuristics import ordering_choices_heuristics # from train_models import ordering_choice_reinforcement @@ -22,97 +17,21 @@ from packages.dataset_manipulation.dataset_manipulation import augmentate_instance -# def test_model(trained_model_filename, test_dataset_filename): +# def test_model(ml_model, paradigm, testing_method='Augmented'): +# test_dataset_filename = find_dataset_filename('Test', +# testing_method) +# with open(test_dataset_filename, 'rb') as test_dataset_file: +# testing_dataset = pickle.load(test_dataset_file) +# trained_model_filename = find_model_filename(paradigm, +# ml_model) # with open(trained_model_filename, 'rb') as trained_model_file: # model = pickle.load(trained_model_file) -# with open(test_dataset_filename, 'rb') as test_dataset_file: -# x_test, y_test, _ = pickle.load(test_dataset_file) -# y_pred = model.predict(x_test) -# return metrics.accuracy_score(y_test, y_pred) - - -def test_results(training_method): - output_filename = find_output_filename(training_method) - with open(output_filename, 'w') as output_file: - writer_balanced = csv.writer(output_file) - writer_balanced.writerow(["Name"] + dataset_qualities) - for ml_model in all_models: - trained_model_filename = find_model_filename(training_method, - ml_model) - accuracy = dict() - for testing_method in dataset_qualities: - test_dataset_filename = find_dataset_filename('Test', - testing_method) - accuracy[testing_method] = test_model(trained_model_filename, - test_dataset_filename) - round_accuracies = [round(acc, 2) - for acc in [accuracy[method] - for method in dataset_qualities]] - writer_balanced.writerow([ml_model + "-" + training_method] + - round_accuracies) - - -def test_classifier(ml_model, testing_method='Augmented'): - trained_model_filename = find_model_filename('Classification', - ml_model) - test_dataset_filename = find_dataset_filename('Test', - testing_method) - with open(trained_model_filename, 'rb') as trained_model_file: - model = pickle.load(trained_model_file) - with open(test_dataset_filename, 'rb') as test_dataset_file: - x_test, y_test, all_timings = pickle.load(test_dataset_file) - chosen_indices = [return_regressor_choice(model, features) for features in x_test] - return compute_metrics(chosen_indices, y_test, all_timings) - - -def timings_in_test(model, testing_method='Augmented', training_method=None): - test_dataset_filename = find_dataset_filename('Test', - testing_method) - with open(test_dataset_filename, 'rb') as test_dataset_file: - x_test, _, all_timings = pickle.load(test_dataset_file) - if model == 'optimal': - t_pred = [min(timings) for timings in all_timings] - else: - trained_model_filename = find_model_filename(training_method, - model) - with open(trained_model_filename, 'rb') as trained_model_file: - model = 
-        y_pred = model.predict(x_test)
-        # This doesn't work because agumenteed and balanced
-        # only return one timing, not 6
-        t_pred = [timings[y] for timings, y in zip(all_timings, y_pred)]
-    return t_pred
-
-
-def test_regressor(ml_model):
-    trained_model_filename = find_model_filename('Regression',
-                                                 ml_model)
-    test_dataset_filename = find_dataset_filename('Test',
-                                                  'Regression')
-    with open(trained_model_filename, 'rb') as trained_model_file:
-        model = pickle.load(trained_model_file)
-    with open(test_dataset_filename, 'rb') as test_dataset_file:
-        x_test, y_test, all_timings = pickle.load(test_dataset_file)
-    y_pred = model.predict(x_test)
-    avg_error = sum([abs(p-t) for p, t in zip(y_pred, y_test)])/len(y_pred)
-    print(f"{ml_model} gave {avg_error}")
-
-
-def test_model(ml_model, paradigm, testing_method='Augmented'):
-    test_dataset_filename = find_dataset_filename('Test',
-                                                  testing_method)
-    with open(test_dataset_filename, 'rb') as test_dataset_file:
-        testing_dataset = pickle.load(test_dataset_file)
-    trained_model_filename = find_model_filename(paradigm,
-                                                 ml_model)
-    with open(trained_model_filename, 'rb') as trained_model_file:
-        model = pickle.load(trained_model_file)
-    chosen_indices = choose_indices(model, testing_dataset)
-    return compute_metrics(chosen_indices,
-                           testing_dataset['labels'],
-                           testing_dataset['timings'],
-                           testing_dataset['cells'],
-                           ml_model)
+#     chosen_indices = choose_indices(model, testing_dataset)
+#     return compute_metrics(chosen_indices,
+#                            testing_dataset['labels'],
+#                            testing_dataset['timings'],
+#                            testing_dataset['cells'],
+#                            ml_model)
 
 
 def choose_indices(model_name, testing_dataset, paradigm='', training_quality='Augmented'):
diff --git a/train_models.py b/train_models.py
index 0ffb47b..4eb4c22 100644
--- a/train_models.py
+++ b/train_models.py
@@ -1,18 +1,17 @@
-import math
+
 import pickle
-import random
 from yaml_tools import read_yaml_from_file
 from config.ml_models import all_models
 from find_filename import find_dataset_filename
 from find_filename import find_hyperparams_filename
 from find_filename import find_model_filename
-from find_filename import find_other_filename
-from dataset_manipulation import give_all_symmetries
-import numpy as np
+# from find_filename import find_other_filename
+# from dataset_manipulation import give_all_symmetries
+# import numpy as np
 # from sklearn import metrics
-from itertools import combinations
-from replicating_Dorians_features import compute_features_for_var
-from test_models import compute_metrics
+# from itertools import combinations
+# from replicating_Dorians_features import compute_features_for_var
+# from test_models import compute_metrics
 
 
 def train_model(model_name, paradigm, training_quality):
@@ -33,156 +32,39 @@ def train_model(model_name, paradigm, training_quality):
     return model
 
 
-def train_regression_model(model_name, method):
-    train_data_filename = find_dataset_filename('Train', method=method)
-    with open(train_data_filename, 'rb') as train_data_file:
-        train_dataset = pickle.load(train_data_file)
-    # hyperparams_file = find_hyperparams_filename(method, model_name)
-    # hyperparams = read_yaml_from_file(hyperparams_file)
-    train_dataset['features'] = np.asarray([x_t for x_t, t_t in zip(train_dataset['features'], train_dataset['timings'])
-                                            if t_t[:4] != 'Over'], dtype=float)
-    train_dataset['timings'] = np.asarray([t_t for t_t in train_dataset['timings']
-                                           if t_t[:4] != 'Over'], dtype=float)
-    ####
-    # IS THIS REALLY DOING SOMTHING?
-    # What if we used twice timelimit instead
-    current_model = ml_regressors[model_name]
-    reg = current_model()  # **hyperparams)
-    reg.fit(train_dataset['features'], train_dataset['timings'])
-    # trained_model_filename = find_model_filename(method, model_name, 'regression')
-    # with open(trained_model_filename, 'wb') as trained_model_file:
-    #     pickle.dump(reg, trained_model_file)
-    return reg
-
-
-def choose_using_regression(x_test, regressor):
-    timings = regressor.predict(give_all_symmetries(x_test, 0))
-    return np.argmin(timings)
-
-
-def test_regression_model(method, regressor):
-    test_data_filename = find_dataset_filename('Test', method=method)
-    with open(test_data_filename, 'rb') as test_data_file:
-        x_test, y_test, t_test = pickle.load(test_data_file)
-    x_test = np.asarray([x_t for x_t, t_t in zip(x_test, t_test)
-                         if t_t[:4] != 'Over'], dtype=float)
-    y_test = np.asarray([y_t for y_t, t_t in zip(y_test, t_test)
-                         if t_t[:4] != 'Over'], dtype=float)
-    y_pred = [choose_using_regression(x_i, regressor) for x_i in x_test]
-
-
-def train_reinforcement_model(model_name, method='Normal'):
-    train_data_filename = find_dataset_filename('Train', method=method)
-    with open(train_data_filename, 'rb') as train_data_file:
-        train_dataset = pickle.load(train_data_file)
-    # hyperparams_file = find_hyperparams_filename(method, model_name)
-    # hyperparams = read_yaml_from_file(hyperparams_file)
-    current_model = all_models[model_name]
-    # model = current_model(**hyperparams)
-    model = current_model()
-    first_polys = train_dataset['projections'][0][0][0]
-    first_features = get_vars_features(first_polys)
-    first_labels = [random.random() for _ in range(len(first_features))]
-    model.fit(first_features, first_labels)
-    training_features, training_labels = [], []
-    for i in range(30):
-        for projections, timings \
-                in zip(train_dataset['projections'], train_dataset['timings']):
-            new_training_features, new_training_labels = \
-                training_instances_reinforcement(model, projections, timings)
-            training_features += new_training_features
-            training_labels += new_training_labels
-        model.fit(training_features, training_labels)
-        print(test_reinforcement_model(model))
-    trained_model_filename = find_model_filename('reinforcement', model_name)
-    with open(trained_model_filename, 'wb') as trained_model_file:
-        pickle.dump(model, trained_model_file)
-
-
-def training_instances_reinforcement(model, projections, timings):
-    original_polynomials = projections[0][0]
-    nvar = len(original_polynomials[0][0]) - 1
-    vars_features = get_vars_features(original_polynomials)
-    evaluations = [model.predict([var_features])[0]
-                   for var_features in vars_features]
-    timing = []
-    for var in range(nvar):
-        # retruns the polynomials after projection wrt var
-        projected_polynomials = projections[var * math.factorial(nvar-1)][1]
-        new_var = var_choice_reinforcement(model, projected_polynomials)
-        ordering_chosen = new_var + var * math.factorial(nvar-1)
-        timing.append(timings[ordering_chosen])
-    # now compute which part of the difference between
-    # evaluations[i]/evaluations[j] and timing[i]/timing[j]
-    # corresponds to each evaluation
-    instances_features = []
-    instances_labels = []
-    pairs = list(combinations(range(nvar), 2))
-    for i, j in pairs:
-        correction_coefficient = \
-            math.sqrt((timing[i]/timing[j])/(evaluations[i]/evaluations[j]))
-        instances_features += [vars_features[i], vars_features[j]]
-        instances_labels += [evaluations[i]*correction_coefficient,
-                             evaluations[j]/correction_coefficient]
-    return instances_features, instances_labels
-
-
-def get_vars_features(polynomials):
-    '''Will return the features of each variable
-    in the given set of polynomials'''
-    vars_features = []
-    nvar = len(polynomials[0][0]) - 1
-    unique_features_filename = find_other_filename("unique_features")
-    with open(unique_features_filename, 'rb') as unique_features_file:
-        unique_names = pickle.load(unique_features_file)
-    for var in range(nvar):
-        var_features, var_names = \
-            compute_features_for_var(polynomials, var)
-        var_features = [feature for feature, name
-                        in zip(var_features, var_names)
-                        if name in unique_names]
-        vars_features.append(var_features)
-    return vars_features
-
-
-def var_choice_reinforcement(model, polynomials):
-    '''This function will return the next variable to project
-    chosen by the model trained using reinforcement'''
-    vars_features = get_vars_features(polynomials)
-    evaluations = model.predict(vars_features)
-    min_value = np.min(evaluations)
-    min_indices = np.where(evaluations == min_value)[0]
-    # Randomly select one of the minimal indices
-    return np.random.choice(min_indices)
-
-
-def ordering_choice_reinforcement(model, projections):
-    '''This function will return the ordering chosen by the RL model'''
-    nvar = len(projections[0])
-    ordering = 0
-    for level in range(nvar-1):
-        polynomials = projections[ordering][level]
-        next_var = var_choice_reinforcement(model, polynomials)
-        ordering += next_var * math.factorial(nvar-1-level)
-    return ordering
-
-
-def test_reinforcement_model(model_name, method='Normal', nvar=3):
-    train_data_filename = find_dataset_filename('Test', method=method)
-    with open(train_data_filename, 'rb') as train_data_file:
-        testing_dataset = pickle.load(train_data_file)
-    # trained_model_filename = find_model_filename('reinforcement', model_name)
-    # with open(trained_model_filename, 'rb') as trained_model_file:
-    #     model = pickle.load(trained_model_file)
-    model = model_name
-    chosen_indices = [ordering_choice_reinforcement(model, projections)
-                      for projections in testing_dataset['projections']]
-    metrics = compute_metrics(chosen_indices,
-                              testing_dataset['labels'],
-                              testing_dataset['timings'],
-                              testing_dataset['cells'],
-                              'reinfocement')
-    augmented_metrics = {key: metrics[key] if key in ['Accuracy', 'Markup']
-                         else math.factorial(nvar)*metrics[key]
-                         for key in metrics}
-    return augmented_metrics
+# def train_regression_model(model_name, method):
+#     train_data_filename = find_dataset_filename('Train', method=method)
+#     with open(train_data_filename, 'rb') as train_data_file:
+#         train_dataset = pickle.load(train_data_file)
+#     # hyperparams_file = find_hyperparams_filename(method, model_name)
+#     # hyperparams = read_yaml_from_file(hyperparams_file)
+#     train_dataset['features'] = np.asarray([x_t for x_t, t_t in zip(train_dataset['features'], train_dataset['timings'])
+#                                             if t_t[:4] != 'Over'], dtype=float)
+#     train_dataset['timings'] = np.asarray([t_t for t_t in train_dataset['timings']
+#                                            if t_t[:4] != 'Over'], dtype=float)
+#     ####
+#     # IS THIS REALLY DOING SOMTHING?
+#     # What if we used twice timelimit instead
+#     current_model = ml_regressors[model_name]
+#     reg = current_model()  # **hyperparams)
+#     reg.fit(train_dataset['features'], train_dataset['timings'])
+#     # trained_model_filename = find_model_filename(method, model_name, 'regression')
+#     # with open(trained_model_filename, 'wb') as trained_model_file:
+#     #     pickle.dump(reg, trained_model_file)
+#     return reg
+
+
+# def choose_using_regression(x_test, regressor):
+#     timings = regressor.predict(give_all_symmetries(x_test, 0))
+#     return np.argmin(timings)
+
+
+# def test_regression_model(method, regressor):
+#     test_data_filename = find_dataset_filename('Test', method=method)
+#     with open(test_data_filename, 'rb') as test_data_file:
+#         x_test, y_test, t_test = pickle.load(test_data_file)
+#     x_test = np.asarray([x_t for x_t, t_t in zip(x_test, t_test)
+#                          if t_t[:4] != 'Over'], dtype=float)
+#     y_test = np.asarray([y_t for y_t, t_t in zip(y_test, t_test)
+#                          if t_t[:4] != 'Over'], dtype=float)
+#     y_pred = [choose_using_regression(x_i, regressor) for x_i in x_test]