
Commit

Update
Tereso del Rio committed Jun 11, 2023
1 parent 35cd856 commit e181625
Showing 19 changed files with 250 additions and 145 deletions.
135 changes: 62 additions & 73 deletions choose_hyperparams.py
@@ -1,19 +1,3 @@
"""
The experiments in [1] are replicated with some changes.

The first change is that the testing data is balanced, so that all targets
are almost equally common.
Then we use three training sets: the dataset as in [1], a balanced dataset
and a data-augmented dataset.

[1]Florescu, D., England, M. (2020). A Machine Learning Based Software Pipeline
to Pick the Variable Ordering for Algorithms with Polynomial Inputs.
Bigatti, A., Carette, J., Davenport, J., Joswig, M., de Wolff, T. (eds)
Mathematical Software, ICMS 2020. ICMS 2020. Lecture Notes in Computer Science,
vol 12097. Springer, Cham. https://doi.org/10.1007/978-3-030-52200-1_30
"""


import os
import pickle
import csv
@@ -47,66 +31,71 @@ def choose_hyperparams(ml_model, method):
chosen by cross validation is created"""
this_dataset_file = find_dataset_filename('train', method=method)
with open(this_dataset_file, 'rb') as f:
method_x_train, method_y_train = pickle.load(f)
hyperparams = k_folds_ml(method_x_train, method_y_train, model=ml_model)
x_train, y_train, _ = pickle.load(f)
hyperparams = k_folds_ml(x_train, y_train, model=ml_model)
hyperparams_filename = find_hyperparams_filename(method, ml_model)
write_yaml_to_file(hyperparams, hyperparams_filename)


test_balanced_dataset_file = os.path.join(os.path.dirname(__file__),
'datasets', 'test',
'balanced_test_dataset.txt')
with open(test_balanced_dataset_file, 'rb') as g:
balanced_x_test, balanced_y_test = pickle.load(g)
# test_balanced_dataset_file = os.path.join(os.path.dirname(__file__),
# 'datasets', 'test',
# 'balanced_test_dataset.txt')
# with open(test_balanced_dataset_file, 'rb') as g:
# balanced_x_test, balanced_y_test = pickle.load(g)

# test_normal_dataset_file = os.path.join(os.path.dirname(__file__),
# 'datasets', 'test',
# 'normal_test_dataset.txt')
# with open(test_normal_dataset_file, 'rb') as g:
# normal_x_test, normal_y_test = pickle.load(g)

# output_file_balanced = os.path.join(os.path.dirname(__file__),
# 'ml_results_k_fold_tested_in_balanced.csv')
# with open(output_file_balanced, 'w') as f_balanced:
# writer_balanced = csv.writer(f_balanced)
# writer_balanced.writerow(["Name"] + dataset_types)
# output_file_normal = os.path.join(os.path.dirname(__file__),
# 'ml_results_k_fold_tested_in_normal.csv')
# with open(output_file_normal, 'w') as f_normal:
# writer_normal = csv.writer(f_normal)
# writer_normal.writerow(["Name"] + dataset_types)
# for ml_model in ml_models:
# print(f"Model: {ml_model}")
# acc_balanced = dict()
# acc_normal = dict()
# for method in dataset_types:
# this_dataset_file = os.path.join(os.path.dirname(__file__),
# 'datasets', 'train',
# f'{method}_train_dataset.txt')
# with open(this_dataset_file, 'rb') as f:
# x_train, y_train, _ = pickle.load(f)
# hyperparams = k_folds_ml(x_train, y_train,
# model=ml_model)
# write_yaml_to_file(hyperparams,
# os.path.join(os.path.dirname(__file__),
# 'config', 'hyperparams',
# f'{method}_{ml_model}'))
# current_classifier = classifiers[ml_model]
# clf = current_classifier(**hyperparams)
# clf.fit(x_train, y_train)
# acc_balanced[method] = clf.score(balanced_x_test,
# balanced_y_test)
# acc_normal[method] = clf.score(normal_x_test, normal_y_test)
# method_filename = os.path.join(os.path.dirname(__file__),
# 'config', 'models',
# f'{method}_trained_model.txt')
# with open(method_filename, 'wb') as method_file:
# pickle.dump(clf, method_file)
# round_accuracies_balanced = [round(acc, 2)
# for acc in [acc_balanced[method_here]
# for method_here in dataset_types]]
# round_accuracies_normal = [round(acc, 2)
# for acc in [acc_normal[method_here]
# for method_here in dataset_types]]
# writer_balanced.writerow([ml_model] + round_accuracies_balanced)
# writer_normal.writerow([ml_model] + round_accuracies_normal)

test_normal_dataset_file = os.path.join(os.path.dirname(__file__),
'datasets', 'test',
'normal_test_dataset.txt')
with open(test_normal_dataset_file, 'rb') as g:
normal_x_test, normal_y_test = pickle.load(g)

output_file_balanced = os.path.join(os.path.dirname(__file__),
'ml_results_k_fold_tested_in_balanced.csv')
with open(output_file_balanced, 'w') as f_balanced:
writer_balanced = csv.writer(f_balanced)
writer_balanced.writerow(["Name"] + dataset_types)
output_file_normal = os.path.join(os.path.dirname(__file__),
'ml_results_k_fold_tested_in_normal.csv')
with open(output_file_normal, 'w') as f_normal:
writer_normal = csv.writer(f_normal)
writer_normal.writerow(["Name"] + dataset_types)
for ml_model in ml_models:
print(f"Model: {ml_model}")
acc_balanced = dict()
acc_normal = dict()
for method in dataset_types:
this_dataset_file = os.path.join(os.path.dirname(__file__),
'datasets', 'train',
f'{method}_train_dataset.txt')
with open(this_dataset_file, 'rb') as f:
method_x_train, method_y_train = pickle.load(f)
hyperparams = k_folds_ml(method_x_train, method_y_train,
model=ml_model)
write_yaml_to_file(hyperparams,
os.path.join(os.path.dirname(__file__),
'config', 'hyperparams',
f'{method}_{ml_model}'))
current_classifier = classifiers[ml_model]
clf = current_classifier(**hyperparams)
clf.fit(method_x_train, method_y_train)
acc_balanced[method] = clf.score(balanced_x_test,
balanced_y_test)
acc_normal[method] = clf.score(normal_x_test, normal_y_test)
method_file = os.path.join(os.path.dirname(__file__),
'config', 'models',
f'{method}_trained_model.txt')
with open(method_file, 'wb') as f_method:
pickle.dump(clf, f_method)
round_accuracies_balanced = [round(acc, 2)
for acc in [acc_balanced[method_here]
for method_here in dataset_types]]
round_accuracies_normal = [round(acc, 2)
for acc in [acc_normal[method_here]
for method_here in dataset_types]]
writer_balanced.writerow([ml_model] + round_accuracies_balanced)
writer_normal.writerow([ml_model] + round_accuracies_normal)
# model = 'KNN'
# method = 'balanced'
# choose_hyperparams(model, method)
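The refactored choose_hyperparams above delegates the cross-validated search to k_folds_ml and the serialisation to write_yaml_to_file, neither of which appears in this commit. A minimal sketch of what they could look like, assuming scikit-learn's GridSearchCV and a hypothetical per-model grid (the names and grids below are illustrative, not the project's actual config):

import yaml
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Hypothetical search space and estimator map; the real ones live in config/ml_models.
param_grids = {'RF': {'n_estimators': [50, 100, 200], 'max_depth': [None, 5, 10]}}
estimators = {'RF': RandomForestClassifier}


def k_folds_ml_sketch(x_train, y_train, model='RF', folds=5):
    """Pick hyperparameters by grid search with k-fold cross-validation."""
    search = GridSearchCV(estimators[model](), param_grids[model],
                          cv=folds, scoring='accuracy')
    search.fit(x_train, y_train)
    return search.best_params_


def write_yaml_to_file_sketch(hyperparams, filename):
    """Persist the chosen hyperparameters so train_models.py can reload them."""
    with open(filename + '.yaml', 'w') as yaml_file:
        yaml.dump(hyperparams, yaml_file)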
30 changes: 29 additions & 1 deletion create_clean_dataset.py
@@ -1,3 +1,8 @@
"""This file contains a function that given the raw dataset containing
the sets of polynomials and its timings for each order, creates a dataset
containing a set of unique features and its class"""

import os
import pickle
import numpy as np
from replicating_Dorians_features import extract_features
@@ -6,6 +11,30 @@
from dataset_manipulation import remove_notunique_features
else:
from packages.dataset_manipulation import remove_notunique_features
from from_poly_set_to_features import poly_set_feature_extractor


def create_dataframe(dataset):
all_features = []
all_targets = dataset[1][:]
all_timings = dataset[2][:]
all_original_polynomials = []
for index, all_projections in enumerate(dataset[0]):
original_polynomials = all_projections[0][0]
all_original_polynomials.append(original_polynomials)
names, all_features = poly_set_feature_extractor(all_original_polynomials,
determine_standarization=True,
determine_unique_features=True)
return np.array(all_original_polynomials), np.array(names),\
np.array(all_features), np.array(all_targets), np.array(all_timings)


dataset_filename = os.path.join(os.path.dirname(__file__),
'DatasetsBeforeProcessing',
'dataset_without_repetition_return_ncells.txt')
with open(dataset_filename, 'rb') as f:
dataset = pickle.load(f)
original_polys_list, names, features_list, targets_list, timings_list = create_dataframe(dataset)


def cleaning_dataset(dataset_filename, clean_dataset_filename):
@@ -20,7 +49,6 @@ def cleaning_dataset(dataset_filename, clean_dataset_filename):
targets = np.array(targets_list)
timings = np.array(timings_list)
original_polys = np.array(original_polys_list)

with open(clean_dataset_filename, 'wb') as clean_dataset_file:
dataset = pickle.dump((original_polys, unique_names,
unique_features, targets, timings),
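create_dataframe and cleaning_dataset above index the raw pickle as dataset[0] (every projection of each instance, whose first projection starts with the original polynomials), dataset[1] (the target ordering) and dataset[2] (the timings of every ordering). The layout sketched below is an inference from that indexing, with made-up values, and is not verified against the actual file:

# Assumed (not verified) shape of the raw pickle read by cleaning_dataset.
instance_projections = [            # all_projections for one instance
    [                               # projection under the first variable ordering
        [[(2, 1, 0, 1)], [(0, 0, 3, 1)]],   # level 0 = the original polynomials
        # ... later projection levels ...
    ],
    # ... projections under the other orderings ...
]
dataset = ([instance_projections],                   # dataset[0]
           [3],                                      # dataset[1]: best ordering per instance
           [[1.2, 0.9, 4.1, 0.7, 2.3, 5.0]])         # dataset[2]: one timing per ordering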
4 changes: 2 additions & 2 deletions datasets/dataset_instances.csv
@@ -1,7 +1,7 @@
dataset,zero,one,two,three,four,five,total
train normal dataset,326,74,105,41,163,106,815
train balanced dataset,130,120,135,143,135,152,815
train balanced dataset,118,136,125,149,134,153,815
train augmented dataset,815,815,815,815,815,815,4890
test normal dataset,80,19,30,10,39,26,204
test balanced dataset,34,31,32,37,39,31,204
test balanced dataset,39,32,36,29,31,37,204
test augmented dataset,204,204,204,204,204,204,1224
Binary file modified datasets/test/augmented_test_dataset.txt
Binary file not shown.
Binary file modified datasets/test/balanced_test_dataset.txt
Binary file not shown.
Binary file modified datasets/test/normal_test_dataset.txt
Binary file not shown.
Binary file modified datasets/train/augmented_train_dataset.txt
Binary file not shown.
Binary file modified datasets/train/balanced_train_dataset.txt
Binary file not shown.
Binary file modified datasets/train/normal_train_dataset.txt
Binary file not shown.
20 changes: 12 additions & 8 deletions main.py
@@ -22,16 +22,20 @@
from test_models import test_results


original_dataset_file = find_dataset_filename('unclean')
clean_dataset_filename = find_dataset_filename('clean')
cleaning_dataset(original_dataset_file, clean_dataset_filename)
create_train_test_datasets()
# original_dataset_file = find_dataset_filename('unclean')
# clean_dataset_filename = find_dataset_filename('clean')
# cleaning_dataset(original_dataset_file, clean_dataset_filename)
# create_train_test_datasets()

# for ml_model in ml_models:
# for method in dataset_types:
# print(f"Choosing hyperparameters for {ml_model} in {method}")
# choose_hyperparams(ml_model, method)
for ml_model in ml_models:
print(f"Training {ml_model}")
for method in dataset_types:
choose_hyperparams(ml_model, method)
for ml_model in ml_models:
for method in dataset_types:
print(f"for {method}")
train_model(ml_model, method)
for testing_method in ['normal', 'balanced']:
for testing_method in dataset_types:
print(f"Testing {testing_method}")
test_results(testing_method)
3 changes: 2 additions & 1 deletion packages/build/lib/dataset_manipulation/__init__.py
@@ -1,4 +1,5 @@
from .dataset_manipulation import augmentate_dataset # noqa401
from .dataset_manipulation import balance_dataset # noqa401
from .dataset_manipulation import name_unique_features # noqa401
from .dataset_manipulation import remove_notunique_features # noqa401
from .dataset_manipulation import remove_notunique_features # noqa401
from .exploit_symmetries import give_all_symmetries # noqa401
41 changes: 34 additions & 7 deletions packages/build/lib/dataset_manipulation/dataset_manipulation.py
@@ -3,11 +3,12 @@
import math
import random
from .exploit_symmetries import give_all_symmetries
# from sklearn.preprocessing import normalize

nvar = 3


def augmentate_dataset(features, targets):
def augmentate_dataset(features, targets, timings):
"""
Multiply the size of the dataset by 6.

@@ -17,13 +18,16 @@ def augmentate_dataset(features, targets):
"""
symmetric_features = []
symmetric_targets = []
for features, target in zip(features, targets):
symmetric_timings = []
for features, target, timing in zip(features, targets, timings):
symmetric_features += give_all_symmetries(features, int(target))
symmetric_targets += list(range(math.factorial(nvar)))
return np.array(symmetric_features), np.array(symmetric_targets)
symmetric_timings += list(timing)
return np.array(symmetric_features), np.array(symmetric_targets), \
np.array(symmetric_timings)


def balance_dataset(features, targets):
def balance_dataset(features, targets, timings):
"""
Balance the dataset so all targets are almost equally common.

@@ -33,13 +37,16 @@ def balance_dataset(features, targets):
"""
balanced_features = []
balanced_targets = []
for features, target in zip(features, targets):
balanced_timings = []
for features, target, timing in zip(features, targets, timings):
symmetric_features = give_all_symmetries(features, int(target))
possible_targets = list(range(math.factorial(nvar)))
new_target = random.choice(possible_targets)
balanced_features.append(symmetric_features[new_target])
balanced_targets.append(new_target)
return np.array(balanced_features), np.array(balanced_targets)
balanced_timings.append(timing[new_target])
return np.array(balanced_features), np.array(balanced_targets),\
np.array(balanced_timings)


def name_unique_features(names, features):
@@ -57,16 +64,36 @@ def name_unique_features(names, features):
for ex_feature in new_features])
or np.std(feature) == 0):
rep += 1
elif feature.count(feature[0]) == len(feature):
print(names[index])
else:
# if 'max_in_polys_max_sig'==names[index][:20]:
# print("Check ", feature.count(feature[0])==len(feature))
# print(names[index])
# print(len(feature))
new_features.append(feature)
new_names.append(names[index])
return new_names


def remove_notunique_features(unique_names, names, features):
def get_unique_feature_names(unique_names, names, features):
"""Return the features corresponding to a name in 'unique_names'."""
unique_features = []
for index, feature in enumerate(zip(*features)):
if names[index] in unique_names:
unique_features.append(feature)
return np.transpose(unique_features)


def remove_notunique_features(names, features):
# creating some targets and timings because the function requires them
targets = [0]*len(features)
timings = [[0, 0]]*len(features)
augmented_features, _, _ = augmentate_dataset(features, targets, timings)
# normalized_augmented_features = normalize(augmented_features)
unique_names = name_unique_features(names, augmented_features)
unique_features = []
for index, feature in enumerate(zip(*features)):
if names[index] in unique_names:
unique_features.append(feature)
return unique_names, np.transpose(unique_features)
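The augmentation and balancing routines above both lean on give_all_symmetries: with nvar = 3 there are 3! = 6 variable orderings, so augmentation turns every instance into six relabelled copies while balancing keeps one randomly chosen ordering per instance. A toy run of the new three-return-value signatures, with a schematic feature vector and made-up timings:

import math
from packages.dataset_manipulation import augmentate_dataset, balance_dataset

features = [[1.0, 2.0, 3.0]]                  # one instance (schematic feature vector)
targets = [2]                                 # ordering 2 was the fastest
timings = [[5.0, 4.0, 1.0, 6.0, 7.0, 8.0]]    # one timing per ordering

aug_x, aug_y, aug_t = augmentate_dataset(features, targets, timings)
assert len(aug_x) == len(features) * math.factorial(3)   # six copies, targets 0..5

bal_x, bal_y, bal_t = balance_dataset(features, targets, timings)
assert len(bal_x) == len(features)            # one randomly chosen ordering kept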
packages/build/lib/dataset_manipulation/exploit_symmetries.py
@@ -37,7 +37,9 @@ def features_to_canonical_target(features, optimal_ordering):


def give_all_symmetries(features, optimal_ordering):
"""Reorder the features for all possible targets."""
"""Reorder the features for all possible targets.
Returns a list of all symmetries, the first one
corresponding to the optimal ordering."""
ordered_features = features_to_canonical_target(features,
optimal_ordering)
all_symmetries = []
3 changes: 2 additions & 1 deletion packages/dataset_manipulation/__init__.py
@@ -1,4 +1,5 @@
from .dataset_manipulation import augmentate_dataset # noqa401
from .dataset_manipulation import balance_dataset # noqa401
from .dataset_manipulation import name_unique_features # noqa401
from .dataset_manipulation import remove_notunique_features # noqa401
from .dataset_manipulation import remove_notunique_features # noqa401
from .exploit_symmetries import give_all_symmetries # noqa401
15 changes: 10 additions & 5 deletions packages/dataset_manipulation/dataset_manipulation.py
@@ -3,7 +3,7 @@
import math
import random
from .exploit_symmetries import give_all_symmetries
from sklearn.preprocessing import normalize
# from sklearn.preprocessing import normalize

nvar = 3

@@ -23,7 +23,8 @@ def augmentate_dataset(features, targets, timings):
symmetric_features += give_all_symmetries(features, int(target))
symmetric_targets += list(range(math.factorial(nvar)))
symmetric_timings += list(timing)
return np.array(symmetric_features), np.array(symmetric_targets), np.array(symmetric_timings)
return np.array(symmetric_features), np.array(symmetric_targets), \
np.array(symmetric_timings)


def balance_dataset(features, targets, timings):
@@ -44,7 +45,8 @@ def balance_dataset(features, targets, timings):
balanced_features.append(symmetric_features[new_target])
balanced_targets.append(new_target)
balanced_timings.append(timing[new_target])
return np.array(balanced_features), np.array(balanced_targets), np.array(balanced_timings)
return np.array(balanced_features), np.array(balanced_targets),\
np.array(balanced_timings)


def name_unique_features(names, features):
@@ -58,11 +60,14 @@ def name_unique_features(names, features):
new_names = []
rep = 0
for index, feature in enumerate(zip(*features)):
# print(feature)
# if any([type(xfeature) == str for xfeature in feature]):
# print(feature)
if (any([np.array_equal(feature, ex_feature)
for ex_feature in new_features])
or np.std(feature) == 0):
rep += 1
elif feature.count(feature[0])==len(feature):
elif feature.count(feature[0]) == len(feature):
print(names[index])
else:
# if 'max_in_polys_max_sig'==names[index][:20]:
@@ -86,7 +91,7 @@ def get_unique_feature_names(unique_names, names, features):
def remove_notunique_features(names, features):
# creating some targets and timings because the function requires them
targets = [0]*len(features)
timings = [[0,0]]*len(features)
timings = [[0, 0]]*len(features)
augmented_features, _, _ = augmentate_dataset(features, targets, timings)
# normalized_augmented_features = normalize(augmented_features)
unique_names = name_unique_features(names, augmented_features)
4 changes: 3 additions & 1 deletion packages/dataset_manipulation/exploit_symmetries.py
@@ -37,7 +37,9 @@ def features_to_canonical_target(features, optimal_ordering):


def give_all_symmetries(features, optimal_ordering):
"""Reorder the features for all possible targets."""
"""Reorder the features for all possible targets.
Returns a list of all symmetries, the first one
corresponding to the optimal ordering."""
ordered_features = features_to_canonical_target(features,
optimal_ordering)
all_symmetries = []
49 changes: 28 additions & 21 deletions replicating_Dorians_features.py
@@ -3,29 +3,28 @@
from xml.sax.handler import all_features
import numpy as np

nvar=3




def aveg(given_list):
return sum(given_list)/len(given_list)


def aveg_not_zero(given_list):
return sum(given_list)/max(1,len([1 for elem in given_list if elem!=0]))


def identity(input):
return input


def sign(input):
if type(input)==list:
if type(input) == list:
return [sign(elem) for elem in input]
else:
if input>0:
if input > 0:
return 1
elif input<0:
elif input < 0:
return -1
elif input==0:
elif input == 0:
return 0
else:
raise Exception("How is this possible?")
@@ -51,21 +50,29 @@ def extract_features(dataset):
all_original_polynomials = []
for index, all_projections in enumerate(dataset[0]):
original_polynomials = all_projections[0][0]
# the original polynomials are the initial polynomials of any
# of the possible projections (also of the first one)
all_original_polynomials.append(original_polynomials)
names = []
instance_features = []
all_targets.append(dataset[1][index])
all_timings.append(dataset[2][index])
for var in range(nvar):
degrees = [[monomial[var] for monomial in poly]
for poly in original_polynomials]
var_features, var_features_names = create_features(degrees,
variable=var)
instance_features += var_features
names += var_features_names
sdegrees = [[sum(monomial) for monomial in poly if monomial[var]!=0]+[0] for poly in original_polynomials]
svar_features, svar_features_names = create_features(sdegrees, variable=var, sv=True)
instance_features += svar_features
names += svar_features_names
names, instance_features = features_from_set_of_polys(original_polynomials)
all_features.append(instance_features)
return np.array(all_original_polynomials), np.array(names), np.array(all_features), np.array(all_targets), np.array(all_timings)


def features_from_set_of_polys(original_polynomials):
instance_features = []
names = []
nvar = len(original_polynomials[0][0]) - 1
for var in range(nvar):
degrees = [[monomial[var] for monomial in poly]
for poly in original_polynomials]
var_features, var_features_names = create_features(degrees,
variable=var)
instance_features += var_features
names += var_features_names
sdegrees = [[sum(monomial) for monomial in poly if monomial[var]!=0]+[0] for poly in original_polynomials]
svar_features, svar_features_names = create_features(sdegrees, variable=var, sv=True)
instance_features += svar_features
names += svar_features_names
return names, instance_features
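The refactoring above pulls the per-variable feature construction into features_from_set_of_polys: for every variable it collects the exponent of that variable in each monomial (degrees) and the sums of the exponent tuples of the monomials that contain the variable, padded with a 0 (sdegrees), before create_features reduces each list to summary statistics. Hand-working one tiny polynomial set (the last tuple entry is assumed here to be a coefficient slot, inferred from nvar = len(original_polynomials[0][0]) - 1):

# x0^2*x1 + x2^3, encoded as exponent tuples (x0, x1, x2, assumed coefficient slot)
original_polynomials = [[(2, 1, 0, 1), (0, 0, 3, 1)]]

var = 0
degrees = [[monomial[var] for monomial in poly]
           for poly in original_polynomials]
# -> [[2, 0]]: exponent of x0 in each monomial of each polynomial

sdegrees = [[sum(monomial) for monomial in poly if monomial[var] != 0] + [0]
            for poly in original_polynomials]
# -> [[4, 0]]: tuple sums of the monomials containing x0, padded with 0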
30 changes: 7 additions & 23 deletions test_train_datasets.py
@@ -1,20 +1,3 @@
"""
The experiments in [1] are replicated with some changes.

The first change is that the testing data is balanced, so that all targets
are almost equally common.
Then we use three training sets: the dataset as in [1], a balanced dataset
and a data-augmented dataset.

[1]Florescu, D., England, M. (2020). A Machine Learning Based Software Pipeline
to Pick the Variable Ordering for Algorithms with Polynomial Inputs.
Bigatti, A., Carette, J., Davenport, J., Joswig, M., de Wolff, T. (eds)
Mathematical Software, ICMS 2020. ICMS 2020. Lecture Notes in Computer Science,
vol 12097. Springer, Cham. https://doi.org/10.1007/978-3-030-52200-1_30
"""


import os
import pickle
import csv
import importlib.util
@@ -29,6 +12,7 @@
from packages.dataset_manipulation import augmentate_dataset
from sklearn.model_selection import train_test_split
from find_filename import find_dataset_filename
from find_filename import find_other_filename


def count_instances(my_dataset, instance):
@@ -40,7 +24,9 @@ def create_train_test_datasets():
with open(clean_dataset_filename, 'rb') as clean_dataset_file:
_, names, features, targets, timings = pickle.load(clean_dataset_file)
unique_names, unique_features = remove_notunique_features(names, features)

unique_features_filename = find_other_filename("unique_features")
with open(unique_features_filename, 'wb') as unique_features_file:
pickle.dump(unique_features, unique_features_file)
x = dict() # to keep the features
y = dict() # to keep the labels
t = dict() # to keep the timings
@@ -58,11 +44,9 @@ def create_train_test_datasets():
writer.writerow(['dataset'] + ['zero', 'one', 'two', 'three', 'four', 'five', 'total'])
for usage in ['train', 'test']:
for method in ['normal', 'balanced', 'augmented']:
this_dataset_file = os.path.join(os.path.dirname(__file__),
'datasets', usage,
f'{method}_{usage}_dataset.txt')
with open(this_dataset_file, 'wb') as f:
pickle.dump((x[f'{usage}_{method}'], y[f'{usage}_{method}']), f)
this_dataset_filename = find_dataset_filename(usage, method=method)
with open(this_dataset_filename, 'wb') as this_dataset_file:
pickle.dump((x[f'{usage}_{method}'], y[f'{usage}_{method}'], t[f'{usage}_{method}']), this_dataset_file)

writer.writerow([f'{usage} {method} dataset']
+ [str(count_instances(y[f'{usage}_{method}'], i))
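Putting the imports and the save loop together, the three dataset variants appear to come from a single train/test split of the cleaned data, with balance_dataset and augmentate_dataset applied to each half. The sketch below reuses the variables already in scope in create_train_test_datasets and is not its literal body; the 80/20 proportion is inferred from the 815/204 counts in dataset_instances.csv:

from sklearn.model_selection import train_test_split
from packages.dataset_manipulation import balance_dataset, augmentate_dataset

x, y, t = dict(), dict(), dict()
(x['train_normal'], x['test_normal'],
 y['train_normal'], y['test_normal'],
 t['train_normal'], t['test_normal']) = train_test_split(
    unique_features, targets, timings, test_size=0.2, random_state=0)

for usage in ['train', 'test']:
    x[f'{usage}_balanced'], y[f'{usage}_balanced'], t[f'{usage}_balanced'] = \
        balance_dataset(x[f'{usage}_normal'], y[f'{usage}_normal'], t[f'{usage}_normal'])
    x[f'{usage}_augmented'], y[f'{usage}_augmented'], t[f'{usage}_augmented'] = \
        augmentate_dataset(x[f'{usage}_normal'], y[f'{usage}_normal'], t[f'{usage}_normal'])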
57 changes: 56 additions & 1 deletion train_models.py
@@ -1,20 +1,75 @@
import pickle
from yaml_tools import read_yaml_from_file
from config.ml_models import classifiers
from config.ml_models import ml_regressors
from config.ml_models import regressors
from find_filename import find_dataset_filename
from find_filename import find_hyperparams_filename
from find_filename import find_model_filename
from dataset_manipulation import give_all_symmetries
import numpy as np
from sklearn import metrics


def train_model(ml_model, method):
train_data_filename = find_dataset_filename('train', method=method)
hyperparams_file = find_hyperparams_filename(method, ml_model)
with open(train_data_filename, 'rb') as train_data_file:
x_train, y_train = pickle.load(train_data_file)
x_train, y_train, _ = pickle.load(train_data_file)
hyperparams = read_yaml_from_file(hyperparams_file)
current_classifier = classifiers[ml_model]
clf = current_classifier(**hyperparams)
clf.fit(x_train, y_train)
trained_model_filename = find_model_filename(method, ml_model)
with open(trained_model_filename, 'wb') as trained_model_file:
pickle.dump(clf, trained_model_file)


def train_regression_model(ml_model, method):
train_data_filename = find_dataset_filename('train', method=method)
with open(train_data_filename, 'rb') as train_data_file:
x_train, _, t_train = pickle.load(train_data_file)
# hyperparams_file = find_hyperparams_filename(method, ml_model)
# hyperparams = read_yaml_from_file(hyperparams_file)
x_train = np.asarray([x_t for x_t, t_t in zip(x_train, t_train)
if t_t[:4] != 'Over'], dtype=float)
t_train = np.asarray([t_t for t_t in t_train
if t_t[:4] != 'Over'], dtype=float)
current_classifier = regressors[ml_model]
# print(t_train)
print("her")
reg = current_classifier() # **hyperparams)
reg.fit(x_train, t_train)
# trained_model_filename = find_model_filename(method, ml_model, 'regression')
# with open(trained_model_filename, 'wb') as trained_model_file:
# pickle.dump(reg, trained_model_file)
print("Real")
print(t_train[10:20])
print("Predicted")
print(reg.predict(x_train)[10:20])
print(metrics.mean_squared_error(reg.predict(x_train), t_train))
return reg


def choose_using_regression(x_test, regressor):
timings = regressor.predict(give_all_symmetries(x_test, 0))
return np.argmin(timings)


def test_regression_model(method, regressor):
test_data_filename = find_dataset_filename('test', method=method)
with open(test_data_filename, 'rb') as test_data_file:
x_test, y_test, t_test = pickle.load(test_data_file)
x_test = np.asarray([x_t for x_t, t_t in zip(x_test, t_test)
if t_t[:4] != 'Over'], dtype=float)
y_test = np.asarray([y_t for y_t, t_t in zip(y_test, t_test)
if t_t[:4] != 'Over'], dtype=float)
y_pred = [choose_using_regression(x_i, regressor) for x_i in x_test]
print("ACC", metrics.accuracy_score(y_test, y_pred))


# for ml_reg in ml_regressors:
# print(ml_reg)
# regressor = train_regression_model(ml_reg, 'balanced')
# print(ml_reg)
# test_regression_model('balanced', regressor)
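Two assumptions behind the regression path above are worth spelling out: timeouts seem to be stored as strings beginning with 'Over' (hence the t_t[:4] != 'Over' filters), and give_all_symmetries(x, 0) yields the six permuted feature vectors of a test instance so that the ordering with the smallest predicted timing can be chosen. A small illustration of both, with made-up values:

import numpy as np

# Dropping timed-out instances (timings assumed to be numbers or 'Over ...' strings).
t_raw = [1.7, 'Over 30', 0.4]
x_raw = [[1.0], [2.0], [3.0]]
x_kept = np.asarray([x_i for x_i, t_i in zip(x_raw, t_raw)
                     if str(t_i)[:4] != 'Over'], dtype=float)
t_kept = np.asarray([t_i for t_i in t_raw if str(t_i)[:4] != 'Over'], dtype=float)

# Choosing an ordering: predict one timing per symmetry and keep the cheapest,
# which is what choose_using_regression does via np.argmin.
predicted_timings = np.array([5.0, 4.0, 0.5, 6.0, 7.0, 8.0])
chosen_ordering = int(np.argmin(predicted_timings))   # -> 2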
