Starting to unify
Starting to create a unified way of representing/testing/training different models
Tereso del Rio committed Sep 21, 2023
1 parent b1a0475 commit 4d43ecb
Showing 39 changed files with 212 additions and 321 deletions.
4 changes: 2 additions & 2 deletions Heuristics/heuristics_guess.py
@@ -10,9 +10,9 @@
from .heuristic_tools import greedy_heuristics, expensive_heuristics, create_pseudorderings, ml_models


def choose_order_given_projections(projections, method="gmods"):
def ordering_given_projections(projections, method="gmods"):
'''Returns the order guessed by the heuristic requested'''
if method in greedy_heuristics or type(method) == int:
if method in greedy_heuristics or type(method) == int or method == 'T1':
guess = greedy_heuristic_guess(projections, heuristic=method)
return guess
elif method in expensive_heuristics:
14 changes: 8 additions & 6 deletions Heuristics/heuristics_rules.py
@@ -52,19 +52,19 @@ def choose_variables_minimizing(degrees_list, measure='gmods', var_list=''):
# elif measure == 'avegsumdeg':
# sum_degrees_overall_polys = [np.average([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
# elif measure == 'avegavegdeg':
# aveg_degrees_overall_polys = [np.average([np.average([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(aveg_degrees_overall_polys)] # var_list is filtered
elif measure == 'avegavegdeg':
aveg_degrees_overall_polys = [np.average([np.average([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(aveg_degrees_overall_polys)] # var_list is filtered

# elif measure == 'maxsumdeg':
# sum_degrees_overall_polys = [max([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
elif measure == 'sumsignsumdeg':
sum_degrees_overall_polys = [np.sum(np.sign([np.sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list])) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
# elif measure == 'sumsumdeg':
# sum_degrees_overall_polys = [sum([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
elif measure == 'sumsumdeg':
sum_degrees_overall_polys = [sum([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
# elif measure == 'avegvegsigndeg':
# sum_degrees_overall_polys = [np.average([np.average([np.sign(monomial[var]) for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
@@ -119,6 +119,8 @@ def choose_variables_minimizing(degrees_list, measure='gmods', var_list=''):
def get_order_measure(heuristic, if_tie='random'):
if heuristic == 'brown':
order_measure = ['brown1', 'brown2', 'brown3', if_tie]
elif heuristic == 'T1':
order_measure = ['gmods', 'avegavegdeg', 'sumsumdeg']
elif type(heuristic) == int:
order_measure = list(paper_all_pos[heuristic])+[if_tie]
else:
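For context, 'T1' is realised here as a chain of measures applied as successive tie-breakers: each measure keeps only the variables attaining its minimum, and the next measure only sees the survivors. A self-contained sketch of that pattern (an illustration of the idea, not the repository's choose_variables_minimizing):

def tie_break(variables, measures):
    """Keep only the variables minimising each measure in turn."""
    candidates = list(variables)
    for measure in measures:
        scores = [measure(v) for v in candidates]
        best = min(scores)
        candidates = [v for v, s in zip(candidates, scores) if s == best]
        if len(candidates) == 1:
            break
    return candidates

# Toy example: three variables scored by two measures; the first measure ties
# variables 0 and 1, the second resolves the tie in favour of variable 1.
degrees = {0: (3, 5), 1: (3, 4), 2: (4, 1)}
print(tie_break([0, 1, 2], [lambda v: degrees[v][0], lambda v: degrees[v][1]]))  # [1]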
10 changes: 5 additions & 5 deletions choose_hyperparams.py
@@ -1,8 +1,8 @@
import os
import pickle
import csv
from config.ml_models import ml_models
from config.ml_models import sklearn_models
from config.ml_models import classifiers
from config.ml_models import all_models
from config.general_values import dataset_qualities
from config.hyperparameters_grid import grid
from sklearn.model_selection import GridSearchCV
@@ -17,7 +17,7 @@ def k_folds_ml(x_train, y_train, model, random_state=0):

The hyperparameters of the models are chosen using 5-fold cross validation.
"""
current_classifier = sklearn_models[model]
current_classifier = all_models[model]
current_grid = grid[model]
rf_cv = GridSearchCV(estimator=current_classifier(),
param_grid=current_grid,
@@ -63,7 +63,7 @@ def choose_hyperparams(ml_model, method):
# with open(output_file_normal, 'w') as f_normal:
# writer_normal = csv.writer(f_normal)
# writer_normal.writerow(["Name"] + dataset_qualities)
# for ml_model in ml_models:
# for ml_model in classifiers:
# print(f"Model: {ml_model}")
# acc_balanced = dict()
# acc_normal = dict()
@@ -79,7 +79,7 @@ def choose_hyperparams(ml_model, method):
# os.path.join(os.path.dirname(__file__),
# 'config', 'hyperparams',
# f'{method}_{ml_model}'))
# current_classifier = sklearn_models[ml_model]
# current_classifier = all_models[ml_model]
# clf = current_classifier(**hyperparams)
# clf.fit(x_train, y_train)
# acc_balanced[method] = clf.score(balanced_x_test,
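A self-contained sketch of the 5-fold search that k_folds_ml wraps, on a synthetic dataset (the repository reads its real features from pickled datasets; make_classification is used here only for illustration):

from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

x_train, y_train = make_classification(n_samples=200, n_features=10, random_state=0)
search = GridSearchCV(estimator=DecisionTreeClassifier(),
                      param_grid={'criterion': ['gini', 'entropy'],
                                  'max_depth': [1, 4, 7]},
                      cv=5)
search.fit(x_train, y_train)
print(search.best_params_)  # e.g. {'criterion': 'gini', 'max_depth': 4}
# The winning hyperparameters are then saved under config/hyperparams/ and
# reused at training time as all_models[name](**hyperparams).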
77 changes: 48 additions & 29 deletions config/hyperparameters_grid.py
@@ -1,70 +1,89 @@
"""Contains the grid of hyperparameters that each model will try"""

grid = dict()
grid['RF'] = {
grid['RF-Classifier'] = {
'n_estimators': [200, 300, 400, 500],
'max_features': ['sqrt', 'log2'],
'max_depth': [4, 5, 6, 7, 8],
'criterion': ['gini', 'entropy']
}
grid['KNN'] = {
'n_neighbors': [1,3,5,7,12],
grid['KNN-Classifier'] = {
'n_neighbors': [1, 3, 5, 7, 12],
'weights': ['uniform', 'distance'],
'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
#'leaf_size': range(1, 10, 3),
#'p': range(1, 4, 1)
# 'leaf_size': range(1, 10, 3),
# 'p': range(1, 4, 1)
}
grid['MLP'] = {
'hidden_layer_sizes': [(5,5), (15,15), (20,20), (10,10,10), (20,20,20)], #[(i,i) for i in range(50, 20, 5)],# +[(i,i, i) for i in range(50, 20, 5)],
grid['MLP-Classifier'] = {
'hidden_layer_sizes': [(5, 5), (15, 15), (20, 20),
(10, 10, 10), (20, 20, 20)],
'activation': ['tanh', 'relu'],
'solver': ['sgd', 'adam'],
'learning_rate': ['constant','adaptive'],
'learning_rate': ['constant', 'adaptive'],
'alpha': [0.05, 0.005],
'max_iter': [1000]
}
grid['DT'] = {
grid['DT-Classifier'] = {
'criterion': ['gini', 'entropy'],
'splitter': ['best', 'random'],
'max_depth': [1,4,7,10,13,16,19]
'max_depth': [1, 4, 7, 10, 13, 16, 19]
}
grid['SVC'] = {
'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
grid['SVM-Classifier'] = {
'kernel': ['rbf', 'sigmoid'],
'tol': [0.0316],
'C': [5,100,200,300],
'C': [5, 100, 300],
'gamma': ['scale', 'auto']
}
grid['GB-Classifier'] = {
'n_estimators': [50, 200],
'learning_rate': [0.01, 0.1],
'max_depth': [3, 5],
'min_samples_split': [2, 4],
'min_samples_leaf': [1, 3]
}

grid['RFR'] = {
grid['RF-Regressor'] = {
'criterion': ['squared_error', 'friedman_mse'],
"max_depth": [1,3,7],
"min_samples_leaf": [1,5,10],
"max_depth": [1, 3, 7],
"min_samples_leaf": [1, 5, 10],
}
grid['KNNR'] = {
grid['KNN-Regressor'] = {
'n_neighbors': [3, 5, 10],
'weights': ['uniform', 'distance'],
'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}
grid['MLPR'] = {
grid['MLP-Regressor'] = {
'hidden_layer_sizes': [(100,), (20, 20), (10, 10, 10)],
'activation': ['logistic', 'tanh', 'relu'],
'solver': ['adam', 'sgd'],
'alpha': [0.0001, 0.001, 0.01]
}
grid['DTR'] = {
"splitter":["best","random"],
"max_depth" : [1,3,7,12],
"min_samples_leaf":[1,5,10],
grid['DT-Regressor'] = {
"splitter": ["best", "random"],
"max_depth": [1, 3, 7, 12],
"min_samples_leaf": [1, 5, 10],
# "min_weight_fraction_leaf":[0.1,0.5,0.9],
# "max_features":["auto","log2","sqrt",None],
# "max_leaf_nodes":[None,10,50,90]
}
grid['SVR'] = {
'kernel': ('linear', 'rbf','poly'),
'C':[1.5, 10],
'gamma': [1e-7, 1e-4],
'epsilon':[0.1,0.2,0.5,0.3]
grid['SVM-Regressor'] = {
'kernel': ['rbf'],
'C': [0.1, 1, 10],
'gamma': [1e-4, 1e-3, 1e-2],
'epsilon': [0.1, 0.2]
# 'kernel': ('linear', 'rbf', 'poly'),
# 'C': [1.5, 10],
# 'gamma': [1e-7, 1e-4],
# 'epsilon': [0.1, 0.2, 0.5]
}
grid['GB-Regressor'] = {
'n_estimators': [50, 200],
'learning_rate': [0.01, 0.1],
'max_depth': [3, 5],
'min_samples_split': [2, 4],
'min_samples_leaf': [1, 3]
}
grid['SGD'] = {
'loss':["squared_error", "huber", "epsilon_insensitive"],
'penalty':["l2", "l1", "elasticnet"]
'loss': ["squared_error", "huber", "epsilon_insensitive"],
'penalty': ["l2", "l1", "elasticnet"]
}
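Since the grid keys were renamed to match config/ml_models.py, a quick consistency check such as the following (illustrative, not part of the commit) catches any key present in one dictionary but not the other:

from config.hyperparameters_grid import grid
from config.ml_models import all_models

print('models without a grid:', sorted(set(all_models) - set(grid)))
print('grids without a model:', sorted(set(grid) - set(all_models)))
# The second set is expected to contain the entries that are not (yet) in
# all_models, e.g. 'SGD' and the commented-out gradient-boosting models.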
3 changes: 0 additions & 3 deletions config/hyperparams/augmented_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/augmented_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/augmented_MLP.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions config/hyperparams/augmented_RF.yaml

This file was deleted.

4 changes: 0 additions & 4 deletions config/hyperparams/augmented_SVC.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/bal_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/bal_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/bal_MLP.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/balanced_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/balanced_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/balanced_MLP.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions config/hyperparams/balanced_RF.yaml

This file was deleted.

4 changes: 0 additions & 4 deletions config/hyperparams/balanced_SVC.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/basic_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/basic_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/basic_MLP.yaml

This file was deleted.

5 changes: 0 additions & 5 deletions config/hyperparams/basic_RF.yaml

This file was deleted.

4 changes: 0 additions & 4 deletions config/hyperparams/basic_SVC.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/normal_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/normal_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/normal_MLP.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions config/hyperparams/normal_RF.yaml

This file was deleted.

4 changes: 0 additions & 4 deletions config/hyperparams/normal_SVC.yaml

This file was deleted.

46 changes: 21 additions & 25 deletions config/ml_models.py
@@ -5,38 +5,34 @@
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor

ml_models = [
'KNN',
'DT',
'SVC',
'RF',
'MLP'
]

ml_regressors = [
'DTR',
'SVR',
'RFR',
'KNNR',
'MLPR'
]
classifiers = {
'DT-Classifier': DecisionTreeClassifier,
'KNN-Classifier': KNeighborsClassifier,
'RF-Classifier': RandomForestClassifier,
'SVM-Classifier': SVC,
'MLP-Classifier': MLPClassifier,
# 'GB-Classifier': GradientBoostingClassifier
}

sklearn_models = {
'DT': DecisionTreeClassifier,
'KNN': KNeighborsClassifier,
'RF': RandomForestClassifier,
'SVC': SVC,
'MLP': MLPClassifier,
'DTR': DecisionTreeRegressor,
'KNNR': KNeighborsRegressor,
'RFR': RandomForestRegressor,
'SVR': SVR,
'MLPR': MLPRegressor
regressors = {
'DT-Regressor': DecisionTreeRegressor,
'KNN-Regressor': KNeighborsRegressor,
'RF-Regressor': RandomForestRegressor,
'SVM-Regressor': SVR,
'MLP-Regressor': MLPRegressor,
# 'GB-Regressor': GradientBoostingRegressor
}

all_models = {**classifiers, **regressors}

heuristics = []#'T1', 'gmods', 'brown', 'random', 'virtual-best']
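A minimal sketch of how the unified registry is meant to be used: a single '<Family>-<Paradigm>' name selects the estimator class, and the same name indexes the hyperparameter grids and output filenames (the concrete call sites are assumptions based on the rest of this commit):

from config.ml_models import all_models, classifiers, regressors

name = 'RF-Classifier'                # or e.g. 'SVM-Regressor'
model = all_models[name]()            # an sklearn RandomForestClassifier instance
print(name in classifiers, name in regressors)  # True False
print(sorted(all_models))             # the five classifier and five regressor names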
8 changes: 4 additions & 4 deletions find_filename.py
@@ -3,16 +3,16 @@
from config.general_values import purposes


def find_hyperparams_filename(method, ml_model):
def find_hyperparams_filename(model_name, paradigm, training_quality):
return os.path.join(os.path.dirname(__file__),
'config', 'hyperparams',
f'{method}_{ml_model}')
f'{model_name}-{paradigm}-{training_quality}')


def find_model_filename(method, ml_model):
def find_model_filename(model_name, paradigm, training_quality):
return os.path.join(os.path.dirname(__file__),
'config', 'models',
f'{method}_{ml_model}.txt')
f'{model_name}-{paradigm}-{training_quality}.txt')


def find_dataset_filename(purpose, method=None):
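A small illustration of the new filename scheme; the argument values are hypothetical, the commit only fixes the '{model_name}-{paradigm}-{training_quality}' pattern:

import os

def model_path(model_name, paradigm, training_quality, base='config/models'):
    # mirrors find_model_filename: one file per (model, paradigm, data quality)
    return os.path.join(base, f'{model_name}-{paradigm}-{training_quality}.txt')

print(model_path('RF-Classifier', 'classification', 'augmented'))
# config/models/RF-Classifier-classification-augmented.txt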
2 changes: 0 additions & 2 deletions from_poly_set_to_features.py
@@ -118,8 +118,6 @@ def get_standarized_features(names, features):
standarizing_values = pickle.load(standarizing_values_file)
# we keep only the features that are unique
standarized_features = []
# for featurex in zip(*features):
# print(type(featurex), len(features))
index = 0
for index, feature in enumerate(zip(*features)):
mean, std = standarizing_values[names[index]]
2 changes: 1 addition & 1 deletion main.py
@@ -30,7 +30,7 @@
# Hyperparameter tuning take a very long time,
# if tune_hyperparameters is used to decide whether to tune them
# or to used previously tuned
tune_hyperparameters = False
tune_hyperparameters = True
train_the_models = True
paradigm = 'classification'

178 changes: 78 additions & 100 deletions main_heuristics.py
@@ -4,117 +4,95 @@
import random
# import numpy as np
from Heuristics.heuristics_guess import not_greedy_heuristic_guess
from Heuristics.heuristics_guess import choose_order_given_projections
from Heuristics.heuristics_guess import ordering_given_projections
from find_filename import find_dataset_filename
from test_models import compute_metrics
from config.ml_models import heuristics

random.seed(0)

nvar = 3
testing_method = 'Normal'
test_dataset_filename = find_dataset_filename('Test',
testing_method)
with open(test_dataset_filename, 'rb') as test_dataset_file:
testing_dataset = pickle.load(test_dataset_file)
output_file = "heuristics_output_acc_time.csv"


# TESTING GMODS IN AUUGMENTED : Features 2, 67 and 132
def choose_gmods(features):
a = []
# # print(features)
# a.append(features[2])
# a.append(features[67])
# a.append(features[132])
if a[0]==min(a):
if a[1]<=a[2]:
return 0
else:
return 1
elif a[1]==min(a):
if a[0]<=a[2]:
return 2
else:
return 3
elif a[2]==min(a):
if a[0]<=a[1]:
return 4
else:
return 5
# # TESTING GMODS IN AUUGMENTED : Features 2, 67 and 132
# def choose_gmods(features):
# a = []
# # # print(features)
# # a.append(features[2])
# # a.append(features[67])
# # a.append(features[132])
# if a[0] == min(a):
# if a[1] <= a[2]:
# return 0
# else:
# return 1
# elif a[1] == min(a):
# if a[0] <= a[2]:
# return 2
# else:
# return 3
# elif a[2]==min(a):
# if a[0]<=a[1]:
# return 4
# else:
# return 5


# Testing in heuristics that make all the choice at once
first_heuristic = 1
for heuristic in ['T1', 'gmods', 'brown', 'random', 'virtual-best']:
# for heuristic in ['gmods', 'virtual best']:
reps = 100
sum_metrics = dict()
for i in range(reps):
if heuristic == 'virtual-best':
# chosen_indices = [np.argmin(timings) for timings in testing_dataset['timings']]
chosen_indices = testing_dataset['labels']
elif heuristic == 'random':
chosen_indices = [random.randint(0, 5) for timings in testing_dataset['timings']]
else:
chosen_indices = [not_greedy_heuristic_guess(projection[0][0], heuristic)
def ordering_choices_heuristics(heuristic, testing_dataset, greedy=False):
if heuristic == 'virtual-best':
chosen_indices = testing_dataset['labels']
elif heuristic == 'random':
chosen_indices = [random.randint(0, len(timings)-1)
for timings in testing_dataset['timings']]
else:
if greedy:
chosen_indices = [ordering_given_projections(projection, heuristic)
for projection in testing_dataset['projections']]
# chosen_indices = [choose_gmods(features)
# for features in testing_dataset['features']]
metrics = compute_metrics(chosen_indices,
testing_dataset['labels'],
testing_dataset['timings'],
testing_dataset['cells'],
heuristic)
if len(sum_metrics) == 0:
sum_metrics = metrics
else:
sum_metrics = {key: metrics[key] + sum_metrics[key] for key in metrics}
aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics}
augmented_metrics = {key: aveg_metrics[key] if key in ['Accuracy', 'Markup'] else math.factorial(nvar)*aveg_metrics[key] for key in sum_metrics}
chosen_indices = [not_greedy_heuristic_guess(projection[0][0],
heuristic)
for projection in testing_dataset['projections']]
return chosen_indices

print(heuristic, augmented_metrics)
if first_heuristic == 1:
first_heuristic = 0
keys = list(augmented_metrics.keys())
with open(output_file, 'a') as f:
f.write('Choosing the whole ordering in the beggining \n')
f.write(', '.join(['Model'] + keys) + '\n')
with open(output_file, 'a', newline='') as f:
writer = csv.writer(f)
writer.writerow([heuristic] + [augmented_metrics[key] for key in keys])

# # Testing on greedy heuristics
# for heuristic in ['brown', 'gmods', 'random', 'virtual best']:
# reps = 100
# sum_metrics = dict()
# for i in range(reps):
# if heuristic == 'virtual best':
# chosen_indices = [np.argmin(timings) for timings in testing_dataset['timings']]
# elif heuristic == 'random':
# chosen_indices = [random.randint(0, 5) for timings in testing_dataset['timings']]
# else:
# chosen_indices = [choose_order_given_projections(projection, heuristic)
# for projection in testing_dataset['projections']]
# metrics = compute_metrics(chosen_indices,
# testing_dataset['labels'],
# testing_dataset['timings'],
# testing_dataset['cells'],
# heuristic)
# if len(sum_metrics) == 0:
# sum_metrics = metrics
# else:
# sum_metrics = {key: metrics[key] + sum_metrics[key] for key in metrics}
# aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics}
# augmented_metrics = {key: aveg_metrics[key] if key in ['Accuracy', 'Markup'] else math.factorial(nvar)*aveg_metrics[key] for key in sum_metrics}

# print(heuristic, augmented_metrics)
# if first_heuristic == 1:
# first_heuristic = 0
# keys = list(augmented_metrics.keys())
# with open(output_file, 'a') as f:
# f.write('Now choosing greedily \n')
# f.write(', '.join(['Model'] + keys) + '\n')
# with open(output_file, 'a', newline='') as f:
# writer = csv.writer(f)
# writer.writerow([heuristic] + [augmented_metrics[key] for key in keys])
# # print(sum(min(timings) for timings in testing_dataset['timings']))
if __name__ == "__main__":
test_dataset_filename = find_dataset_filename('Test',
testing_method)
with open(test_dataset_filename, 'rb') as test_dataset_file:
testing_dataset = pickle.load(test_dataset_file)
output_file = "heuristics_output_acc_time.csv"

# Testing in heuristics that make all the choice at once
first_heuristic = 1
for greedy in [True, False]:
for heuristic in heuristics:
# for heuristic in ['gmods', 'virtual best']:
reps = 100
for i in range(reps):
chosen_indices = ordering_choices_heuristics(heuristic,
testing_dataset,
greedy=greedy)
metrics = compute_metrics(chosen_indices,
testing_dataset)
if i == 0:
sum_metrics = metrics
else:
sum_metrics = {key: metrics[key] + sum_metrics[key]
for key in metrics}
aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics}
augmented_metrics = {key: aveg_metrics[key]
if key in ['Accuracy', 'Markup']
else math.factorial(nvar)*aveg_metrics[key]
for key in sum_metrics}

print('not-'*(not greedy) + 'greedy-' + heuristic,
augmented_metrics)
if first_heuristic == 1:
first_heuristic = 0
keys = list(augmented_metrics.keys())
with open(output_file, 'a') as f:
f.write(', '.join(['Model'] + keys) + '\n')
with open(output_file, 'a', newline='') as f:
writer = csv.writer(f)
writer.writerow(['not-'*(not greedy) + 'greedy-' + heuristic]
+ [augmented_metrics[key] for key in keys])
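The aggregation at the end of this loop averages each metric over reps runs and then rescales the count-like metrics by factorial(nvar), since the augmented test set contains every variable permutation of each original instance. A toy illustration of that rescaling with made-up numbers (only 'Accuracy' and 'Markup' stay as per-run averages):

import math

nvar = 3
reps = 2
runs = [{'Accuracy': 0.50, 'TotalTime': 10.0},
        {'Accuracy': 0.70, 'TotalTime': 12.0}]

sum_metrics = {key: sum(run[key] for run in runs) for key in runs[0]}
aveg_metrics = {key: value / reps for key, value in sum_metrics.items()}
augmented_metrics = {key: value if key in ['Accuracy', 'Markup']
                     else math.factorial(nvar) * value
                     for key, value in aveg_metrics.items()}
print(augmented_metrics)  # {'Accuracy': 0.6, 'TotalTime': 66.0}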
2 changes: 1 addition & 1 deletion main_regression.py
@@ -57,7 +57,7 @@
# C:\Software\Python37\Lib\site-packages\sklearn\neighbors\_regression.py
print(f"Testing models trained in {ml_model}")
metrics = test_model(ml_model, paradigm=paradigm,
testing_method=testing_method)
testing_method=testing_method)
if first_time == 1:
first_time = 0
keys = list(metrics.keys())
2 changes: 1 addition & 1 deletion make_plots.py
@@ -96,4 +96,4 @@ def create_adversarial_plot(
plt.cla()


create_adversarial_plot()
# create_adversarial_plot()
1 change: 0 additions & 1 deletion packages/dataset_manipulation/dataset_manipulation.py
@@ -28,7 +28,6 @@ def augmentate_instance(features, timings, cells, nvar):
return augmented_features, augmented_timings, augmented_cells



def augmentate_dataset(all_features, all_timings, all_cells, nvar):
"""
Multiply the size of the dataset by math.factorial(nvar).
2 changes: 0 additions & 2 deletions replicating_Dorians_features.py
@@ -57,8 +57,6 @@ def extract_features(dataset):
all_original_polynomials = []
all_projections = []
all_cells = []
for index, elem in enumerate(dataset):
print(index, elem[0])
for index, projections in enumerate(dataset[0]):
all_projections.append(projections)
original_polynomials = projections[0][0]
56 changes: 32 additions & 24 deletions test_models.py
@@ -5,8 +5,10 @@
import numpy as np
from sklearn import metrics
from config.general_values import dataset_qualities
from config.ml_models import ml_models
from config.ml_models import ml_regressors
from config.ml_models import all_models
from config.ml_models import regressors
from config.ml_models import classifiers
from config.ml_models import heuristics
from find_filename import find_output_filename
from find_filename import find_dataset_filename
from find_filename import find_model_filename
@@ -34,7 +36,7 @@ def test_results(training_method):
with open(output_filename, 'w') as output_file:
writer_balanced = csv.writer(output_file)
writer_balanced.writerow(["Name"] + dataset_qualities)
for ml_model in ml_models:
for ml_model in all_models:
trained_model_filename = find_model_filename(training_method,
ml_model)
accuracy = dict()
@@ -97,35 +99,44 @@ def test_regressor(ml_model):


def test_model(ml_model, paradigm, testing_method='augmented'):
trained_model_filename = find_model_filename(paradigm,
ml_model)
# print(trained_model_filename, paradigm, ml_model)
test_dataset_filename = find_dataset_filename('Test',
testing_method)
with open(trained_model_filename, 'rb') as trained_model_file:
model = pickle.load(trained_model_file)
with open(test_dataset_filename, 'rb') as test_dataset_file:
testing_dataset = pickle.load(test_dataset_file)
if ml_model in ml_regressors and paradigm == 'regression':
chosen_indices = [return_regressor_choice(model, features)
for features in testing_dataset['features']]
elif ml_model in ml_models:
# print('testing_method', testing_method)
chosen_indices = [model.predict([features])[0]
for features in testing_dataset['features']]
elif paradigm == 'reinforcement' and testing_method == 'Normal':
chosen_indices = [ordering_choice_reinforcement(model, projections)
for projections in testing_dataset['projections']]
# print(chosen_indices)
# print("here2")
trained_model_filename = find_model_filename(paradigm,
ml_model)
with open(trained_model_filename, 'rb') as trained_model_file:
model = pickle.load(trained_model_file)
chosen_indices = choose_indices(model, testing_dataset)
return compute_metrics(chosen_indices,
testing_dataset['labels'],
testing_dataset['timings'],
testing_dataset['cells'],
ml_model)


def compute_metrics(chosen_indices, labels, all_timings, all_cells, model):
def choose_indices(ml_model, dataset, paradigm=''):
trained_model_filename = find_model_filename(paradigm, ml_model)
with open(trained_model_filename, 'rb') as trained_model_file:
model = pickle.load(trained_model_file)
if ml_model in regressors:
chosen_indices = [return_regressor_choice(model, features)
for features in dataset['features']]
elif ml_model in classifiers:
chosen_indices = [model.predict([features])[0]
for features in dataset['features']]
elif paradigm == 'reinforcement':
chosen_indices = [ordering_choice_reinforcement(model, projections)
for projections in dataset['projections']]
elif ml_model in heuristics:
ordering_choices_heuristics(model, dataset)
return chosen_indices


def compute_metrics(chosen_indices, testing_dataset):
labels = testing_dataset['labels']
all_timings = testing_dataset['timings']
all_cells = testing_dataset['cells']
metrics = dict()
correct = 0
metrics['TotalTime'] = 0
@@ -143,9 +154,6 @@ def compute_metrics(chosen_indices, labels, all_timings, all_cells, model):
metrics['TotalCells'] += cells[chosen_index]
chosen_times = [timings[index] for index, timings
in zip(chosen_indices, all_timings)]
timings_lists_filename = find_timings_lists(model)
with open(timings_lists_filename, 'wb') as timings_lists_file:
pickle.dump(chosen_times, timings_lists_file)
metrics['TotalTime'] = sum(chosen_times)
total_instances = len(chosen_indices)
metrics['Accuracy'] = correct/total_instances
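A simplified, self-contained re-implementation of the accuracy/total-time part of compute_metrics under its new dictionary-based signature (the real function also tracks markup and cell counts):

def compute_metrics_sketch(chosen_indices, testing_dataset):
    labels = testing_dataset['labels']
    all_timings = testing_dataset['timings']
    chosen_times = [timings[index]
                    for index, timings in zip(chosen_indices, all_timings)]
    correct = sum(chosen == label for chosen, label in zip(chosen_indices, labels))
    return {'Accuracy': correct / len(labels), 'TotalTime': sum(chosen_times)}

toy = {'labels': [0, 2], 'timings': [[1.0, 5.0, 9.0], [4.0, 2.0, 3.0]]}
print(compute_metrics_sketch([0, 1], toy))  # {'Accuracy': 0.5, 'TotalTime': 3.0}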
3 changes: 0 additions & 3 deletions test_train_datasets.py
@@ -116,11 +116,9 @@ def create_regression_datasets(taking_logarithms=True):
# we will use the augmented dataset here
with open(this_dataset_filename, 'rb') as this_dataset_file:
regression_dataset = pickle.load(this_dataset_file)
# print("regression_dataset['timings']", len(regression_dataset['timings']), regression_dataset['timings'])
regression_dataset['labels'] = \
[timings[0] for timings
in regression_dataset['timings']]
# print("regression_dataset['labels']", len(regression_dataset['labels']), regression_dataset['labels'])
if taking_logarithms:
regression_dataset['labels'] = \
[log(label) for label
@@ -133,7 +131,6 @@ def create_regression_datasets(taking_logarithms=True):
# classification_dataset['labels'] = \
# [np.argmin(timings) for timings
# in regression_dataset['timings']]
# print(classification_dataset['labels'])


# create_regression_datasets(taking_logarithms=False)
17 changes: 11 additions & 6 deletions train_models.py
@@ -2,15 +2,14 @@
import pickle
import random
from yaml_tools import read_yaml_from_file
from config.ml_models import sklearn_models
from config.ml_models import ml_regressors
from config.ml_models import all_models
from find_filename import find_dataset_filename
from find_filename import find_hyperparams_filename
from find_filename import find_model_filename
from find_filename import find_other_filename
from dataset_manipulation import give_all_symmetries
import numpy as np
from sklearn import metrics
# from sklearn import metrics
from itertools import combinations
from replicating_Dorians_features import compute_features_for_var
from test_models import compute_metrics
@@ -22,13 +21,16 @@ def train_model(ml_model, method):
with open(train_data_filename, 'rb') as train_data_file:
train_dataset = pickle.load(train_data_file)
hyperparams = read_yaml_from_file(hyperparams_file)
current_model = sklearn_models[ml_model]
current_model = all_models[ml_model]
model = current_model(**hyperparams)
# model = current_model()
print('here')
model.fit(train_dataset['features'], train_dataset['labels'])
trained_model_filename = find_model_filename(method, ml_model)
print('here2')
with open(trained_model_filename, 'wb') as trained_model_file:
pickle.dump(model, trained_model_file)
return model


def train_regression_model(ml_model, method):
@@ -75,7 +77,7 @@ def train_reinforcement_model(ml_model, method='Normal'):
train_dataset = pickle.load(train_data_file)
# hyperparams_file = find_hyperparams_filename(method, ml_model)
# hyperparams = read_yaml_from_file(hyperparams_file)
current_model = sklearn_models[ml_model]
current_model = all_models[ml_model]
# model = current_model(**hyperparams)
model = current_model()
first_polys = train_dataset['projections'][0][0][0]
@@ -148,7 +150,10 @@ def var_choice_reinforcement(model, polynomials):
chosen by the model trained using reinforcement'''
vars_features = get_vars_features(polynomials)
evaluations = model.predict(vars_features)
return np.argmin(evaluations)
min_value = np.min(evaluations)
min_indices = np.where(evaluations == min_value)[0]
# Randomly select one of the minimal indices
return np.random.choice(min_indices)


def ordering_choice_reinforcement(model, projections):
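The change to var_choice_reinforcement replaces np.argmin, which always returns the first minimum, with a uniform choice among all minimal indices. A standalone illustration of that tie-break:

import numpy as np

evaluations = np.array([0.3, 0.1, 0.1])
min_indices = np.where(evaluations == np.min(evaluations))[0]
print(min_indices)                     # [1 2]
print(np.random.choice(min_indices))   # 1 or 2, chosen uniformly at random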
