Starting to unify
Starting to create a unified way of representing/testing/training different models
Tereso del Rio committed Sep 21, 2023
1 parent b1a0475 commit 4d43ecb
Showing 39 changed files with 212 additions and 321 deletions.
4 changes: 2 additions & 2 deletions Heuristics/heuristics_guess.py
@@ -10,9 +10,9 @@
from .heuristic_tools import greedy_heuristics, expensive_heuristics, create_pseudorderings, ml_models


def choose_order_given_projections(projections, method="gmods"):
def ordering_given_projections(projections, method="gmods"):
'''Returns the order guessed by the heuristic requested'''
if method in greedy_heuristics or type(method) == int:
if method in greedy_heuristics or type(method) == int or method == 'T1':
guess = greedy_heuristic_guess(projections, heuristic=method)
return guess
elif method in expensive_heuristics:
14 changes: 8 additions & 6 deletions Heuristics/heuristics_rules.py
@@ -52,19 +52,19 @@ def choose_variables_minimizing(degrees_list, measure='gmods', var_list=''):
# elif measure == 'avegsumdeg':
# sum_degrees_overall_polys = [np.average([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
# elif measure == 'avegavegdeg':
# aveg_degrees_overall_polys = [np.average([np.average([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(aveg_degrees_overall_polys)] # var_list is filtered
elif measure == 'avegavegdeg':
aveg_degrees_overall_polys = [np.average([np.average([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(aveg_degrees_overall_polys)] # var_list is filtered

# elif measure == 'maxsumdeg':
# sum_degrees_overall_polys = [max([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
elif measure == 'sumsignsumdeg':
sum_degrees_overall_polys = [np.sum(np.sign([np.sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list])) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
# elif measure == 'sumsumdeg':
# sum_degrees_overall_polys = [sum([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
elif measure == 'sumsumdeg':
sum_degrees_overall_polys = [sum([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
# elif measure == 'avegvegsigndeg':
# sum_degrees_overall_polys = [np.average([np.average([np.sign(monomial[var]) for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list] # for each variable, the total degree of each polynomial is computed. Then for each variable this values are added because is what we really care about.
# return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)] # var_list is filtered
@@ -119,6 +119,8 @@ def choose_variables_minimizing(degrees_list, measure='gmods', var_list=''):
def get_order_measure(heuristic, if_tie='random'):
if heuristic == 'brown':
order_measure = ['brown1', 'brown2', 'brown3', if_tie]
elif heuristic == 'T1':
order_measure = ['gmods', 'avegavegdeg', 'sumsumdeg']
elif type(heuristic) == int:
order_measure = list(paper_all_pos[heuristic])+[if_tie]
else:
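For context, 'T1' is realised here as a chain of measures applied as successive tie-breakers: each measure keeps only the variables attaining its minimum, and the next measure only sees the survivors. A self-contained sketch of that pattern (an illustration of the idea, not the repository's choose_variables_minimizing):

def tie_break(variables, measures):
    """Keep only the variables minimising each measure in turn."""
    candidates = list(variables)
    for measure in measures:
        scores = [measure(v) for v in candidates]
        best = min(scores)
        candidates = [v for v, s in zip(candidates, scores) if s == best]
        if len(candidates) == 1:
            break
    return candidates

# Toy example: three variables scored by two measures; the first measure ties
# variables 0 and 1, the second resolves the tie in favour of variable 1.
degrees = {0: (3, 5), 1: (3, 4), 2: (4, 1)}
print(tie_break([0, 1, 2], [lambda v: degrees[v][0], lambda v: degrees[v][1]]))  # [1]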
10 changes: 5 additions & 5 deletions choose_hyperparams.py
@@ -1,8 +1,8 @@
import os
import pickle
import csv
from config.ml_models import ml_models
from config.ml_models import sklearn_models
from config.ml_models import classifiers
from config.ml_models import all_models
from config.general_values import dataset_qualities
from config.hyperparameters_grid import grid
from sklearn.model_selection import GridSearchCV
@@ -17,7 +17,7 @@ def k_folds_ml(x_train, y_train, model, random_state=0):

The hyperparameters of the models are chosen using 5-fold cross validation.
"""
current_classifier = sklearn_models[model]
current_classifier = all_models[model]
current_grid = grid[model]
rf_cv = GridSearchCV(estimator=current_classifier(),
param_grid=current_grid,
@@ -63,7 +63,7 @@ def choose_hyperparams(ml_model, method):
# with open(output_file_normal, 'w') as f_normal:
# writer_normal = csv.writer(f_normal)
# writer_normal.writerow(["Name"] + dataset_qualities)
# for ml_model in ml_models:
# for ml_model in classifiers:
# print(f"Model: {ml_model}")
# acc_balanced = dict()
# acc_normal = dict()
@@ -79,7 +79,7 @@ def choose_hyperparams(ml_model, method):
# os.path.join(os.path.dirname(__file__),
# 'config', 'hyperparams',
# f'{method}_{ml_model}'))
# current_classifier = sklearn_models[ml_model]
# current_classifier = all_models[ml_model]
# clf = current_classifier(**hyperparams)
# clf.fit(x_train, y_train)
# acc_balanced[method] = clf.score(balanced_x_test,
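A self-contained sketch of the 5-fold search that k_folds_ml wraps, on a synthetic dataset (the repository reads its real features from pickled datasets; make_classification is used here only for illustration):

from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

x_train, y_train = make_classification(n_samples=200, n_features=10, random_state=0)
search = GridSearchCV(estimator=DecisionTreeClassifier(),
                      param_grid={'criterion': ['gini', 'entropy'],
                                  'max_depth': [1, 4, 7]},
                      cv=5)
search.fit(x_train, y_train)
print(search.best_params_)  # e.g. {'criterion': 'gini', 'max_depth': 4}
# The winning hyperparameters are then saved under config/hyperparams/ and
# reused at training time as all_models[name](**hyperparams).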
77 changes: 48 additions & 29 deletions config/hyperparameters_grid.py
@@ -1,70 +1,89 @@
"""Contains the grid of hyperparameters that each model will try"""

grid = dict()
grid['RF'] = {
grid['RF-Classifier'] = {
'n_estimators': [200, 300, 400, 500],
'max_features': ['sqrt', 'log2'],
'max_depth': [4, 5, 6, 7, 8],
'criterion': ['gini', 'entropy']
}
grid['KNN'] = {
'n_neighbors': [1,3,5,7,12],
grid['KNN-Classifier'] = {
'n_neighbors': [1, 3, 5, 7, 12],
'weights': ['uniform', 'distance'],
'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
#'leaf_size': range(1, 10, 3),
#'p': range(1, 4, 1)
# 'leaf_size': range(1, 10, 3),
# 'p': range(1, 4, 1)
}
grid['MLP'] = {
'hidden_layer_sizes': [(5,5), (15,15), (20,20), (10,10,10), (20,20,20)], #[(i,i) for i in range(50, 20, 5)],# +[(i,i, i) for i in range(50, 20, 5)],
grid['MLP-Classifier'] = {
'hidden_layer_sizes': [(5, 5), (15, 15), (20, 20),
(10, 10, 10), (20, 20, 20)],
'activation': ['tanh', 'relu'],
'solver': ['sgd', 'adam'],
'learning_rate': ['constant','adaptive'],
'learning_rate': ['constant', 'adaptive'],
'alpha': [0.05, 0.005],
'max_iter': [1000]
}
grid['DT'] = {
grid['DT-Classifier'] = {
'criterion': ['gini', 'entropy'],
'splitter': ['best', 'random'],
'max_depth': [1,4,7,10,13,16,19]
'max_depth': [1, 4, 7, 10, 13, 16, 19]
}
grid['SVC'] = {
'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
grid['SVM-Classifier'] = {
'kernel': ['rbf', 'sigmoid'],
'tol': [0.0316],
'C': [5,100,200,300],
'C': [5, 100, 300],
'gamma': ['scale', 'auto']
}
grid['GB-Classifier'] = {
'n_estimators': [50, 200],
'learning_rate': [0.01, 0.1],
'max_depth': [3, 5],
'min_samples_split': [2, 4],
'min_samples_leaf': [1, 3]
}

grid['RFR'] = {
grid['RF-Regressor'] = {
'criterion': ['squared_error', 'friedman_mse'],
"max_depth": [1,3,7],
"min_samples_leaf": [1,5,10],
"max_depth": [1, 3, 7],
"min_samples_leaf": [1, 5, 10],
}
grid['KNNR'] = {
grid['KNN-Regressor'] = {
'n_neighbors': [3, 5, 10],
'weights': ['uniform', 'distance'],
'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}
grid['MLPR'] = {
grid['MLP-Regressor'] = {
'hidden_layer_sizes': [(100,), (20, 20), (10, 10, 10)],
'activation': ['logistic', 'tanh', 'relu'],
'solver': ['adam', 'sgd'],
'alpha': [0.0001, 0.001, 0.01]
}
grid['DTR'] = {
"splitter":["best","random"],
"max_depth" : [1,3,7,12],
"min_samples_leaf":[1,5,10],
grid['DT-Regressor'] = {
"splitter": ["best", "random"],
"max_depth": [1, 3, 7, 12],
"min_samples_leaf": [1, 5, 10],
# "min_weight_fraction_leaf":[0.1,0.5,0.9],
# "max_features":["auto","log2","sqrt",None],
# "max_leaf_nodes":[None,10,50,90]
}
grid['SVR'] = {
'kernel': ('linear', 'rbf','poly'),
'C':[1.5, 10],
'gamma': [1e-7, 1e-4],
'epsilon':[0.1,0.2,0.5,0.3]
grid['SVM-Regressor'] = {
'kernel': ['rbf'],
'C': [0.1, 1, 10],
'gamma': [1e-4, 1e-3, 1e-2],
'epsilon': [0.1, 0.2]
# 'kernel': ('linear', 'rbf', 'poly'),
# 'C': [1.5, 10],
# 'gamma': [1e-7, 1e-4],
# 'epsilon': [0.1, 0.2, 0.5]
}
grid['GB-Regressor'] = {
'n_estimators': [50, 200],
'learning_rate': [0.01, 0.1],
'max_depth': [3, 5],
'min_samples_split': [2, 4],
'min_samples_leaf': [1, 3]
}
grid['SGD'] = {
'loss':["squared_error", "huber", "epsilon_insensitive"],
'penalty':["l2", "l1", "elasticnet"]
'loss': ["squared_error", "huber", "epsilon_insensitive"],
'penalty': ["l2", "l1", "elasticnet"]
}
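Since the grid keys were renamed to match config/ml_models.py, a quick consistency check such as the following (illustrative, not part of the commit) catches any key present in one dictionary but not the other:

from config.hyperparameters_grid import grid
from config.ml_models import all_models

print('models without a grid:', sorted(set(all_models) - set(grid)))
print('grids without a model:', sorted(set(grid) - set(all_models)))
# The second set is expected to contain the entries that are not (yet) in
# all_models, e.g. 'SGD' and the commented-out gradient-boosting models.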
3 changes: 0 additions & 3 deletions config/hyperparams/augmented_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/augmented_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/augmented_MLP.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions config/hyperparams/augmented_RF.yaml

This file was deleted.

4 changes: 0 additions & 4 deletions config/hyperparams/augmented_SVC.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/bal_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/bal_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/bal_MLP.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/balanced_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/balanced_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/balanced_MLP.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions config/hyperparams/balanced_RF.yaml

This file was deleted.

4 changes: 0 additions & 4 deletions config/hyperparams/balanced_SVC.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/basic_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/basic_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/basic_MLP.yaml

This file was deleted.

5 changes: 0 additions & 5 deletions config/hyperparams/basic_RF.yaml

This file was deleted.

4 changes: 0 additions & 4 deletions config/hyperparams/basic_SVC.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/normal_DT.yaml

This file was deleted.

3 changes: 0 additions & 3 deletions config/hyperparams/normal_KNN.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions config/hyperparams/normal_MLP.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions config/hyperparams/normal_RF.yaml

This file was deleted.

4 changes: 0 additions & 4 deletions config/hyperparams/normal_SVC.yaml

This file was deleted.

46 changes: 21 additions & 25 deletions config/ml_models.py
@@ -5,38 +5,34 @@
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor

ml_models = [
'KNN',
'DT',
'SVC',
'RF',
'MLP'
]

ml_regressors = [
'DTR',
'SVR',
'RFR',
'KNNR',
'MLPR'
]
classifiers = {
'DT-Classifier': DecisionTreeClassifier,
'KNN-Classifier': KNeighborsClassifier,
'RF-Classifier': RandomForestClassifier,
'SVM-Classifier': SVC,
'MLP-Classifier': MLPClassifier,
# 'GB-Classifier': GradientBoostingClassifier
}

sklearn_models = {
'DT': DecisionTreeClassifier,
'KNN': KNeighborsClassifier,
'RF': RandomForestClassifier,
'SVC': SVC,
'MLP': MLPClassifier,
'DTR': DecisionTreeRegressor,
'KNNR': KNeighborsRegressor,
'RFR': RandomForestRegressor,
'SVR': SVR,
'MLPR': MLPRegressor
regressors = {
'DT-Regressor': DecisionTreeRegressor,
'KNN-Regressor': KNeighborsRegressor,
'RF-Regressor': RandomForestRegressor,
'SVM-Regressor': SVR,
'MLP-Regressor': MLPRegressor,
# 'GB-Regressor': GradientBoostingRegressor
}

all_models = {**classifiers, **regressors}

heuristics = []#'T1', 'gmods', 'brown', 'random', 'virtual-best']
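A minimal sketch of how the unified registry is meant to be used: a single '<Family>-<Paradigm>' name selects the estimator class, and the same name indexes the hyperparameter grids and output filenames (the concrete call sites are assumptions based on the rest of this commit):

from config.ml_models import all_models, classifiers, regressors

name = 'RF-Classifier'                # or e.g. 'SVM-Regressor'
model = all_models[name]()            # an sklearn RandomForestClassifier instance
print(name in classifiers, name in regressors)  # True False
print(sorted(all_models))             # the five classifier and five regressor names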
8 changes: 4 additions & 4 deletions find_filename.py
@@ -3,16 +3,16 @@
from config.general_values import purposes


def find_hyperparams_filename(method, ml_model):
def find_hyperparams_filename(model_name, paradigm, training_quality):
return os.path.join(os.path.dirname(__file__),
'config', 'hyperparams',
f'{method}_{ml_model}')
f'{model_name}-{paradigm}-{training_quality}')


def find_model_filename(method, ml_model):
def find_model_filename(model_name, paradigm, training_quality):
return os.path.join(os.path.dirname(__file__),
'config', 'models',
f'{method}_{ml_model}.txt')
f'{model_name}-{paradigm}-{training_quality}.txt')


def find_dataset_filename(purpose, method=None):
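A small illustration of the new filename scheme; the argument values are hypothetical, the commit only fixes the '{model_name}-{paradigm}-{training_quality}' pattern:

import os

def model_path(model_name, paradigm, training_quality, base='config/models'):
    # mirrors find_model_filename: one file per (model, paradigm, data quality)
    return os.path.join(base, f'{model_name}-{paradigm}-{training_quality}.txt')

print(model_path('RF-Classifier', 'classification', 'augmented'))
# config/models/RF-Classifier-classification-augmented.txt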
2 changes: 0 additions & 2 deletions from_poly_set_to_features.py
@@ -118,8 +118,6 @@ def get_standarized_features(names, features):
standarizing_values = pickle.load(standarizing_values_file)
# we keep only the features that are unique
standarized_features = []
# for featurex in zip(*features):
# print(type(featurex), len(features))
index = 0
for index, feature in enumerate(zip(*features)):
mean, std = standarizing_values[names[index]]
2 changes: 1 addition & 1 deletion main.py
@@ -30,7 +30,7 @@
# Hyperparameter tuning take a very long time,
# if tune_hyperparameters is used to decide whether to tune them
# or to used previously tuned
tune_hyperparameters = False
tune_hyperparameters = True
train_the_models = True
paradigm = 'classification'

178 changes: 78 additions & 100 deletions main_heuristics.py
@@ -4,117 +4,95 @@
import random
# import numpy as np
from Heuristics.heuristics_guess import not_greedy_heuristic_guess
from Heuristics.heuristics_guess import choose_order_given_projections
from Heuristics.heuristics_guess import ordering_given_projections
from find_filename import find_dataset_filename
from test_models import compute_metrics
from config.ml_models import heuristics

random.seed(0)

nvar = 3
testing_method = 'Normal'
test_dataset_filename = find_dataset_filename('Test',
testing_method)
with open(test_dataset_filename, 'rb') as test_dataset_file:
testing_dataset = pickle.load(test_dataset_file)
output_file = "heuristics_output_acc_time.csv"


# TESTING GMODS IN AUUGMENTED : Features 2, 67 and 132
def choose_gmods(features):
a = []
# # print(features)
# a.append(features[2])
# a.append(features[67])
# a.append(features[132])
if a[0]==min(a):
if a[1]<=a[2]:
return 0
else:
return 1
elif a[1]==min(a):
if a[0]<=a[2]:
return 2
else:
return 3
elif a[2]==min(a):
if a[0]<=a[1]:
return 4
else:
return 5
# # TESTING GMODS IN AUUGMENTED : Features 2, 67 and 132
# def choose_gmods(features):
# a = []
# # # print(features)
# # a.append(features[2])
# # a.append(features[67])
# # a.append(features[132])
# if a[0] == min(a):
# if a[1] <= a[2]:
# return 0
# else:
# return 1
# elif a[1] == min(a):
# if a[0] <= a[2]:
# return 2
# else:
# return 3
# elif a[2]==min(a):
# if a[0]<=a[1]:
# return 4
# else:
# return 5


# Testing in heuristics that make all the choice at once
first_heuristic = 1
for heuristic in ['T1', 'gmods', 'brown', 'random', 'virtual-best']:
# for heuristic in ['gmods', 'virtual best']:
reps = 100
sum_metrics = dict()
for i in range(reps):
if heuristic == 'virtual-best':
# chosen_indices = [np.argmin(timings) for timings in testing_dataset['timings']]
chosen_indices = testing_dataset['labels']
elif heuristic == 'random':
chosen_indices = [random.randint(0, 5) for timings in testing_dataset['timings']]
else:
chosen_indices = [not_greedy_heuristic_guess(projection[0][0], heuristic)
def ordering_choices_heuristics(heuristic, testing_dataset, greedy=False):
if heuristic == 'virtual-best':
chosen_indices = testing_dataset['labels']
elif heuristic == 'random':
chosen_indices = [random.randint(0, len(timings)-1)
for timings in testing_dataset['timings']]
else:
if greedy:
chosen_indices = [ordering_given_projections(projection, heuristic)
for projection in testing_dataset['projections']]
# chosen_indices = [choose_gmods(features)
# for features in testing_dataset['features']]
metrics = compute_metrics(chosen_indices,
testing_dataset['labels'],
testing_dataset['timings'],
testing_dataset['cells'],
heuristic)
if len(sum_metrics) == 0:
sum_metrics = metrics
else:
sum_metrics = {key: metrics[key] + sum_metrics[key] for key in metrics}
aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics}
augmented_metrics = {key: aveg_metrics[key] if key in ['Accuracy', 'Markup'] else math.factorial(nvar)*aveg_metrics[key] for key in sum_metrics}
chosen_indices = [not_greedy_heuristic_guess(projection[0][0],
heuristic)
for projection in testing_dataset['projections']]
return chosen_indices

print(heuristic, augmented_metrics)
if first_heuristic == 1:
first_heuristic = 0
keys = list(augmented_metrics.keys())
with open(output_file, 'a') as f:
f.write('Choosing the whole ordering in the beggining \n')
f.write(', '.join(['Model'] + keys) + '\n')
with open(output_file, 'a', newline='') as f:
writer = csv.writer(f)
writer.writerow([heuristic] + [augmented_metrics[key] for key in keys])

# # Testing on greedy heuristics
# for heuristic in ['brown', 'gmods', 'random', 'virtual best']:
# reps = 100
# sum_metrics = dict()
# for i in range(reps):
# if heuristic == 'virtual best':
# chosen_indices = [np.argmin(timings) for timings in testing_dataset['timings']]
# elif heuristic == 'random':
# chosen_indices = [random.randint(0, 5) for timings in testing_dataset['timings']]
# else:
# chosen_indices = [choose_order_given_projections(projection, heuristic)
# for projection in testing_dataset['projections']]
# metrics = compute_metrics(chosen_indices,
# testing_dataset['labels'],
# testing_dataset['timings'],
# testing_dataset['cells'],
# heuristic)
# if len(sum_metrics) == 0:
# sum_metrics = metrics
# else:
# sum_metrics = {key: metrics[key] + sum_metrics[key] for key in metrics}
# aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics}
# augmented_metrics = {key: aveg_metrics[key] if key in ['Accuracy', 'Markup'] else math.factorial(nvar)*aveg_metrics[key] for key in sum_metrics}

# print(heuristic, augmented_metrics)
# if first_heuristic == 1:
# first_heuristic = 0
# keys = list(augmented_metrics.keys())
# with open(output_file, 'a') as f:
# f.write('Now choosing greedily \n')
# f.write(', '.join(['Model'] + keys) + '\n')
# with open(output_file, 'a', newline='') as f:
# writer = csv.writer(f)
# writer.writerow([heuristic] + [augmented_metrics[key] for key in keys])
# # print(sum(min(timings) for timings in testing_dataset['timings']))
if __name__ == "__main__":
test_dataset_filename = find_dataset_filename('Test',
testing_method)
with open(test_dataset_filename, 'rb') as test_dataset_file:
testing_dataset = pickle.load(test_dataset_file)
output_file = "heuristics_output_acc_time.csv"

# Testing in heuristics that make all the choice at once
first_heuristic = 1
for greedy in [True, False]:
for heuristic in heuristics:
# for heuristic in ['gmods', 'virtual best']:
reps = 100
for i in range(reps):
chosen_indices = ordering_choices_heuristics(heuristic,
testing_dataset,
greedy=greedy)
metrics = compute_metrics(chosen_indices,
testing_dataset)
if i == 0:
sum_metrics = metrics
else:
sum_metrics = {key: metrics[key] + sum_metrics[key]
for key in metrics}
aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics}
augmented_metrics = {key: aveg_metrics[key]
if key in ['Accuracy', 'Markup']
else math.factorial(nvar)*aveg_metrics[key]
for key in sum_metrics}

print('not-'*(not greedy) + 'greedy-' + heuristic,
augmented_metrics)
if first_heuristic == 1:
first_heuristic = 0
keys = list(augmented_metrics.keys())
with open(output_file, 'a') as f:
f.write(', '.join(['Model'] + keys) + '\n')
with open(output_file, 'a', newline='') as f:
writer = csv.writer(f)
writer.writerow(['not-'*(not greedy) + 'greedy-' + heuristic]
+ [augmented_metrics[key] for key in keys])
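The aggregation at the end of this loop averages each metric over reps runs and then rescales the count-like metrics by factorial(nvar), since the augmented test set contains every variable permutation of each original instance. A toy illustration of that rescaling with made-up numbers (only 'Accuracy' and 'Markup' stay as per-run averages):

import math

nvar = 3
reps = 2
runs = [{'Accuracy': 0.50, 'TotalTime': 10.0},
        {'Accuracy': 0.70, 'TotalTime': 12.0}]

sum_metrics = {key: sum(run[key] for run in runs) for key in runs[0]}
aveg_metrics = {key: value / reps for key, value in sum_metrics.items()}
augmented_metrics = {key: value if key in ['Accuracy', 'Markup']
                     else math.factorial(nvar) * value
                     for key, value in aveg_metrics.items()}
print(augmented_metrics)  # {'Accuracy': 0.6, 'TotalTime': 66.0}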
2 changes: 1 addition & 1 deletion main_regression.py
@@ -57,7 +57,7 @@
# C:\Software\Python37\Lib\site-packages\sklearn\neighbors\_regression.py
print(f"Testing models trained in {ml_model}")
metrics = test_model(ml_model, paradigm=paradigm,
testing_method=testing_method)
testing_method=testing_method)
if first_time == 1:
first_time = 0
keys = list(metrics.keys())
2 changes: 1 addition & 1 deletion make_plots.py
@@ -96,4 +96,4 @@ def create_adversarial_plot(
plt.cla()


create_adversarial_plot()
# create_adversarial_plot()
1 change: 0 additions & 1 deletion packages/dataset_manipulation/dataset_manipulation.py
@@ -28,7 +28,6 @@ def augmentate_instance(features, timings, cells, nvar):
return augmented_features, augmented_timings, augmented_cells



def augmentate_dataset(all_features, all_timings, all_cells, nvar):
"""
Multiply the size of the dataset by math.factorial(nvar).
2 changes: 0 additions & 2 deletions replicating_Dorians_features.py
@@ -57,8 +57,6 @@ def extract_features(dataset):
all_original_polynomials = []
all_projections = []
all_cells = []
for index, elem in enumerate(dataset):
print(index, elem[0])
for index, projections in enumerate(dataset[0]):
all_projections.append(projections)
original_polynomials = projections[0][0]
56 changes: 32 additions & 24 deletions test_models.py
@@ -5,8 +5,10 @@
import numpy as np
from sklearn import metrics
from config.general_values import dataset_qualities
from config.ml_models import ml_models
from config.ml_models import ml_regressors
from config.ml_models import all_models
from config.ml_models import regressors
from config.ml_models import classifiers
from config.ml_models import heuristics
from find_filename import find_output_filename
from find_filename import find_dataset_filename
from find_filename import find_model_filename
@@ -34,7 +36,7 @@ def test_results(training_method):
with open(output_filename, 'w') as output_file:
writer_balanced = csv.writer(output_file)
writer_balanced.writerow(["Name"] + dataset_qualities)
for ml_model in ml_models:
for ml_model in all_models:
trained_model_filename = find_model_filename(training_method,
ml_model)
accuracy = dict()
@@ -97,35 +99,44 @@ def test_regressor(ml_model):


def test_model(ml_model, paradigm, testing_method='augmented'):
trained_model_filename = find_model_filename(paradigm,
ml_model)
# print(trained_model_filename, paradigm, ml_model)
test_dataset_filename = find_dataset_filename('Test',
testing_method)
with open(trained_model_filename, 'rb') as trained_model_file:
model = pickle.load(trained_model_file)
with open(test_dataset_filename, 'rb') as test_dataset_file:
testing_dataset = pickle.load(test_dataset_file)
if ml_model in ml_regressors and paradigm == 'regression':
chosen_indices = [return_regressor_choice(model, features)
for features in testing_dataset['features']]
elif ml_model in ml_models:
# print('testing_method', testing_method)
chosen_indices = [model.predict([features])[0]
for features in testing_dataset['features']]
elif paradigm == 'reinforcement' and testing_method == 'Normal':
chosen_indices = [ordering_choice_reinforcement(model, projections)
for projections in testing_dataset['projections']]
# print(chosen_indices)
# print("here2")
trained_model_filename = find_model_filename(paradigm,
ml_model)
with open(trained_model_filename, 'rb') as trained_model_file:
model = pickle.load(trained_model_file)
chosen_indices = choose_indices(model, testing_dataset)
return compute_metrics(chosen_indices,
testing_dataset['labels'],
testing_dataset['timings'],
testing_dataset['cells'],
ml_model)


def compute_metrics(chosen_indices, labels, all_timings, all_cells, model):
def choose_indices(ml_model, dataset, paradigm=''):
trained_model_filename = find_model_filename(paradigm, ml_model)
with open(trained_model_filename, 'rb') as trained_model_file:
model = pickle.load(trained_model_file)
if ml_model in regressors:
chosen_indices = [return_regressor_choice(model, features)
for features in dataset['features']]
elif ml_model in classifiers:
chosen_indices = [model.predict([features])[0]
for features in dataset['features']]
elif paradigm == 'reinforcement':
chosen_indices = [ordering_choice_reinforcement(model, projections)
for projections in dataset['projections']]
elif ml_model in heuristics:
ordering_choices_heuristics(model, dataset)
return chosen_indices


def compute_metrics(chosen_indices, testing_dataset):
labels = testing_dataset['labels']
all_timings = testing_dataset['timings']
all_cells = testing_dataset['cells']
metrics = dict()
correct = 0
metrics['TotalTime'] = 0
@@ -143,9 +154,6 @@ def compute_metrics(chosen_indices, labels, all_timings, all_cells, model):
metrics['TotalCells'] += cells[chosen_index]
chosen_times = [timings[index] for index, timings
in zip(chosen_indices, all_timings)]
timings_lists_filename = find_timings_lists(model)
with open(timings_lists_filename, 'wb') as timings_lists_file:
pickle.dump(chosen_times, timings_lists_file)
metrics['TotalTime'] = sum(chosen_times)
total_instances = len(chosen_indices)
metrics['Accuracy'] = correct/total_instances
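A simplified, self-contained re-implementation of the accuracy/total-time part of compute_metrics under its new dictionary-based signature (the real function also tracks markup and cell counts):

def compute_metrics_sketch(chosen_indices, testing_dataset):
    labels = testing_dataset['labels']
    all_timings = testing_dataset['timings']
    chosen_times = [timings[index]
                    for index, timings in zip(chosen_indices, all_timings)]
    correct = sum(chosen == label for chosen, label in zip(chosen_indices, labels))
    return {'Accuracy': correct / len(labels), 'TotalTime': sum(chosen_times)}

toy = {'labels': [0, 2], 'timings': [[1.0, 5.0, 9.0], [4.0, 2.0, 3.0]]}
print(compute_metrics_sketch([0, 1], toy))  # {'Accuracy': 0.5, 'TotalTime': 3.0}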
3 changes: 0 additions & 3 deletions test_train_datasets.py
@@ -116,11 +116,9 @@ def create_regression_datasets(taking_logarithms=True):
# we will use the augmented dataset here
with open(this_dataset_filename, 'rb') as this_dataset_file:
regression_dataset = pickle.load(this_dataset_file)
# print("regression_dataset['timings']", len(regression_dataset['timings']), regression_dataset['timings'])
regression_dataset['labels'] = \
[timings[0] for timings
in regression_dataset['timings']]
# print("regression_dataset['labels']", len(regression_dataset['labels']), regression_dataset['labels'])
if taking_logarithms:
regression_dataset['labels'] = \
[log(label) for label
@@ -133,7 +131,6 @@ def create_regression_datasets(taking_logarithms=True):
# classification_dataset['labels'] = \
# [np.argmin(timings) for timings
# in regression_dataset['timings']]
# print(classification_dataset['labels'])


# create_regression_datasets(taking_logarithms=False)
17 changes: 11 additions & 6 deletions train_models.py
@@ -2,15 +2,14 @@
import pickle
import random
from yaml_tools import read_yaml_from_file
from config.ml_models import sklearn_models
from config.ml_models import ml_regressors
from config.ml_models import all_models
from find_filename import find_dataset_filename
from find_filename import find_hyperparams_filename
from find_filename import find_model_filename
from find_filename import find_other_filename
from dataset_manipulation import give_all_symmetries
import numpy as np
from sklearn import metrics
# from sklearn import metrics
from itertools import combinations
from replicating_Dorians_features import compute_features_for_var
from test_models import compute_metrics
@@ -22,13 +21,16 @@ def train_model(ml_model, method):
with open(train_data_filename, 'rb') as train_data_file:
train_dataset = pickle.load(train_data_file)
hyperparams = read_yaml_from_file(hyperparams_file)
current_model = sklearn_models[ml_model]
current_model = all_models[ml_model]
model = current_model(**hyperparams)
# model = current_model()
print('here')
model.fit(train_dataset['features'], train_dataset['labels'])
trained_model_filename = find_model_filename(method, ml_model)
print('here2')
with open(trained_model_filename, 'wb') as trained_model_file:
pickle.dump(model, trained_model_file)
return model


def train_regression_model(ml_model, method):
@@ -75,7 +77,7 @@ def train_reinforcement_model(ml_model, method='Normal'):
train_dataset = pickle.load(train_data_file)
# hyperparams_file = find_hyperparams_filename(method, ml_model)
# hyperparams = read_yaml_from_file(hyperparams_file)
current_model = sklearn_models[ml_model]
current_model = all_models[ml_model]
# model = current_model(**hyperparams)
model = current_model()
first_polys = train_dataset['projections'][0][0][0]
@@ -148,7 +150,10 @@ def var_choice_reinforcement(model, polynomials):
chosen by the model trained using reinforcement'''
vars_features = get_vars_features(polynomials)
evaluations = model.predict(vars_features)
return np.argmin(evaluations)
min_value = np.min(evaluations)
min_indices = np.where(evaluations == min_value)[0]
# Randomly select one of the minimal indices
return np.random.choice(min_indices)


def ordering_choice_reinforcement(model, projections):
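The change to var_choice_reinforcement replaces np.argmin, which always returns the first minimum, with a uniform choice among all minimal indices. A standalone illustration of that tie-break:

import numpy as np

evaluations = np.array([0.3, 0.1, 0.1])
min_indices = np.where(evaluations == np.min(evaluations))[0]
print(min_indices)                     # [1 2]
print(np.random.choice(min_indices))   # 1 or 2, chosen uniformly at random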
