diff --git a/choose_hyperparams.py b/choose_hyperparams.py index f3beb66..b9570df 100644 --- a/choose_hyperparams.py +++ b/choose_hyperparams.py @@ -28,16 +28,16 @@ def k_folds_ml(x_train, y_train, model, random_state=0): return rf_cv.best_params_ -def choose_hyperparams(ml_model, method): +def choose_hyperparams(model_name, paradigm, training_quality): """Given a ml_model and a method, a file with the hyperparameters chosen by cross validation is created""" - this_dataset_file = find_dataset_filename('Train', method=method) + this_dataset_file = find_dataset_filename('Train', dataset_quality=training_quality) with open(this_dataset_file, 'rb') as f: dataset = pickle.load(f) - hyperparams = k_folds_ml(dataset['features'], dataset['labels'], model=ml_model) + hyperparams = k_folds_ml(dataset['features'], dataset['labels'], model=model_name) print(hyperparams) - hyperparams_filename = find_hyperparams_filename(method, ml_model) - print(hyperparams_filename) + hyperparams_filename = find_hyperparams_filename(model_name, paradigm, training_quality) + print('new hyperparams_filename', hyperparams_filename) write_yaml_to_file(hyperparams, hyperparams_filename) diff --git a/config/general_values.py b/config/general_values.py index f9a5d8d..6cf50b5 100644 --- a/config/general_values.py +++ b/config/general_values.py @@ -1,6 +1,6 @@ purposes = ['Train', 'Test'] -dataset_qualities = ['Normal', 'Balanced', 'Augmented'] +dataset_qualities = ['Biased', 'Balanced', 'Augmented'] def aveg(given_list): diff --git a/config/hyperparameters_grid.py b/config/hyperparameters_grid.py index 6f30462..d3aa5e2 100644 --- a/config/hyperparameters_grid.py +++ b/config/hyperparameters_grid.py @@ -2,9 +2,9 @@ grid = dict() grid['RF-Classifier'] = { - 'n_estimators': [200, 300, 400, 500], + 'n_estimators': [200, 500], 'max_features': ['sqrt', 'log2'], - 'max_depth': [4, 5, 6, 7, 8], + 'max_depth': [4, 6, 8], 'criterion': ['gini', 'entropy'] } grid['KNN-Classifier'] = { @@ -15,8 +15,7 @@ # 'p': range(1, 4, 1) } grid['MLP-Classifier'] = { - 'hidden_layer_sizes': [(5, 5), (15, 15), (20, 20), - (10, 10, 10), (20, 20, 20)], + 'hidden_layer_sizes': [(30, 30), (10, 10, 10), (20, 20, 20)], 'activation': ['tanh', 'relu'], 'solver': ['sgd', 'adam'], 'learning_rate': ['constant', 'adaptive'], @@ -43,9 +42,13 @@ } grid['RF-Regressor'] = { - 'criterion': ['squared_error', 'friedman_mse'], - "max_depth": [1, 3, 7], - "min_samples_leaf": [1, 5, 10], + 'n_estimators': [200, 500], + 'max_features': ['sqrt', 'log2'], + 'max_depth': [4, 6, 8], + 'criterion': ['friedman_mse', 'squared_error'] + # 'criterion': ['squared_error', 'friedman_mse'], + # "max_depth": [1, 3, 7], + # "min_samples_leaf": [1, 5, 10], } grid['KNN-Regressor'] = { 'n_neighbors': [3, 5, 10], @@ -53,10 +56,16 @@ 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'] } grid['MLP-Regressor'] = { - 'hidden_layer_sizes': [(100,), (20, 20), (10, 10, 10)], - 'activation': ['logistic', 'tanh', 'relu'], - 'solver': ['adam', 'sgd'], - 'alpha': [0.0001, 0.001, 0.01] + 'hidden_layer_sizes': [(30, 30), (10, 10, 10), (20, 20, 20)], + 'activation': ['tanh', 'relu'], + 'solver': ['sgd', 'adam'], + 'learning_rate': ['constant', 'adaptive'], + 'alpha': [0.05, 0.005], + 'max_iter': [1000] + # 'hidden_layer_sizes': [(30, 30), (20, 20, 20), (10, 10, 10)], + # 'activation': ['logistic', 'tanh', 'relu'], + # 'solver': ['adam', 'sgd'], + # 'alpha': [0.0001, 0.001, 0.01] } grid['DT-Regressor'] = { "splitter": ["best", "random"], diff --git a/config/ml_models.py b/config/ml_models.py index 
5a35d1b..d1778cd 100644 --- a/config/ml_models.py +++ b/config/ml_models.py @@ -35,4 +35,4 @@ all_models = {**classifiers, **regressors} -heuristics = []#'T1', 'gmods', 'brown', 'random', 'virtual-best'] +heuristics = ['T1', 'gmods', 'brown', 'random', 'virtual-best'] diff --git a/create_clean_dataset.py b/create_clean_dataset.py index ec56e2f..4651870 100644 --- a/create_clean_dataset.py +++ b/create_clean_dataset.py @@ -83,7 +83,6 @@ def cleaning_dataset(): def convert_to_timing(timing_str, penalization=2): if not contains_float(timing_str): - print(penalization * float(timing_str[5:])) return penalization * float(timing_str[5:]) return float(timing_str) @@ -109,4 +108,4 @@ def contains_int(input_str): match = re.match(int_pattern, input_str) return match is not None -cleaning_dataset() \ No newline at end of file +# cleaning_dataset() diff --git a/datasets/clean_dataset.txt b/datasets/clean_dataset.txt index 2a4b33d..5242697 100644 Binary files a/datasets/clean_dataset.txt and b/datasets/clean_dataset.txt differ diff --git a/find_filename.py b/find_filename.py index bd62c25..aa6a9ac 100644 --- a/find_filename.py +++ b/find_filename.py @@ -15,7 +15,7 @@ def find_model_filename(model_name, paradigm, training_quality): f'{model_name}-{paradigm}-{training_quality}.txt') -def find_dataset_filename(purpose, method=None): +def find_dataset_filename(purpose, dataset_quality=None, paradigm=''): if purpose == "unclean": return os.path.join(os.path.dirname(__file__), 'DatasetsBeforeProcessing', @@ -34,7 +34,7 @@ def find_dataset_filename(purpose, method=None): elif purpose in purposes: return os.path.join(os.path.dirname(__file__), 'datasets', f'{purpose}', - f'{method}_{purpose}_dataset.txt') + f'{dataset_quality}-{purpose}-{paradigm}-dataset.txt') else: raise Exception(f"Purpose {purpose} not found") diff --git a/main.py b/main.py index 25c62af..fb1f5a8 100644 --- a/main.py +++ b/main.py @@ -34,6 +34,7 @@ train_the_models = True paradigm = 'classification' +print("MAIN.PY") cleaning_dataset() create_train_test_datasets() diff --git a/main_heuristics.py b/main_heuristics.py index 462b341..da50ea8 100644 --- a/main_heuristics.py +++ b/main_heuristics.py @@ -5,14 +5,14 @@ # import numpy as np from Heuristics.heuristics_guess import not_greedy_heuristic_guess from Heuristics.heuristics_guess import ordering_given_projections -from find_filename import find_dataset_filename -from test_models import compute_metrics -from config.ml_models import heuristics +# from find_filename import find_dataset_filename +# from test_models import compute_metrics +# from config.ml_models import heuristics random.seed(0) nvar = 3 -testing_method = 'Normal' +testing_method = 'Biased' # # TESTING GMODS IN AUUGMENTED : Features 2, 67 and 132 # def choose_gmods(features): @@ -38,61 +38,63 @@ # return 5 -def ordering_choices_heuristics(heuristic, testing_dataset, greedy=False): +def ordering_choices_heuristics(heuristic, testing_dataset, paradigm): if heuristic == 'virtual-best': chosen_indices = testing_dataset['labels'] elif heuristic == 'random': chosen_indices = [random.randint(0, len(timings)-1) for timings in testing_dataset['timings']] else: - if greedy: + if paradigm == 'Greedy': chosen_indices = [ordering_given_projections(projection, heuristic) for projection in testing_dataset['projections']] - else: - chosen_indices = [not_greedy_heuristic_guess(projection[0][0], + elif paradigm == 'NotGreedy': + chosen_indices = [not_greedy_heuristic_guess(polynomials, heuristic) - for projection in 
testing_dataset['projections']] + for polynomials in testing_dataset['polynomials']] + else: + raise Exception(f"Paradigm {paradigm} not recognised for a heuristic.") return chosen_indices -if __name__ == "__main__": - test_dataset_filename = find_dataset_filename('Test', - testing_method) - with open(test_dataset_filename, 'rb') as test_dataset_file: - testing_dataset = pickle.load(test_dataset_file) - output_file = "heuristics_output_acc_time.csv" +# if __name__ == "__main__": +# test_dataset_filename = find_dataset_filename('Test', +# testing_method) +# with open(test_dataset_filename, 'rb') as test_dataset_file: +# testing_dataset = pickle.load(test_dataset_file) +# output_file = "heuristics_output_acc_time.csv" - # Testing in heuristics that make all the choice at once - first_heuristic = 1 - for greedy in [True, False]: - for heuristic in heuristics: - # for heuristic in ['gmods', 'virtual best']: - reps = 100 - for i in range(reps): - chosen_indices = ordering_choices_heuristics(heuristic, - testing_dataset, - greedy=greedy) - metrics = compute_metrics(chosen_indices, - testing_dataset) - if i == 0: - sum_metrics = metrics - else: - sum_metrics = {key: metrics[key] + sum_metrics[key] - for key in metrics} - aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics} - augmented_metrics = {key: aveg_metrics[key] - if key in ['Accuracy', 'Markup'] - else math.factorial(nvar)*aveg_metrics[key] - for key in sum_metrics} +# # Testing in heuristics that make all the choice at once +# first_heuristic = 1 +# for greedy in [True, False]: +# for heuristic in heuristics: +# # for heuristic in ['gmods', 'virtual best']: +# reps = 100 +# for i in range(reps): +# chosen_indices = ordering_choices_heuristics(heuristic, +# testing_dataset, +# greedy=greedy) +# metrics = compute_metrics(chosen_indices, +# testing_dataset) +# if i == 0: +# sum_metrics = metrics +# else: +# sum_metrics = {key: metrics[key] + sum_metrics[key] +# for key in metrics} +# aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics} +# augmented_metrics = {key: aveg_metrics[key] +# if key in ['Accuracy', 'Markup'] +# else math.factorial(nvar)*aveg_metrics[key] +# for key in sum_metrics} - print('not-'*(not greedy) + 'greedy-' + heuristic, - augmented_metrics) - if first_heuristic == 1: - first_heuristic = 0 - keys = list(augmented_metrics.keys()) - with open(output_file, 'a') as f: - f.write(', '.join(['Model'] + keys) + '\n') - with open(output_file, 'a', newline='') as f: - writer = csv.writer(f) - writer.writerow(['not-'*(not greedy) + 'greedy-' + heuristic] - + [augmented_metrics[key] for key in keys]) +# print('not-'*(not greedy) + 'greedy-' + heuristic, +# augmented_metrics) +# if first_heuristic == 1: +# first_heuristic = 0 +# keys = list(augmented_metrics.keys()) +# with open(output_file, 'a') as f: +# f.write(', '.join(['Model'] + keys) + '\n') +# with open(output_file, 'a', newline='') as f: +# writer = csv.writer(f) +# writer.writerow(['not-'*(not greedy) + 'greedy-' + heuristic] +# + [augmented_metrics[key] for key in keys]) diff --git a/replicating_Dorians_features.py b/replicating_Dorians_features.py index 832c44b..bf1cf44 100644 --- a/replicating_Dorians_features.py +++ b/replicating_Dorians_features.py @@ -69,13 +69,14 @@ def extract_features(dataset): names, instance_features = \ features_from_set_of_polys(original_polynomials) all_features.append(instance_features) - my_dataset['polynomials'] = np.array(all_original_polynomials) + my_dataset['polynomials'] = all_original_polynomials 
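# Keeping 'polynomials' (and 'projections' below) as plain Python lists avoids
# building ragged NumPy arrays: each instance contributes a different number of
# polynomials and differently sized projection data, which NumPy can only hold
# as dtype=object (newer NumPy versions raise an error rather than infer a
# ragged object array). If an array form were ever needed again, a minimal
# sketch, assuming the same variables as above, would be:
# my_dataset['polynomials'] = np.array(all_original_polynomials, dtype=object)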
my_dataset['names'] = np.array(names) my_dataset['features'] = np.array(all_features) my_dataset['labels'] = np.array(all_labels) my_dataset['timings'] = np.array(all_timings) - my_dataset['projections'] = np.array(all_projections) + my_dataset['projections'] = all_projections my_dataset['cells'] = np.array(all_cells) + # all these use to be converted to np.array() return my_dataset diff --git a/run_for_paper.py b/run_for_paper.py new file mode 100644 index 0000000..6af4d39 --- /dev/null +++ b/run_for_paper.py @@ -0,0 +1,174 @@ +import os +import pickle +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +from create_clean_dataset import cleaning_dataset +from test_train_datasets import create_train_test_datasets +from test_train_datasets import create_regression_datasets +from config.ml_models import all_models +from config.ml_models import regressors +from config.ml_models import classifiers +from config.ml_models import heuristics +from choose_hyperparams import choose_hyperparams +from train_models import train_model +from main_heuristics import ordering_choices_heuristics +from find_filename import find_dataset_filename +from find_filename import find_timings_lists +from find_filename import find_hyperparams_filename +from test_models import compute_metrics +from test_models import choose_indices + + +def metrics_for_all_reps(all_indices_chosen, testing_dataset, ml_model): + all_metrics = [compute_metrics(chosen_indices, testing_dataset) + for chosen_indices in all_indices_chosen] + aveg_metrics = {key: sum(metrics[key]/len(all_metrics) + for metrics in all_metrics) + for key in all_metrics[0]} + all_timings = testing_dataset['timings'] + aveg_timings = [] + for instance in range(len(all_indices_chosen[0])): + instance_timings = [timings[indices_chosen[instance]] + for timings, indices_chosen + in zip(all_timings, all_indices_chosen)] + aveg_timings.append(instance_timings) + timings_lists_filename = find_timings_lists(ml_model) + with open(timings_lists_filename, 'wb') as timings_lists_file: + pickle.dump(aveg_timings, timings_lists_file) + all_total_times = [metrics['TotalTime'] for metrics in all_metrics] + return aveg_metrics, all_total_times + + +def dominiks_plots(all_total_times): + data = [] + for key in all_total_times: + data.extend([{'Model': key, 'Total time': total_time} + for total_time in all_total_times[key]]) + df = pd.DataFrame(data) + + # Create a box plot + plt.figure(figsize=(8, 6)) + sns.boxplot(x='Model', y='Total time', data=df) + + # Add labels and title + plt.xlabel('Model') + plt.ylabel('Total time') + plt.title('Model Total time Comparison') + + # Display the plot + plt.show() + + +def repeat_instances_dataset(dataset, n_reps): + new_dataset = dict() + for key in dataset: + new_dataset[key] = [elem for elem in dataset[key] + for _ in range(n_reps)] + return new_dataset + + +def study_a_model(model_name: str, + testing_quality: str, + paradigm: str, + training_quality: str = '', + tune_hyperparameters: bool = False, + reps: int = 10 + ): + if model_name in heuristics: + if training_quality != '': + raise Exception(f"training_quality cannot be {training_quality}.") + if tune_hyperparameters is not False: + raise Exception(f"Hyperparams cannot be tuned for {paradigm}.") + testing_filename = find_dataset_filename('Test', testing_quality) + with open(testing_filename, 'rb') as testing_file: + testing_dataset = pickle.load(testing_file) + factorial_nvar = len(testing_dataset['projections'][0]) + if testing_quality in ['Biased', 
'Balanced']: + # If the dataset contains less factorial_nvar less instances, + # we repeat each instance factorial_nvar times + testing_dataset = \ + repeat_instances_dataset(testing_dataset, factorial_nvar) + all_metrics = [] + all_timings = [] + for _ in range(reps): + if model_name not in heuristics: + # If the paradigm is 'Heuristics' there is no need + # to tune_hyperparameters or to train the models + hyperparams_filename = find_hyperparams_filename(model_name, + paradigm, + training_quality) + '.yaml' + if tune_hyperparameters or not os.path.exists(hyperparams_filename): + if not os.path.exists(hyperparams_filename): + print('hyperparams_filename doesnt exits \n', hyperparams_filename) + choose_hyperparams(model_name, paradigm, training_quality) + # Hyperparameters ready + train_model(model_name, paradigm, training_quality) + # Model trained + chosen_indices = choose_indices(model_name, testing_dataset, + paradigm, training_quality) + # Indices chosen by the model + all_metrics.append(compute_metrics(chosen_indices, testing_dataset)) + all_timings.append([timings[index] for timings, index + in zip(testing_dataset['timings'], + chosen_indices)]) + model_info = dict() + model_info['AverageMetrics'] = {key: sum(metrics[key] for metrics + in all_metrics)/reps + for key in all_metrics[0]} + # average metrics computed for comparison purposes + model_info['AverageTimings'] = [sum(all_timings_in_instance)/reps + for all_timings_in_instance + in zip(*all_timings)] + # average timings in each instance to create adversarial plots + for key in all_metrics[0]: + model_info['All' + key] = [metrics[key] + for metrics in all_metrics] + # info of all metrics saved for seaborn boxplots + return model_info + + +if __name__ == "__main__": + reps = 50 + data = dict() + data['TotalTime'] = [] + new_datasets = False + if new_datasets: + cleaning_dataset() + create_train_test_datasets() + create_regression_datasets() + all_total_times = dict() + for model_name in list(all_models) + heuristics: + if model_name in heuristics: + testing_quality = 'Biased' + training_quality = '' + tune_hyperparameters = False + paradigm = 'Greedy' # NotGreedy + else: + testing_quality = 'Augmented' + training_quality = 'Augmented' + tune_hyperparameters = False + if model_name in classifiers: + paradigm = '' + elif model_name in regressors: + paradigm = 'Regression' + + model_info = study_a_model(model_name=model_name, + testing_quality=testing_quality, + paradigm=paradigm, + training_quality=training_quality, + tune_hyperparameters=tune_hyperparameters, + reps=reps + ) + all_total_times[model_name] = model_info['AllTotalTime'] + + dominiks_plots(all_total_times) + + + + +# def choose_indices(model, dataset): +# if model in classifiers: +# elif model in heuristics: +# ordering_choices_heuristics(model, dataset) diff --git a/test_models.py b/test_models.py index ef32a4b..662c82a 100644 --- a/test_models.py +++ b/test_models.py @@ -12,7 +12,7 @@ from find_filename import find_output_filename from find_filename import find_dataset_filename from find_filename import find_model_filename -from find_filename import find_timings_lists +from main_heuristics import ordering_choices_heuristics # from train_models import ordering_choice_reinforcement # from train_models import training_instances_reinforcement # Check if 'dataset_manipulation' is installed @@ -52,8 +52,8 @@ def test_results(training_method): round_accuracies) -def test_classifier(ml_model, testing_method='augmented'): - trained_model_filename = 
find_model_filename('classification', +def test_classifier(ml_model, testing_method='Augmented'): + trained_model_filename = find_model_filename('Classification', ml_model) test_dataset_filename = find_dataset_filename('Test', testing_method) @@ -65,8 +65,8 @@ def test_classifier(ml_model, testing_method='augmented'): return compute_metrics(chosen_indices, y_test, all_timings) -def timings_in_test(model, testing_method='augmented', training_method=None): - test_dataset_filename = find_dataset_filename('test', +def timings_in_test(model, testing_method='Augmented', training_method=None): + test_dataset_filename = find_dataset_filename('Test', testing_method) with open(test_dataset_filename, 'rb') as test_dataset_file: x_test, _, all_timings = pickle.load(test_dataset_file) @@ -85,10 +85,10 @@ def timings_in_test(model, testing_method='augmented', training_method=None): def test_regressor(ml_model): - trained_model_filename = find_model_filename('regression', + trained_model_filename = find_model_filename('Regression', ml_model) - test_dataset_filename = find_dataset_filename('test', - 'regression') + test_dataset_filename = find_dataset_filename('Test', + 'Regression') with open(trained_model_filename, 'rb') as trained_model_file: model = pickle.load(trained_model_file) with open(test_dataset_filename, 'rb') as test_dataset_file: @@ -98,7 +98,7 @@ def test_regressor(ml_model): print(f"{ml_model} gave {avg_error}") -def test_model(ml_model, paradigm, testing_method='augmented'): +def test_model(ml_model, paradigm, testing_method='Augmented'): test_dataset_filename = find_dataset_filename('Test', testing_method) with open(test_dataset_filename, 'rb') as test_dataset_file: @@ -115,21 +115,22 @@ def test_model(ml_model, paradigm, testing_method='augmented'): ml_model) -def choose_indices(ml_model, dataset, paradigm=''): - trained_model_filename = find_model_filename(paradigm, ml_model) - with open(trained_model_filename, 'rb') as trained_model_file: - model = pickle.load(trained_model_file) - if ml_model in regressors: - chosen_indices = [return_regressor_choice(model, features) - for features in dataset['features']] - elif ml_model in classifiers: - chosen_indices = [model.predict([features])[0] - for features in dataset['features']] - elif paradigm == 'reinforcement': - chosen_indices = [ordering_choice_reinforcement(model, projections) - for projections in dataset['projections']] - elif ml_model in heuristics: - ordering_choices_heuristics(model, dataset) +def choose_indices(model_name, testing_dataset, paradigm='', training_quality='Augmented'): + if model_name in heuristics: + chosen_indices = ordering_choices_heuristics(model_name, testing_dataset, paradigm) + else: + trained_model_filename = find_model_filename(model_name, paradigm, training_quality) + with open(trained_model_filename, 'rb') as trained_model_file: + model = pickle.load(trained_model_file) + if model_name in regressors: + chosen_indices = [return_regressor_choice(model, features) + for features in testing_dataset['features']] + elif model_name in classifiers: + chosen_indices = [model.predict([features])[0] + for features in testing_dataset['features']] + elif paradigm == 'Reinforcement': + chosen_indices = [ordering_choice_reinforcement(model, projections) + for projections in testing_dataset['projections']] return chosen_indices diff --git a/test_train_datasets.py b/test_train_datasets.py index 0cc51b1..cdc7c3c 100644 --- a/test_train_datasets.py +++ b/test_train_datasets.py @@ -40,16 +40,16 @@ def 
create_train_test_datasets(): # train and test sets are created random_state = 0 print(dataset.keys()) - datasets['Train_Normal']['features'], \ - datasets['Test_Normal']['features'], \ - datasets['Train_Normal']['labels'], \ - datasets['Test_Normal']['labels'], \ - datasets['Train_Normal']['timings'], \ - datasets['Test_Normal']['timings'], \ - datasets['Train_Normal']['projections'], \ - datasets['Test_Normal']['projections'], \ - datasets['Train_Normal']['cells'], \ - datasets['Test_Normal']['cells'] = \ + datasets['Train_Biased']['features'], \ + datasets['Test_Biased']['features'], \ + datasets['Train_Biased']['labels'], \ + datasets['Test_Biased']['labels'], \ + datasets['Train_Biased']['timings'], \ + datasets['Test_Biased']['timings'], \ + datasets['Train_Biased']['projections'], \ + datasets['Test_Biased']['projections'], \ + datasets['Train_Biased']['cells'], \ + datasets['Test_Biased']['cells'] = \ train_test_split(dataset['features'], dataset['labels'], dataset['timings'], @@ -62,7 +62,7 @@ def create_train_test_datasets(): datasets[f'{purpose}_Balanced'] = \ {key: elem for key, elem in zip(keys, balance_dataset( - *[datasets[f'{purpose}_Normal'][key2] + *[datasets[f'{purpose}_Biased'][key2] for key2 in keys], nvar=3)) ##CHOOSE NVAR WELL } datasets[f'{purpose}_Balanced']['labels'] = \ @@ -70,7 +70,7 @@ def create_train_test_datasets(): datasets[f'{purpose}_Augmented'] = \ {key: elem for key, elem in zip(keys, augmentate_dataset( - *[datasets[f'{purpose}_Normal'][key2] + *[datasets[f'{purpose}_Biased'][key2] for key2 in keys], nvar=3)) } print(f"features in {purpose}_Augmented", len(datasets[f'{purpose}_Augmented']['features'][0])) @@ -79,7 +79,7 @@ def create_train_test_datasets(): for purpose in purposes: for quality in dataset_qualities: this_dataset_filename = \ - find_dataset_filename(purpose, method=quality) + find_dataset_filename(purpose, dataset_quality=quality) with open(this_dataset_filename, 'wb') as this_dataset_file: pickle.dump(datasets[purpose + '_' + quality], this_dataset_file) @@ -109,12 +109,14 @@ def create_train_test_datasets(): # + [str(len(y[f'{purpose}_{method}']))]) -def create_regression_datasets(taking_logarithms=True): +def create_regression_datasets(dataset_quality='Augmented', + taking_logarithms=True): for purpose in purposes: - this_dataset_filename = find_dataset_filename(purpose, - method='augmented') + existing_dataset_filename = find_dataset_filename( + purpose, + dataset_quality=dataset_quality) # we will use the augmented dataset here - with open(this_dataset_filename, 'rb') as this_dataset_file: + with open(existing_dataset_filename, 'rb') as this_dataset_file: regression_dataset = pickle.load(this_dataset_file) regression_dataset['labels'] = \ [timings[0] for timings @@ -123,9 +125,11 @@ def create_regression_datasets(taking_logarithms=True): regression_dataset['labels'] = \ [log(label) for label in regression_dataset['labels']] - this_dataset_filename =\ - find_dataset_filename(purpose, method='regression') - with open(this_dataset_filename, 'wb') as this_dataset_file: + new_dataset_filename = find_dataset_filename( + purpose, + dataset_quality=dataset_quality, + paradigm='Regression') + with open(new_dataset_filename, 'wb') as this_dataset_file: pickle.dump(regression_dataset, this_dataset_file) # classification_dataset = regression_dataset # classification_dataset['labels'] = \ diff --git a/train_models.py b/train_models.py index 89f685a..8d6df33 100644 --- a/train_models.py +++ b/train_models.py @@ -15,29 +15,30 @@ from test_models 
import compute_metrics -def train_model(ml_model, method): - train_data_filename = find_dataset_filename('Train', method=method) - hyperparams_file = find_hyperparams_filename(method, ml_model) +def train_model(model_name, paradigm, training_quality): + train_data_filename = find_dataset_filename('Train', dataset_quality=training_quality, paradigm=paradigm) + print(model_name, 'dataset used for train', train_data_filename) + hyperparams_file = find_hyperparams_filename(model_name, paradigm=paradigm, training_quality=training_quality) with open(train_data_filename, 'rb') as train_data_file: train_dataset = pickle.load(train_data_file) hyperparams = read_yaml_from_file(hyperparams_file) - current_model = all_models[ml_model] + current_model = all_models[model_name] model = current_model(**hyperparams) # model = current_model() print('here') model.fit(train_dataset['features'], train_dataset['labels']) - trained_model_filename = find_model_filename(method, ml_model) + trained_model_filename = find_model_filename(model_name, paradigm, training_quality) print('here2') with open(trained_model_filename, 'wb') as trained_model_file: pickle.dump(model, trained_model_file) return model -def train_regression_model(ml_model, method): +def train_regression_model(model_name, method): train_data_filename = find_dataset_filename('Train', method=method) with open(train_data_filename, 'rb') as train_data_file: train_dataset = pickle.load(train_data_file) - # hyperparams_file = find_hyperparams_filename(method, ml_model) + # hyperparams_file = find_hyperparams_filename(method, model_name) # hyperparams = read_yaml_from_file(hyperparams_file) train_dataset['features'] = np.asarray([x_t for x_t, t_t in zip(train_dataset['features'], train_dataset['timings']) if t_t[:4] != 'Over'], dtype=float) @@ -46,10 +47,10 @@ def train_regression_model(ml_model, method): #### # IS THIS REALLY DOING SOMTHING? 
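# (It does: the comprehension above keeps only the feature rows whose timing
#  string does not start with 'Over', i.e. it silently drops every training
#  instance that hit the time limit.)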
# What if we used twice timelimit instead - current_model = ml_regressors[ml_model] + current_model = ml_regressors[model_name] reg = current_model() # **hyperparams) reg.fit(train_dataset['features'], train_dataset['timings']) - # trained_model_filename = find_model_filename(method, ml_model, 'regression') + # trained_model_filename = find_model_filename(method, model_name, 'regression') # with open(trained_model_filename, 'wb') as trained_model_file: # pickle.dump(reg, trained_model_file) return reg @@ -71,13 +72,13 @@ def test_regression_model(method, regressor): y_pred = [choose_using_regression(x_i, regressor) for x_i in x_test] -def train_reinforcement_model(ml_model, method='Normal'): +def train_reinforcement_model(model_name, method='Normal'): train_data_filename = find_dataset_filename('Train', method=method) with open(train_data_filename, 'rb') as train_data_file: train_dataset = pickle.load(train_data_file) - # hyperparams_file = find_hyperparams_filename(method, ml_model) + # hyperparams_file = find_hyperparams_filename(method, model_name) # hyperparams = read_yaml_from_file(hyperparams_file) - current_model = all_models[ml_model] + current_model = all_models[model_name] # model = current_model(**hyperparams) model = current_model() first_polys = train_dataset['projections'][0][0][0] @@ -94,7 +95,7 @@ def train_reinforcement_model(ml_model, method='Normal'): training_labels += new_training_labels model.fit(training_features, training_labels) print(test_reinforcement_model(model)) - trained_model_filename = find_model_filename('reinforcement', ml_model) + trained_model_filename = find_model_filename('reinforcement', model_name) with open(trained_model_filename, 'wb') as trained_model_file: pickle.dump(model, trained_model_file) @@ -167,14 +168,14 @@ def ordering_choice_reinforcement(model, projections): return ordering -def test_reinforcement_model(ml_model, method='Normal', nvar=3): +def test_reinforcement_model(model_name, method='Normal', nvar=3): train_data_filename = find_dataset_filename('Test', method=method) with open(train_data_filename, 'rb') as train_data_file: testing_dataset = pickle.load(train_data_file) - # trained_model_filename = find_model_filename('reinforcement', ml_model) + # trained_model_filename = find_model_filename('reinforcement', model_name) # with open(trained_model_filename, 'rb') as trained_model_file: # model = pickle.load(trained_model_file) - model = ml_model + model = model_name chosen_indices = [ordering_choice_reinforcement(model, projections) for projections in testing_dataset['projections']] metrics = compute_metrics(chosen_indices,