diff --git a/Heuristics/__init__.py b/Heuristics/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Heuristics/create_graphs_heuristics.py b/Heuristics/create_graphs_heuristics.py
new file mode 100644
index 0000000..0845cf4
--- /dev/null
+++ b/Heuristics/create_graphs_heuristics.py
@@ -0,0 +1,228 @@
+'''
+This file contains the functions to create the graphs comparing the heuristics.
+'''
+
+import matplotlib.pyplot as plt
+from matplotlib.pyplot import cm
+import pickle
+import os
+import numpy as np
+from numpy import sort
+from numpy import Inf
+
+from .heuristics_guess import choose_order_given_projections
+from .heuristic_tools import get_dataset, substract_two_timings, finding_time_limit, compute_markups, compute_real_timings
+
+import matplotlib
+matplotlib.rcParams['mathtext.rm'] = 'Bitstream Vera Sans'
+matplotlib.rcParams['mathtext.it'] = 'Bitstream Vera Sans:italic'
+matplotlib.rcParams['mathtext.bf'] = 'Bitstream Vera Sans:bold'
+matplotlib.rcParams['mathtext.fontset'] = 'cm'
+matplotlib.rcParams['font.family'] = 'STIXGeneral'
+
+fontsize = 15
+desired_font = {'fontname': 'monospace'}
+matplotlib.rcParams.update({'font.size': fontsize})
+
+folder_figures = os.path.join(os.path.dirname(__file__), '..', 'Art')
+
+######################################################
+### LEARN TO USE PNG https://riptutorial.com/matplotlib/example/10066/saving-and-exporting-plots-that-use-tex#:~:text=In%20order%20to%20include%20plots,text%20in%20the%20final%20document.&text=Plots%20in%20matplotlib%20can%20be,macro%20package%20to%20display%20graphics.
+########################################################
+
+
+def create_survival_plot(
+        heuristics=['virtual_best', 'gmods', 'mods', 'brown', 'sotd', 'greedy_sotd'],
+        minimum_time_to_consider=0,
+        rep=10
+        ):
+    '''This function creates a survival plot comparing the desired heuristics.'''
+
+    dataset = get_dataset(without_repetition=True, minimum_time_to_consider=minimum_time_to_consider)
+    projections, targets, timings, heuristics_costs, ncells = dataset
+
+    color = cm.rainbow(np.linspace(0, 1, len(heuristics)+1))
+    # color[4] = [0.8, 0.8, 0.2, 1]
+    color[3] = [0.65, 0.42, 0.42, 1]
+    color[2] = [0.00, 1, 0.5, 1]
+    # color = ['0', '0.5', '0', '0.5', '0', '0.5']
+    style = ['--', '--', '--', '--', '--', '--']
+    dashes = [(1, 0), (5, 1), (5, 1, 1, 1), (2, 1, 2, 1), (1, 1), (5, 5)]
+
+    for heuristic, c, s, d in zip(heuristics, color, style, dashes):
+        many_sorted_timings = []
+        for i in range(rep):
+            if heuristic == 'virtual_best':
+                rawtimings = [timing[target] for timing, target in zip(timings, targets)]
+            else:
+                guesses = [choose_order_given_projections(projection, method=heuristic) for projection in projections]
+                rawtimings = [timing[guess] for timing, guess in zip(timings, guesses)]
+            # the comparison below is reconstructed: part of this line was lost,
+            # and comparing against the instance's time limit is an assumption
+            sorted_timings = sort([timing for timing, all_orders_timing in zip(rawtimings, timings) if type(timing) != str and timing < finding_time_limit(all_orders_timing)])
+            many_sorted_timings.append(sorted_timings)
+        # reconstructed plotting code: the original lines were lost; the axis
+        # labels and the output file name below are assumptions
+        avg_sorted_timings = average_sorted_timings(many_sorted_timings)
+        plt.plot(avg_sorted_timings, range(len(avg_sorted_timings)), color=c, linestyle=s, dashes=d, label=str(heuristic))
+    plt.xlabel('Time')
+    plt.ylabel('Problems finished')
+    plt.legend()
+    plt.savefig(os.path.join(folder_figures, 'survival_plot.png'))
+
+
+def average_sorted_timings(many_sorted_timings):
+    '''
+    Averages, index by index, the sorted timing lists obtained over the
+    repetitions, padding shorter lists with their last element.
+    (Only the comprehension and the return survived extraction; the function
+    name, the signature and the line computing avg_len are reconstructions.)
+    '''
+    avg_len = max(len(st) for st in many_sorted_timings)
+    avg_sorted_timings = [sum([st[-1] if i >= len(st) else st[i] for st in many_sorted_timings])/len(many_sorted_timings) for i in range(avg_len)]
+    return avg_sorted_timings
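The reconstructed helper pads shorter survival curves with their last value, so every repetition contributes at every index. A standalone toy check of that behaviour (made-up numbers, not dataset timings):

curves = [[1.0, 2.0, 5.0], [1.5, 2.5]]
avg_len = max(len(c) for c in curves)
avg = [sum(c[-1] if i >= len(c) else c[i] for c in curves) / len(curves)
       for i in range(avg_len)]
print(avg)  # [1.25, 2.25, 3.75]: the short curve contributes its last value, 2.5, at index 2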
+def create_adversarial_plot(
+        heuristic1='gmods',
+        heuristic2='avegavegdeg'
+        ):
+    '''This function creates an adversarial plot comparing the desired heuristics.'''
+
+    dataset = get_dataset(without_repetition=True, minimum_time_to_consider=0)
+    # we always want all examples here
+    projections, _, timings, heuristics_costs, ncells = dataset
+
+    guesses1 = [choose_order_given_projections(projection, method=heuristic1) for projection in projections]
+    rawtimings1 = [timing[guess] for timing, guess in zip(timings, guesses1)]
+    timings1 = [timing if type(timing) != str and timing < 30 else 30 for timing in rawtimings1]
+    # ... (the remainder of this function, including the symmetric treatment of
+    # heuristic2 and the plot itself, was lost in extraction, together with the
+    # rest of this file and the opening of Heuristics/heuristic_tools.py; the
+    # capped timings1 line above is a plausible completion)
diff --git a/Heuristics/heuristic_tools.py b/Heuristics/heuristic_tools.py
new file mode 100644
--- /dev/null
+++ b/Heuristics/heuristic_tools.py
+'''
+Helper functions used by the heuristics and by the graph-creating functions.
+'''
+# (The opening of this file was lost in extraction. Besides the imports below,
+# it must also define compute_markups, compute_real_timings, greedy_heuristics,
+# expensive_heuristics and create_pseudorderings, which other modules import
+# from here.)
+
+import os
+import json
+import pickle
+import itertools
+
+
+def finding_time_limit(timings):
+    '''
+    Returns the time limit used for an instance.
+    (Only the tail of this function survived; the argument name and the
+    condition are assumptions consistent with a 30/60-second two-tier timeout.)
+    '''
+    if any(type(timing) != str and timing > 30 for timing in timings):
+        return 60
+    else:
+        return 30
+
+
+def minimum_indices(given_list):
+    '''
+    Returns the indices containing the minima of a list.
+    Helpful function for the heuristics below.
+    '''
+    minimum = min(given_list)
+    return [index for index, value in enumerate(given_list) if value == minimum]
+
+
+def multiplyList(myList):
+    '''
+    Multiplies together all the elements in a list.
+    '''
+    result = 1
+    for x in myList:
+        result = result * x
+    return result
+
+
+def all_combinations(l):
+    '''
+    Returns all possible combinations of a given list.
+    More concretely, all possible subsets ordered in all possible ways.
+    '''
+    combs_with_order = []
+    for i in range(1, len(l)+1):
+        combs = list(itertools.combinations(l, i))
+        for comb in combs:
+            combs_with_order += list(itertools.permutations(comb, i))
+    return combs_with_order
+
+
+def all_combinations_fixed_length(l, i):
+    '''
+    Returns all combinations of a given list that have length 'i'.
+    More concretely, all subsets of size 'i' ordered in all possible ways.
+    '''
+    combs_with_order = []
+    combs = list(itertools.combinations(l, i))
+    for comb in combs:
+        combs_with_order += list(itertools.permutations(comb, i))
+    return combs_with_order
+
+
+def trim_dataset(dataset, minimum_time_to_consider=0):
+    '''
+    Returns the dataset containing only the problems that took
+    at least 'minimum_time_to_consider' seconds to finish.
+    '''
+    projections, targets, timings, heuristics_costs, ncells = dataset
+    new_projections = [projection for projection, timing, target in zip(projections, timings, targets) if timing[target] > minimum_time_to_consider]
+    new_targets = [target for target, timing in zip(targets, timings) if timing[target] > minimum_time_to_consider]
+    new_timings = [timing for timing, target in zip(timings, targets) if timing[target] > minimum_time_to_consider]
+    new_heuristics_costs = [heuristics_cost for heuristics_cost, timing, target in zip(heuristics_costs, timings, targets) if timing[target] > minimum_time_to_consider]
+    new_ncells = [ncells for ncells, timing, target in zip(ncells, timings, targets) if timing[target] > minimum_time_to_consider]
+
+    return new_projections, new_targets, new_timings, new_heuristics_costs, new_ncells
+
+
+def get_dataset(without_repetition=True, return_ncells=True, minimum_time_to_consider=0):
+    '''
+    Loads the desired dataset from its location.
+    '''
+    if without_repetition:
+        aux_name = 'without_repetition'
+    else:
+        aux_name = 'with_repetition'
+
+    if return_ncells:
+        dataset_location = os.path.join(os.path.dirname(__file__), '..', 'Datasets', 'ThreeVariableSMTLIB2021', 'dataset_'+aux_name+'_return_ncells.txt')
+    else:
+        dataset_location = os.path.join(os.path.dirname(__file__), '..', 'Datasets', 'dataset_'+aux_name+'.txt')
+
+    with open(dataset_location, 'rb') as f:
+        dataset = pickle.load(f)
+
+    return trim_dataset(dataset, minimum_time_to_consider=minimum_time_to_consider)
+
+
+def aveg_of_not_zero(given_list):
+    '''
+    Takes the average of a list without considering the elements that are 0.
+    '''
+    s = sum(given_list)
+    if s > 0:
+        return s/sum([1 for elem in given_list if elem > 0])
+    else:
+        return 0
+
+
+def substract_two_timings(time1, time2):
+    '''time1 minus time2; a string timing (a timeout) counts as the 30-second limit'''
+    if type(time1) is str and type(time2) is str:
+        return 0
+    elif type(time1) is str and type(time2) is not str:
+        return 30
+    elif type(time1) is not str and type(time2) is str:
+        return -30
+    elif type(time1) is not str and type(time2) is not str:
+        return time1 - time2
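For a concrete sense of what all_combinations produces (and how fast it grows), here is a standalone check; the helper is copied from above so the snippet runs on its own, and the toy lists are illustrative only:

import itertools

def all_combinations(l):
    combs_with_order = []
    for i in range(1, len(l)+1):
        for comb in itertools.combinations(l, i):
            combs_with_order += list(itertools.permutations(comb, i))
    return combs_with_order

print(all_combinations(['a', 'b']))
# [('a',), ('b',), ('a', 'b'), ('b', 'a')]
print(len(all_combinations(['a', 'b', 'c'])))
# 15 = 3 singletons + 6 ordered pairs + 6 orderings of the full set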
+
+
+# This is how to save the best features
+file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'best_features')
+# best_features = ['summaxdeg', 'avegavegdeg', 'sumsumdeg', 'avegavegsigndeg', 'sumsignsumdeg', 'summaxsvdeg']  # , 'sumsumsigndeg', 'sumsumsvdeg'
+# with open(file_path, 'w') as file:
+#     json.dump(best_features, file)
+
+# This is how to load the best features
+with open(file_path, 'r') as file:
+    best_features = json.load(file)
+
+paper_all_pos = all_combinations(best_features)
+indices = list(range(len(best_features)))
+paper_all_indices = [str(elem).replace(', ', '>').replace('(', '').replace(')', '') for elem in all_combinations(indices)]
+existing_heuristics = ['brown', 'mods', 36, 'random', 'virtual_best']  # 36 is gmods
+survival_plot_heuristics = ['virtual_best', 36, 'brown']
+ml_models = []
diff --git a/Heuristics/heuristics_guess.py b/Heuristics/heuristics_guess.py
new file mode 100644
index 0000000..01bdc29
--- /dev/null
+++ b/Heuristics/heuristics_guess.py
@@ -0,0 +1,97 @@
+'''
+This file contains the functions that, given all projections with
+all possible orderings, return the ordering that would have been
+chosen by the desired heuristic.
+'''
+
+import random
+from math import factorial
+from .heuristics_rules import *
+from .heuristic_tools import greedy_heuristics, expensive_heuristics, create_pseudorderings, ml_models
+
+
+def choose_order_given_projections(projections, method="gmods"):
+    '''Returns the order guessed by the requested heuristic.'''
+    if method in greedy_heuristics or type(method) == int:
+        guess = greedy_heuristic_guess(projections, heuristic=method)
+        return guess
+    elif method in expensive_heuristics:
+        return no_greedy_heuristic_guess(projections, heuristic=method)
+    elif method in ml_models:
+        return ml_model_guess(projections, method=method)
+    else:
+        raise Exception(f'Heuristic not recognised: {method}.')
+
+
+def greedy_heuristic_guess(projections: list, heuristic: str = "gmods"):
+    '''
+    This function is specialized in greedy heuristics.
+    One variable is picked at a time, adjusting the ordering accordingly.
+    '''
+    order = 0  # we start assuming that the best order is the first one
+    nvar = len(projections[0])  # the number of variables corresponds to the length of the list describing one of the projections
+
+    for i in range(nvar):
+        # projections[order] is the projection of the order we currently assume
+        # to be the best; all orders we can still choose from agree with it up
+        # to this point
+        try:
+            if heuristic != 'greedy_sotd':
+                new_var = greedy_choose_variable(projections[order][i], heuristic=heuristic)
+            elif i < nvar-1:
+                new_var = greedy_choose_variable([projections[ordering][i+1] for ordering in range(factorial(nvar)) if projections[ordering][i] == projections[order][i]], heuristic=heuristic)
+            else:
+                new_var = 0
+        except IndexError:
+            # this error probably means that the computation of the projection
+            # did not get any further; in this case we return the current order
+            return order
+
+        if type(new_var) == str:
+            return order
+        order = order + factorial(nvar-i-1) * new_var  # the best order is updated with the new information
+    return order  # the final best order is returned
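The update order = order + factorial(nvar-i-1) * new_var builds a Lehmer-style code: the final integer is the index of the chosen ordering in the lexicographic list of permutations, which is exactly the assignment dictionary used for nvar = 3 below. A standalone sketch (not repository code, and assuming new_var indexes the variables still to be projected) that decodes the integer back:

from math import factorial
from itertools import permutations

def decode_order(order, nvar):
    # at each step, order // (nvar-i-1)! picks the next variable among
    # those still unused, then the remainder encodes the rest
    remaining = list(range(nvar))
    ordering = []
    for i in range(nvar):
        block = factorial(nvar - i - 1)
        ordering.append(remaining.pop(order // block))
        order %= block
    return ordering

# the integer indexes the lexicographic list of permutations
assert [decode_order(k, 3) for k in range(6)] == [list(p) for p in permutations(range(3))]
print(decode_order(4, 3))  # [2, 0, 1], matching '[2, 0, 1]': 4 below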
+def not_greedy_heuristic_guess(original_polynomials: list,
+                               heuristic: str = "gmods"):
+    '''
+    This function is specialized in non-greedy heuristics.
+    All variables are picked from the original polynomials.
+    '''
+    order = 0  # we start assuming that the best order is the first one
+    order_measure = get_order_measure(heuristic, if_tie=None)
+    degrees_list, nvar = get_degree_list(original_polynomials, heuristic)
+    variables = list(range(nvar))
+    ordering = []
+
+    while len(variables) != 0:
+        best_vars = variables
+        for measure in order_measure:
+            if measure is None:  # if_tie=None: remaining ties are broken by the shuffle below
+                continue
+            best_vars = choose_variables_minimizing(degrees_list, measure=measure, var_list=best_vars)
+        random.shuffle(best_vars)
+        # print('best vars shuffled', best_vars)
+        ordering += best_vars
+        variables = [var for var in variables if var not in ordering]
+    assignment = {'[0, 1, 2]': 0, '[0, 2, 1]': 1,
+                  '[1, 0, 2]': 2, '[1, 2, 0]': 3,
+                  '[2, 0, 1]': 4, '[2, 1, 0]': 5,
+                  }
+    order = assignment[str(ordering)]
+    # order = order + factorial(nvar-i-1) * new_var  # the best order is updated with the new information
+    return order  # the final best order is returned
+
+
+def no_greedy_heuristic_guess(projections: list, heuristic: str = "old_mods"):
+    '''
+    Looking at all the projections at the same time,
+    the non-greedy heuristics make an ordering choice.
+    '''
+    if heuristic == "sotd":
+        sotd_values = [sum([degree for level in projection for polynomial in level for monomial in polynomial for degree in monomial[:-1]]) for projection in projections]
+        return min(range(len(sotd_values)), key=sotd_values.__getitem__)  # returns the index with the smallest value in the list sotd_values
+    elif heuristic in ["old_mods", "logmods", "mods", "acc_logmods"]:
+        nvar = len(projections[0])
+        pseudorderings = create_pseudorderings(nvar)
+        relevant_degrees = [[[max([monomial[var] for monomial in polynomial]) for polynomial in level] for level, var in zip(projection, pseudordering)] for projection, pseudordering in zip(projections, pseudorderings)]
+        # this is a list with one entry per projection; each entry lists, per level,
+        # the degrees of its polynomials with respect to the variable projected next
+        heuristic_dict = {'old_mods': old_mods_guess, 'mods': mods_guess, 'logmods': logmods_guess, 'acc_logmods': acc_logmods_guess}
+        return heuristic_dict[heuristic](relevant_degrees)
+    else:
+        raise Exception("Heuristic "+heuristic+" not found.")
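Both this file and the rules file that follows consume the same encoding: a projection is a list of levels, a level is a list of polynomials, a polynomial is a list of monomials, and a monomial is a list of exponents with the coefficient in the last position (hence the monomial[:-1] above). A self-contained toy illustration of the encoding and of the sotd sum; the polynomials here are made up:

# 3*x^2*y - z in variables (x, y, z) encodes as [[2, 1, 0, 3], [0, 0, 1, -1]]
projection = [
    [[[2, 1, 0, 3], [0, 0, 1, -1]]],  # level 1: the polynomial 3*x^2*y - z
    [[[1, 1, 1, 5]]],                 # level 2: the polynomial 5*x*y*z
]
sotd = sum(degree
           for level in projection
           for polynomial in level
           for monomial in polynomial
           for degree in monomial[:-1])
print(sotd)  # (2+1+0) + (0+0+1) + (1+1+1) = 7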
diff --git a/Heuristics/heuristics_rules.py b/Heuristics/heuristics_rules.py
new file mode 100644
index 0000000..60ff9bf
--- /dev/null
+++ b/Heuristics/heuristics_rules.py
@@ -0,0 +1,193 @@
+'''
+This file contains all the details necessary for the
+different heuristics to make their choices.
+'''
+
+import os
+import numpy as np
+from math import log
+import itertools
+import random
+from .heuristic_tools import multiplyList, all_combinations, minimum_indices, aveg_of_not_zero, paper_all_pos
+
+
+def choose_variables_minimizing(degrees_list, measure='gmods', var_list=''):
+    '''Given a list of the degrees of polynomials, returns the list of variables that minimise the desired measure.'''
+    if measure != 'greedy_sotd':
+        nvar = len(degrees_list[0][0])  # the number of variables is the same everywhere; we check the first monomial of the first polynomial
+    else:
+        nvar = len(degrees_list[0][0][0])
+    if var_list == '':  # if the value is the default one
+        var_list = range(nvar)
+
+    if measure == 'gmods':
+        sum_degree_polys = [sum([max([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]  # for each variable, the degree of every polynomial in that variable; these values are then added over all polynomials, which is what we really care about
+        return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degree_polys)]  # var_list is filtered
+    if measure == 'ali_aveg':
+        av_degree_polys_with_var = [aveg_of_not_zero([max([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]  # for each variable, the average degree over the polynomials that actually contain it
+        return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(av_degree_polys_with_var)]  # var_list is filtered
+    elif measure == 'greedy_logmods':
+        sum_degrees_overall_polys = [sum([log(max([1]+[monomial[var] for monomial in polynomial])) for polynomial in degrees_list]) for var in var_list]  # for each variable, the logarithms of the degrees added over all polynomials
+        return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]
+    elif measure == 'brown1':
+        max_degrees_polywise = [max([max([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]  # for each variable, the maximum degree over all the polynomials
+        return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(max_degrees_polywise)]
+    elif measure == 'brown2':
+        max_degrees_polywise = [max([max([0]+[monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]  # for each variable, the maximum degree over all the polynomials
+        degrees_of_monomials_with_max_degrees = [max([max([0]+[sum(monomial) for monomial in polynomial if monomial[var] == max_degree]) for polynomial in degrees_list]) for var, max_degree in zip(var_list, max_degrees_polywise)]  # for each variable, the maximum total degree among the monomials attaining that maximum degree (the zip pairs each variable with its own maximum; the original indexed max_degrees_polywise by the variable number, which is only correct when var_list is the full range)
+        return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(degrees_of_monomials_with_max_degrees)]
+    elif measure == 'brown3':
+        number_appearances = [sum([sum([np.sign(monomial[var]) for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]  # the number of monomials in which the variable appears is counted
+        return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(number_appearances)]
+    # (the commented-out measures below all follow the same pattern: compute a
+    # score per variable, then keep the variables minimising it)
+    # elif measure == 'avegmaxsvdeg':
+    #     sum_degrees_overall_polys = [np.average([max([sum(monomial) for monomial in polynomial if monomial[var] > 0]) for polynomial in degrees_list]) for var in var_list]
+    #     return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]  # var_list is filtered
+    # elif measure == 'maxsumsvdeg':
+    #     sum_degrees_overall_polys = [max([sum([sum(monomial) for monomial in polynomial if monomial[var] > 0]) for polynomial in degrees_list]) for var in var_list]
+    #     return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]  # var_list is filtered
+    # elif measure == 'avegsumsvdeg':
+    #     sum_degrees_overall_polys = [np.average([sum([sum(monomial) for monomial in polynomial if monomial[var] > 0]) for polynomial in degrees_list]) for var in var_list]
+    #     return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]  # var_list is filtered
+    # elif measure == 'avegsumdeg':
+    #     sum_degrees_overall_polys = [np.average([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]
+    #     return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]  # var_list is filtered
+    # elif measure == 'avegavegdeg':
+    #     aveg_degrees_overall_polys = [np.average([np.average([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]
+    #     return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(aveg_degrees_overall_polys)]  # var_list is filtered
+
+    # elif measure == 'maxsumdeg':
+    #     sum_degrees_overall_polys = [max([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]
+    #     return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]  # var_list is filtered
+    elif measure == 'sumsignsumdeg':
+        sum_degrees_overall_polys = [np.sum(np.sign([np.sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list])) for var in var_list]  # for each variable, the number of polynomials in which it appears (the sign of the summed degrees is 1 exactly when the variable occurs)
+        return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]  # var_list is filtered
+    # elif measure == 'sumsumdeg':
+    #     sum_degrees_overall_polys = [sum([sum([monomial[var] for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]
+    #     return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]  # var_list is filtered
+    # elif measure == 'avegvegsigndeg':
+    #     sum_degrees_overall_polys = [np.average([np.average([np.sign(monomial[var]) for monomial in polynomial]) for polynomial in degrees_list]) for var in var_list]
+    #     return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]  # var_list is filtered
+    # elif measure == 'sumsumsvdeg':
+    #     sum_degrees_overall_polys = [sum([sum([sum(monomial) for monomial in polynomial if monomial[var] > 0]) for polynomial in degrees_list]) for var in var_list]
+    #     return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_degrees_overall_polys)]  # var_list is filtered
+    elif measure == 'greedy_sotd':
+        sum_total_degrees = [sum([sum(monomial) for polynomial in possible_proj_set for monomial in polynomial]) for possible_proj_set in degrees_list]
+        return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(sum_total_degrees)]  # var_list is filtered
+    elif measure == 'random':
+        return [random.choice(var_list)]
+    elif measure == 'first':
+        return [var_list[0]]
+    elif measure == 'last':
+        return [var_list[-1]]
+    elif type(measure) == str:
+        if measure[-5:] == 'svdeg':
+            measure = measure[:-5]
+            monomial_numbers = [[[sum(monomial) for monomial in polynomial if monomial[var] > 0] for polynomial in degrees_list] for var in var_list]
+        elif measure[-7:] == 'signdeg':
+            measure = measure[:-7]
+            monomial_numbers = [[[np.sign(monomial[var]) for monomial in polynomial] for polynomial in degrees_list] for var in var_list]
+        elif measure[-3:] == 'deg':
+            measure = measure[:-3]
+            monomial_numbers = [[[monomial[var] for monomial in polynomial] for polynomial in degrees_list] for var in var_list]
+        else:
+            raise Exception(measure+" is not a valid measure")
+
+        if measure[-3:] == 'sum':
+            measure = measure[:-3]
+            polynomial_numbers = [[sum(monomial_numbers_in_poly) for monomial_numbers_in_poly in var_monomial_numbers] for var_monomial_numbers in monomial_numbers]
+        elif measure[-3:] == 'max':
+            measure = measure[:-3]
+            polynomial_numbers = [[max(monomial_numbers_in_poly) if len(monomial_numbers_in_poly) > 0 else 0 for monomial_numbers_in_poly in var_monomial_numbers] for var_monomial_numbers in monomial_numbers]
+        elif measure[-4:] == 'aveg':
+            measure = measure[:-4]
+            polynomial_numbers = [[np.average(monomial_numbers_in_poly) for monomial_numbers_in_poly in var_monomial_numbers] for var_monomial_numbers in monomial_numbers]
+        else:
+            raise Exception("Not a valid measure - maybe add the possibility of sign here")
+
+        if measure == 'sum':
+            set_numbers = [sum(var_polynomial_numbers) for var_polynomial_numbers in polynomial_numbers]
+        elif measure == 'max':
+            set_numbers = [max(var_polynomial_numbers) if len(var_polynomial_numbers) > 0 else 0 for var_polynomial_numbers in polynomial_numbers]
+        elif measure == 'aveg':
+            set_numbers = [np.average(var_polynomial_numbers) for var_polynomial_numbers in polynomial_numbers]
+        else:
+            raise Exception("Not a valid measure")
+        return [var_list[i] for i in range(len(var_list)) if i in minimum_indices(set_numbers)]  # var_list is filtered
+
+
+def get_order_measure(heuristic, if_tie='random'):
+    if heuristic == 'brown':
+        order_measure = ['brown1', 'brown2', 'brown3', if_tie]
+    elif type(heuristic) == int:
+        order_measure = list(paper_all_pos[heuristic])+[if_tie]
+    else:
+        order_measure = [heuristic, if_tie]
+    return order_measure
+
+
+def get_degree_list(poly_list, heuristic):
+    if heuristic != 'greedy_sotd':
+        degrees_list = [[monomial[:-1] for monomial in polynomial] for polynomial in poly_list]  # the same list without the coefficients
+        nvar = len(degrees_list[0][0])  # the number of variables is the same everywhere; we check the first monomial of the first polynomial
+    else:
+        degrees_list = [[[monomial[:-1] for monomial in polynomial] for polynomial in polys] for polys in poly_list]  # the same list without the coefficients
+        nvar = len(degrees_list[0][0][0])
+    # if degrees_list == []:  # unclear why this happens, but we just return this message
+    #     return "The list given is empty"
+    return degrees_list, nvar
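The three suffix-stripping stages above mean a compound measure name reads right to left: for instance 'summaxdeg' takes each variable's degree in every monomial ('deg'), reduces each polynomial's monomials with max ('max'), and reduces the list of polynomials with sum ('sum'). A standalone check on toy exponent vectors (not repository data):

degrees_list = [
    [[2, 1, 0], [0, 0, 1]],  # polynomial 1: exponent vectors of its monomials
    [[1, 1, 1]],             # polynomial 2
]
var = 0
per_monomial = [[monomial[var] for monomial in polynomial]
                for polynomial in degrees_list]          # 'deg'   -> [[2, 0], [1]]
per_polynomial = [max(ms) if ms else 0 for ms in per_monomial]  # 'max' -> [2, 1]
summaxdeg = sum(per_polynomial)                          # 'sum'   -> 3
print(summaxdeg)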
+def greedy_choose_variable(poly_list, heuristic='gmods'):
+    '''Given a list of polynomials, returns the variable that the given heuristic would choose to project next.'''
+    order_measure = get_order_measure(heuristic, if_tie='random')
+    degrees_list, nvar = get_degree_list(poly_list, heuristic)
+    best_vars = range(nvar)
+    n_random_choice = 1
+    while len(best_vars) > 1:
+        measure = order_measure.pop(0)
+        if measure == 'random':
+            # if we reach 'random' we record how many variables are left
+            n_random_choice = len(best_vars)
+        best_vars = choose_variables_minimizing(degrees_list, measure=measure, var_list=best_vars)
+    # The following lines are only used to answer a question from the reviewers
+    if nvar == 3 and (heuristic == 'gmods' or heuristic == 36 or heuristic == 'brown'):
+        file_random_name = os.path.join(os.path.dirname(__file__), '..', 'Datasets', f"{heuristic}_random_choices.txt")
+        with open(file_random_name, 'a') as f:
+            f.write(f"{n_random_choice}, ")
+    return best_vars[0]
+
+
+##
+# Rules for expensive heuristics
+##
+
+def old_mods_guess(mrd):  # mrd -> mods relevant degrees
+    '''Computes the best order according to the old_mods heuristic (multiplication of relative degrees).'''
+    old_mods_values = [multiplyList([sum([degree for degree in level_mrd if degree != 0]) for level_mrd in proj_mrd]) for proj_mrd in mrd]
+    return min(range(len(old_mods_values)), key=old_mods_values.__getitem__)  # returns the index with the smallest value in the list old_mods_values
+
+
+def logmods_guess(mrd):
+    '''Computes the best order according to the logmods heuristic (multiplication of the summed logarithms of relative degrees).'''
+    logmods_values = [multiplyList([sum([log(degree) for degree in level_mrd if degree != 0]) for level_mrd in proj_mrd]) for proj_mrd in mrd]
+    return min(range(len(logmods_values)), key=logmods_values.__getitem__)  # returns the index with the smallest value in the list logmods_values
+
+
+def mods_guess(mrd):
+    '''Computes the best ordering by minimizing the maximum number of cells in the final CAD.'''
+    mods_values = [multiplyList([1+2*sum([degree for degree in level_mrd if degree != 0]) for level_mrd in proj_mrd]) for proj_mrd in mrd]
+    return min(range(len(mods_values)), key=mods_values.__getitem__)  # returns the index with the smallest value in the list mods_values
+
+
+def super_mods_guess(mrd):
+    '''Computes the best ordering by minimizing the maximum number of cells in all the CADs needed to build the final CAD.'''
+    mods_values = [sum([multiplyList([1+2*sum([degree for degree in level_mrd if degree != 0]) for level_mrd in proj_mrd[:i+1]]) for i in range(len(proj_mrd))]) for proj_mrd in mrd]
+    return min(range(len(mods_values)), key=mods_values.__getitem__)  # returns the index with the smallest value in the list mods_values
+
+
+def acc_logmods_guess(mrd):
+    '''Computes the best order according to the acc_logmods heuristic (like mods, but with the logarithms of the relative degrees).'''
+    acc_logmods_values = [multiplyList([1+2*sum([log(degree) for degree in level_mrd if degree != 0]) for level_mrd in proj_mrd]) for proj_mrd in mrd]
+    return min(range(len(acc_logmods_values)), key=acc_logmods_values.__getitem__)  # returns the index with the smallest value in the list acc_logmods_values
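mods_guess implements the cell-count bound its docstring names: at each level, polynomials whose degrees sum to D have at most D real roots, which split a line into at most 1 + 2D cells, and multiplying over levels bounds the size of the final CAD. A standalone toy check (multiplyList copied from heuristic_tools so the snippet runs on its own; the degree lists are made up):

def multiplyList(myList):
    result = 1
    for x in myList:
        result = result * x
    return result

mrd = [
    [[2, 1], [3]],  # ordering A: per-level degrees -> (1+2*3)*(1+2*3) = 49
    [[1, 1], [4]],  # ordering B: per-level degrees -> (1+2*2)*(1+2*4) = 45
]
mods_values = [multiplyList([1 + 2*sum(d for d in level if d != 0) for level in proj])
               for proj in mrd]
print(mods_values)                                                # [49, 45]
print(min(range(len(mods_values)), key=mods_values.__getitem__))  # 1, i.e. ordering B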
diff --git a/choose_hyperparams.py b/choose_hyperparams.py
index 53e51c1..8a9a54c 100644
--- a/choose_hyperparams.py
+++ b/choose_hyperparams.py
@@ -3,7 +3,7 @@
 import csv
 from config.ml_models import ml_models
 from config.ml_models import sklearn_models
-from config.ml_models import dataset_types
+from config.general_values import dataset_qualities
 from config.hyperparameters_grid import grid
 from sklearn.model_selection import GridSearchCV
 from yaml_tools import write_yaml_to_file
@@ -56,17 +56,17 @@ def choose_hyperparams(ml_model, method):
 #                                        'ml_results_k_fold_tested_in_balanced.csv')
 # with open(output_file_balanced, 'w') as f_balanced:
 #     writer_balanced = csv.writer(f_balanced)
-#     writer_balanced.writerow(["Name"] + dataset_types)
+#     writer_balanced.writerow(["Name"] + dataset_qualities)
 # output_file_normal = os.path.join(os.path.dirname(__file__),
 #                                   'ml_results_k_fold_tested_in_normal.csv')
 # with open(output_file_normal, 'w') as f_normal:
 #     writer_normal = csv.writer(f_normal)
-#     writer_normal.writerow(["Name"] + dataset_types)
+#     writer_normal.writerow(["Name"] + dataset_qualities)
 # for ml_model in ml_models:
 #     print(f"Model: {ml_model}")
 #     acc_balanced = dict()
 #     acc_normal = dict()
-#     for method in dataset_types:
+#     for method in dataset_qualities:
 #         this_dataset_file = os.path.join(os.path.dirname(__file__),
 #                                          'datasets', 'train',
 #                                          f'{method}_train_dataset.txt')
@@ -91,10 +91,10 @@ def choose_hyperparams(ml_model, method):
 #         pickle.dump(clf, method_file)
 #     round_accuracies_balanced = [round(acc, 2)
 #                                  for acc in [acc_balanced[method_here]
-#                                              for method_here in dataset_types]]
+#                                              for method_here in dataset_qualities]]
 #     round_accuracies_normal = [round(acc, 2)
 #                                for acc in [acc_normal[method_here]
-#                                            for method_here in dataset_types]]
+#                                            for method_here in dataset_qualities]]
 #     writer_balanced.writerow([ml_model] + round_accuracies_balanced)
 #     writer_normal.writerow([ml_model] + round_accuracies_normal)
diff --git a/create_clean_dataset.py b/create_clean_dataset.py
index 4d60cc6..97c3114 100644
--- a/create_clean_dataset.py
+++ b/create_clean_dataset.py
@@ -17,7 +17,7 @@ def create_dataframe(dataset):
     all_features = []
-    all_targets = dataset[1][:]
+    all_labels = dataset[1][:]
     all_timings = dataset[2][:]
     all_original_polynomials = []
     for index, all_projections in enumerate(dataset[0]):
@@ -28,7 +28,7 @@ def create_dataframe(dataset):
                                 determine_standarization=True,
                                 determine_unique_features=True)
     return np.array(all_original_polynomials), np.array(names),\
-        np.array(all_features), np.array(all_targets), np.array(all_timings)
+        np.array(all_features), np.array(all_labels), np.array(all_timings)
 # dataset_filename = os.path.join(os.path.dirname(__file__),
@@ -36,7 +36,7 @@ def create_dataframe(dataset):
 #                                 'datasets',
 #                                 'dataset_without_repetition_return_ncells.txt')
 # with open(dataset_filename, 'rb') as f:
 #     dataset = pickle.load(f)
-# original_polys_list, names, features_list, targets_list, timings_list =\
+# original_polys_list, names, features_list, labels_list, timings_list =\
 #     create_dataframe(dataset)
diff --git a/datasets/clean_dataset.txt b/datasets/clean_dataset.txt
index 24c1b18..b7d2456 100644
Binary files a/datasets/clean_dataset.txt and b/datasets/clean_dataset.txt differ
diff --git a/datasets/test/augmented_test_dataset.txt b/datasets/test/augmented_test_dataset.txt
index e1ab3ef..36c4323 100644
Binary files a/datasets/test/augmented_test_dataset.txt and b/datasets/test/augmented_test_dataset.txt differ
diff --git a/datasets/test/balanced_test_dataset.txt b/datasets/test/balanced_test_dataset.txt
index 1712401..1a6fd17 100644
Binary files a/datasets/test/balanced_test_dataset.txt and b/datasets/test/balanced_test_dataset.txt differ
diff --git a/datasets/test/normal_test_dataset.txt b/datasets/test/normal_test_dataset.txt
index 72b027d..5b49302 100644
Binary files a/datasets/test/normal_test_dataset.txt and b/datasets/test/normal_test_dataset.txt differ
diff --git a/datasets/train/augmented_train_dataset.txt b/datasets/train/augmented_train_dataset.txt
index 3ed1c22..a52fea7 100644
Binary files a/datasets/train/augmented_train_dataset.txt and b/datasets/train/augmented_train_dataset.txt differ
diff --git a/datasets/train/balanced_train_dataset.txt b/datasets/train/balanced_train_dataset.txt
index 0b42a35..28fd1ac 100644
Binary files a/datasets/train/balanced_train_dataset.txt and b/datasets/train/balanced_train_dataset.txt differ
diff --git a/datasets/train/normal_train_dataset.txt b/datasets/train/normal_train_dataset.txt
index 21b4a44..347a778 100644
Binary files a/datasets/train/normal_train_dataset.txt and b/datasets/train/normal_train_dataset.txt differ
diff --git a/main.py b/main.py
index 04571f1..ab1064e 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,7 @@
 """
 The experiments in [1] are replicated with some changes.
-The first change is that the testing data is balanced, so that all targets
+The first change is that the testing data is balanced, so that all labels
 are almost equally common.
 Then we use three training sets; dataset as in [1],
 balanced dataset and data augmentation dataset.
@@ -14,7 +14,8 @@
 """
 import csv
 from config.ml_models import ml_models
-from config.ml_models import dataset_types
+from config.general_values import dataset_qualities
+from config.general_values import purposes
 from find_filename import find_dataset_filename
 from find_filename import find_model_filename
 from create_clean_dataset import cleaning_dataset
@@ -32,19 +33,19 @@
 # tune_hyperparameters = False
 paradigm = 'classification'
-# cleaning_dataset()
+cleaning_dataset()
 create_train_test_datasets()
 # if tune_hyperparameters:
 #     for ml_model in ml_models:
-#         for method in dataset_types:
+#         for method in dataset_qualities:
 #             print(f"Choosing hyperparameters for {ml_model} in {method}")
 #             choose_hyperparams(ml_model, method)
-# for ml_model in ml_models:
-#     print(f"Training {ml_model}")
-#     for method in dataset_types:
-#         print(f"for {method}")
-#         train_model(ml_model, method)
+for ml_model in ml_models:
+    print(f"Training {ml_model}")
+    for method in dataset_qualities:
+        print(f"for {method}")
+        train_model(ml_model, method)
 training_method = 'augmented'
 testing_method = 'augmented'
 first_time = 1
@@ -70,7 +72,7 @@
 # with open("classification_output_timings.csv", 'w') as f:
 #     f.write("model, Normal, Balanced, Augmented\n")
 # for ml_model in ml_models:
-#     for training_method in dataset_types:
+#     for training_method in dataset_qualities:
 #         trained_model_filename = find_model_filename(training_method,
 #                                                      ml_model)
 #         accuracy = test_model(trained_model_filename,
diff --git a/packages/dataset_manipulation/dataset_manipulation.py b/packages/dataset_manipulation/dataset_manipulation.py
index b92263e..4e18fa8 100644
--- a/packages/dataset_manipulation/dataset_manipulation.py
+++ b/packages/dataset_manipulation/dataset_manipulation.py
@@ -9,7 +9,7 @@
 nvar = 3
-def augmentate_dataset(features, targets, timings):
+def augmentate_dataset(features, targets, timings, cells):
     """
     Multiply the size of the dataset by 6.
@@ -20,16 +20,19 @@ def augmentate_dataset(features, targets, timings):
     symmetric_features = []
     symmetric_targets = []
     symmetric_timings = []
-    for features, target, timing in zip(features, targets, timings):
+    symmetric_cells = []
+    for features, target, timing, cell in \
+            zip(features, targets, timings, cells):
         symmetric_features += give_all_symmetries(features, int(target))
         symmetric_targets += list(range(math.factorial(nvar)))
         symmetric_timings += augmentate_timings(timing, int(target))
+        symmetric_cells += augmentate_timings(cell, int(target))
     return np.array(symmetric_features), np.array(symmetric_targets), \
-        np.array(symmetric_timings)
+        np.array(symmetric_timings), np.array(symmetric_cells)
-def balance_dataset(features, targets, timings):
+def balance_dataset(features, targets, timings, cells):
     """
     Balance the dataset so all targets are almost equally common.
@@ -40,15 +43,19 @@ def balance_dataset(features, targets, timings):
     balanced_features = []
     balanced_targets = []
     balanced_timings = []
-    for features, target, timing in zip(features, targets, timings):
+    balanced_cells = []
+    for features, target, timing, cell in \
+            zip(features, targets, timings, cells):
         symmetric_features = give_all_symmetries(features, int(target))
         symmetric_timings = augmentate_timings(timing, int(target))
+        symmetric_cells = augmentate_timings(cell, int(target))
         new_target = random.choice(list(range(math.factorial(nvar))))
         balanced_features.append(symmetric_features[new_target])
         balanced_targets.append(new_target)
         balanced_timings.append(symmetric_timings[new_target])
+        balanced_cells.append(symmetric_cells[new_target])
     return np.array(balanced_features), np.array(balanced_targets),\
-        np.array(balanced_timings)
+        np.array(balanced_timings), np.array(balanced_cells)
 def name_unique_features(names, features):
@@ -94,7 +101,8 @@ def remove_notunique_features(names, features, nvar=3):
     # creating some targets and timings because the function requires them
     targets = [0]*len(features)
     timings = [list(range(math.factorial(nvar)))]*len(features)
-    augmented_features, _, _ = augmentate_dataset(features, targets, timings)
+    cells = [list(range(math.factorial(nvar)))]*len(features)
+    augmented_features, _, _, _ = augmentate_dataset(features, targets, timings, cells)
     # normalized_augmented_features = normalize(augmented_features)
     unique_names = name_unique_features(names, augmented_features)
     unique_features = []
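As the docstring above says, with nvar = 3 there are 3! = 6 variable orderings, so each instance can be expanded into six relabelled copies. A standalone sketch of that blow-up with toy per-variable features; in the repository, give_all_symmetries and augmentate_timings additionally permute the targets and the timing lists consistently, which this sketch does not attempt:

import math
from itertools import permutations

nvar = 3
features = [0.1, 0.2, 0.3]  # one toy feature per variable
symmetric_features = [[features[v] for v in sigma]
                      for sigma in permutations(range(nvar))]
symmetric_targets = list(range(math.factorial(nvar)))
print(len(symmetric_features))  # 6 labelled copies from a single instance
print(symmetric_features[4])    # [0.3, 0.1, 0.2] for the ordering (2, 0, 1)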
diff --git a/replicating_Dorians_features.py b/replicating_Dorians_features.py
index 3260c82..a65bccf 100644
--- a/replicating_Dorians_features.py
+++ b/replicating_Dorians_features.py
@@ -64,7 +64,7 @@ def create_features(degrees, variable=0, sv=False,
 def extract_features(dataset):
     my_dataset = dict()
     all_features = []
-    all_targets = []
+    all_labels = []
     all_timings = []
     all_original_polynomials = []
     all_projections = []
@@ -75,7 +75,7 @@ def extract_features(dataset):
         # the original polynomials are the initial polynomials of any
         # of the possible projections (also of the first one)
         all_original_polynomials.append(original_polynomials)
-        all_targets.append(dataset[1][index])
+        all_labels.append(dataset[1][index])
         all_timings.append(dataset[2][index])
         all_cells.append(dataset[3][index])
         names, instance_features = features_from_set_of_polys(
@@ -84,7 +84,7 @@ def extract_features(dataset):
     my_dataset['polynomials'] = np.array(all_original_polynomials)
     my_dataset['names'] = np.array(names)
     my_dataset['features'] = np.array(all_features)
-    my_dataset['targets'] = np.array(all_targets)
+    my_dataset['labels'] = np.array(all_labels)
     my_dataset['timings'] = np.array(all_timings)
     my_dataset['projections'] = np.array(all_projections)
     my_dataset['cells'] = np.array(all_cells)
diff --git a/test_train_datasets.py b/test_train_datasets.py
index fd53ce1..8777fcc 100644
--- a/test_train_datasets.py
+++ b/test_train_datasets.py
@@ -10,6 +10,8 @@
 from packages.dataset_manipulation import remove_notunique_features
 from packages.dataset_manipulation import balance_dataset
 from packages.dataset_manipulation import augmentate_dataset
+from config.general_values import purposes
+from config.general_values import dataset_qualities
 from sklearn.model_selection import train_test_split
 from find_filename import find_dataset_filename
 from find_filename import find_other_filename
@@ -30,57 +32,87 @@ def create_train_test_datasets():
     # maybe it's better to create a dictionary for each dataset:
     # train/test, normal/balanced/augmented
     ###
-    x = dict()  # to keep the features
-    y = dict()  # to keep the labels
-    t = dict()  # to keep the timings
-    p = dict()  # to keep the projections
-    c = dict()  # to keep the number of cells
+    datasets = dict()
+    for purpose in purposes:
+        for quality in dataset_qualities:
+            datasets[purpose + '_' + quality] = dict()
     # train and test sets are created
     random_state = 0
-    x['train_normal'], x['test_normal'], \
-        y['train_normal'], y['test_normal'], \
-        t['train_normal'], t['test_normal'], \
-        p['train_normal'], p['test_normal'] = \
+    datasets['Train_Normal']['features'], \
+        datasets['Test_Normal']['features'], \
+        datasets['Train_Normal']['labels'], \
+        datasets['Test_Normal']['labels'], \
+        datasets['Train_Normal']['timings'], \
+        datasets['Test_Normal']['timings'], \
+        datasets['Train_Normal']['projections'], \
+        datasets['Test_Normal']['projections'], \
+        datasets['Train_Normal']['cells'], \
+        datasets['Test_Normal']['cells'] = \
         train_test_split(dataset['features'],
-                         dataset['targets'],
+                         dataset['labels'],
                          dataset['timings'],
                          dataset['projections'],
+                         dataset['cells'],
                          test_size=0.20,
                          random_state=random_state)
-    for purpose in ['train', 'test']:
-        x[f'{purpose}_balanced'], y[f'{purpose}_balanced'], t[f'{purpose}_balanced'] = balance_dataset(x[f'{purpose}_normal'], y[f'{purpose}_normal'], t[f'{purpose}_normal'])
-        x[f'{purpose}_augmented'], y[f'{purpose}_augmented'], t[f'{purpose}_augmented'] = augmentate_dataset(x[f'{purpose}_normal'], y[f'{purpose}_normal'], t[f'{purpose}_normal'])
-    dataset_info_file = find_dataset_filename('instances')
-    with open(dataset_info_file, 'w') as f_dataset_info:
-        writer = csv.writer(f_dataset_info)
-        writer.writerow(['dataset'] + ['zero', 'one', 'two', 'three', 'four', 'five', 'total'])
-    for usage in ['train', 'test']:
-        for method in ['normal', 'balanced', 'augmented']:
-            this_dataset_filename = find_dataset_filename(usage, method=method)
-            with open(this_dataset_filename, 'wb') as this_dataset_file:
-                if method == 'normal':
-                    pickle.dump((x[f'{usage}_{method}'], y[f'{usage}_{method}'], t[f'{usage}_{method}'], p[f'{usage}_{method}']), this_dataset_file)
-                else:
-                    pickle.dump((x[f'{usage}_{method}'], y[f'{usage}_{method}'], t[f'{usage}_{method}']), this_dataset_file)
+    keys = ['features', 'labels', 'timings', 'cells']
+    for purpose in purposes:
+        datasets[f'{purpose}_Balanced'] = \
+            {key: elem for key, elem in zip(keys,
+                                            balance_dataset(*[datasets[f'{purpose}_Normal'][key2]
+                                                              for key2 in keys])
+                                            )
+             }
+        datasets[f'{purpose}_Augmented'] = \
+            {key: elem for key, elem in zip(keys,
+                                            augmentate_dataset(*[datasets[f'{purpose}_Normal'][key2]
+                                                                 for key2 in keys])
+                                            )
+             }
+    for purpose in purposes:
+        for quality in dataset_qualities:
+            this_dataset_filename = find_dataset_filename(purpose, method=quality)
+            with open(this_dataset_filename, 'wb') as this_dataset_file:
+                pickle.dump(datasets[purpose + '_' + quality], this_dataset_file)
+
+    ## The following code counted how many instances of each class there are in the different datasets.
+    ## It should be replaced by a dedicated function.
+    # {datasets[f'{purpose}_balanced'][key]: elem for elem in balance_dataset(datasets[f'{purpose}_balanced'][key2] for key2 in keys) for key in keys}
+    # x[f'{purpose}_augmented'], y[f'{purpose}_augmented'], t[f'{purpose}_augmented'] = augmentate_dataset(x[f'{purpose}_normal'], y[f'{purpose}_normal'], t[f'{purpose}_normal'])
+#     dataset_info_file = find_dataset_filename('instances')
+#     with open(dataset_info_file, 'w') as f_dataset_info:
+#         writer = csv.writer(f_dataset_info)
+#         writer.writerow(['dataset'] + ['zero', 'one', 'two', 'three', 'four', 'five', 'total'])
+#     for purpose in purposes:
+#         for method in ['normal', 'balanced', 'augmented']:
+#             this_dataset_filename = find_dataset_filename(purpose, method=method)
+#             with open(this_dataset_filename, 'wb') as this_dataset_file:
+#                 if method == 'normal':
+#                     pickle.dump((x[f'{purpose}_{method}'], y[f'{purpose}_{method}'], t[f'{purpose}_{method}'], p[f'{purpose}_{method}']), this_dataset_file)
+#                 else:
+#                     pickle.dump((x[f'{purpose}_{method}'], y[f'{purpose}_{method}'], t[f'{purpose}_{method}']), this_dataset_file)
-            writer.writerow([f'{usage} {method} dataset']
-                            + [str(count_instances(y[f'{usage}_{method}'], i))
-                               for i in range(6)]
-                            + [str(len(y[f'{usage}_{method}']))])
+#             writer.writerow([f'{purpose} {method} dataset']
+#                             + [str(count_instances(y[f'{purpose}_{method}'], i))
+#                                for i in range(6)]
+#                             + [str(len(y[f'{purpose}_{method}']))])
 def create_regression_datasets(taking_logarithms=True):
-    for usage in ['train', 'test']:
-        this_dataset_filename = find_dataset_filename(usage,
+    for purpose in purposes:
+        this_dataset_filename = find_dataset_filename(purpose,
                                                       method='augmented')
         # we will use the augmented dataset here
         with open(this_dataset_filename, 'rb') as this_dataset_file:
-            X, Y, T = pickle.load(this_dataset_file)
+            regression_dataset = pickle.load(this_dataset_file)
         if taking_logarithms:
-            Y = [log(timings[0]) for timings in T]
+            regression_dataset['labels'] = [log(timings[0]) for timings in regression_dataset['timings']]
         else:
-            Y = [timings[0] for timings in T]
+            regression_dataset['labels'] = [timings[0] for timings in regression_dataset['timings']]
         this_dataset_filename =\
-            find_dataset_filename(usage, method='regression')
+            find_dataset_filename(purpose, method='regression')
         with open(this_dataset_filename, 'wb') as this_dataset_file:
-            pickle.dump((X, Y, T), this_dataset_file)
+            pickle.dump(regression_dataset, this_dataset_file)
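The multi-array call to train_test_split in create_train_test_datasets above relies on sklearn accepting any number of equally long arrays and returning a train/test pair for each, in order. A minimal standalone demonstration with toy arrays:

import numpy as np
from sklearn.model_selection import train_test_split

features = np.arange(10).reshape(5, 2)
labels = np.arange(5)
timings = np.arange(5) * 1.0
f_tr, f_te, l_tr, l_te, t_tr, t_te = train_test_split(
    features, labels, timings, test_size=0.2, random_state=0)
print(f_tr.shape, f_te.shape)  # (4, 2) (1, 2); rows stay aligned across the three arrays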
diff --git a/train_models.py b/train_models.py
index 84a2921..a344e39 100644
--- a/train_models.py
+++ b/train_models.py
@@ -11,47 +11,46 @@
 def train_model(ml_model, method):
-    train_data_filename = find_dataset_filename('train', method=method)
+    train_data_filename = find_dataset_filename('Train', method=method)
     hyperparams_file = find_hyperparams_filename(method, ml_model)
     with open(train_data_filename, 'rb') as train_data_file:
-        if method == "Normal":
-            x_train, y_train, _, _ = pickle.load(train_data_file)
-        else:
-            x_train, y_train, _ = pickle.load(train_data_file)
-        # a = pickle.load(train_data_file)
-        # print(a[0], type(a), len(a), method)
+        train_dataset = pickle.load(train_data_file)
     hyperparams = read_yaml_from_file(hyperparams_file)
     current_classifier = sklearn_models[ml_model]
     clf = current_classifier(**hyperparams)
-    clf.fit(x_train, y_train)
+    clf.fit(train_dataset['features'], train_dataset['labels'])
     trained_model_filename = find_model_filename(method, ml_model)
     with open(trained_model_filename, 'wb') as trained_model_file:
         pickle.dump(clf, trained_model_file)
 def train_regression_model(ml_model, method):
-    train_data_filename = find_dataset_filename('train', method=method)
+    train_data_filename = find_dataset_filename('Train', method=method)
     with open(train_data_filename, 'rb') as train_data_file:
-        x_train, _, t_train = pickle.load(train_data_file)
+        train_dataset = pickle.load(train_data_file)
     # hyperparams_file = find_hyperparams_filename(method, ml_model)
     # hyperparams = read_yaml_from_file(hyperparams_file)
-    x_train = np.asarray([x_t for x_t, t_t in zip(x_train, t_train)
-                          if t_t[:4] != 'Over'], dtype=float)
-    t_train = np.asarray([t_t for t_t in t_train
-                          if t_t[:4] != 'Over'], dtype=float)
-    current_classifier = regressors[ml_model]
-    # print(t_train)
+    train_dataset['features'] = np.asarray([x_t for x_t, t_t in zip(train_dataset['features'], train_dataset['timings'])
+                                            if t_t[:4] != 'Over'], dtype=float)
+    train_dataset['timings'] = np.asarray([t_t for t_t in train_dataset['timings']
+                                           if t_t[:4] != 'Over'], dtype=float)
+    ####
+    # IS THIS REALLY DOING SOMETHING?
+    # What if we used twice the time limit instead?
+    current_classifier = ml_regressors[ml_model]
+    # print(train_dataset['timings'])
     reg = current_classifier()  # **hyperparams)
-    reg.fit(x_train, t_train)
+    reg.fit(train_dataset['features'], train_dataset['timings'])
     # trained_model_filename = find_model_filename(method, ml_model, 'regression')
     # with open(trained_model_filename, 'wb') as trained_model_file:
     #     pickle.dump(reg, trained_model_file)
     print("Real")
-    print(t_train[10:20])
+    print(train_dataset['timings'][10:20])
     print("Predicted")
-    print(reg.predict(x_train)[10:20])
-    print(metrics.mean_squared_error(reg.predict(x_train), t_train))
+    print(reg.predict(train_dataset['features'])[10:20])
+    print(metrics.mean_squared_error(reg.predict(train_dataset['features']), train_dataset['timings']))
     return reg
@@ -61,7 +60,7 @@ def choose_using_regression(x_test, regressor):
 def test_regression_model(method, regressor):
-    test_data_filename = find_dataset_filename('test', method=method)
+    test_data_filename = find_dataset_filename('Test', method=method)
     with open(test_data_filename, 'rb') as test_data_file:
         x_test, y_test, t_test = pickle.load(test_data_file)
     x_test = np.asarray([x_t for x_t, t_t in zip(x_test, t_test)
@@ -69,7 +68,6 @@ def test_regression_model(method, regressor):
                          if t_t[:4] != 'Over'], dtype=float)
     y_test = np.asarray([y_t for y_t, t_t in zip(y_test, t_test)
                          if t_t[:4] != 'Over'], dtype=float)
     y_pred = [choose_using_regression(x_i, regressor) for x_i in x_test]
-    print("ACC", metrics.accuracy_score(y_test, y_pred))
 # for ml_reg in ml_regressors:
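The body of choose_using_regression is not shown in this diff. The following standalone sketch, with all names and data hypothetical, illustrates the regression pipeline the surrounding code implies: fit a regressor to log-timings (as create_regression_datasets produces), predict a timing for each of the six orderings' feature vectors, and pick the argmin. The candidate feature vectors stand in for what give_all_symmetries would supply in the repository.

import numpy as np
from math import log
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
features = rng.random((30, 6))               # toy per-instance feature vectors
timings = rng.uniform(0.01, 60.0, size=30)   # toy timings; real ones span orders of magnitude,
labels = np.array([log(t) for t in timings]) # which is why fitting log(t) weights relative error

reg = RandomForestRegressor(random_state=0).fit(features, labels)

def choose_using_regression_sketch(regressor, all_orderings_features):
    # predict a (log-)timing for the feature vector of every ordering,
    # then return the index of the ordering predicted to be fastest
    predictions = regressor.predict(np.asarray(all_orderings_features, dtype=float))
    return int(np.argmin(predictions))

candidates = rng.random((6, 6))  # stand-in for the six symmetric feature vectors
print(choose_using_regression_sketch(reg, candidates))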