diff --git a/datasets/test/balanced_test_dataset.txt b/datasets/test/balanced_test_dataset.txt index f7adca5..4c7cf43 100644 Binary files a/datasets/test/balanced_test_dataset.txt and b/datasets/test/balanced_test_dataset.txt differ diff --git a/datasets/train/balanced_train_dataset.txt b/datasets/train/balanced_train_dataset.txt index bdb7fc8..bc476ed 100644 Binary files a/datasets/train/balanced_train_dataset.txt and b/datasets/train/balanced_train_dataset.txt differ diff --git a/main.py b/main.py index 14355ed..51382b1 100644 --- a/main.py +++ b/main.py @@ -33,8 +33,8 @@ tune_hyperparameters = False paradigm = 'classification' -# cleaning_dataset() -# create_train_test_datasets() +cleaning_dataset() +create_train_test_datasets() if tune_hyperparameters: for ml_model in ml_models: diff --git a/packages/dataset_manipulation/dataset_manipulation.py b/packages/dataset_manipulation/dataset_manipulation.py index 72fd25a..62dff14 100644 --- a/packages/dataset_manipulation/dataset_manipulation.py +++ b/packages/dataset_manipulation/dataset_manipulation.py @@ -118,10 +118,9 @@ def get_unique_feature_names(unique_names, names, features): def remove_notunique_features(names, features, nvar=3): # creating some targets and timing because the function requires them - targets = [0]*len(features) timings = [list(range(math.factorial(nvar)))]*len(features) cells = [list(range(math.factorial(nvar)))]*len(features) - augmented_features, _, _, _ = augmentate_dataset(features, targets, timings, cells) + augmented_features, _, _ = augmentate_dataset(features, timings, cells, nvar) # normalized_augmented_features = normalize(augmented_features) unique_names = name_unique_features(names, augmented_features) unique_features = [] diff --git a/replicating_Dorians_features.py b/replicating_Dorians_features.py index 366af7b..f90a043 100644 --- a/replicating_Dorians_features.py +++ b/replicating_Dorians_features.py @@ -105,7 +105,8 @@ def features_from_set_of_polys(original_polynomials, return names, instance_features -def compute_features_for_var(original_polynomials, var, operations): +def compute_features_for_var(original_polynomials, var, + operations=[sum, max, aveg]): '''Given polynomials and a variable computes the features''' degrees = [[monomial[var] for monomial in poly] for poly in original_polynomials] diff --git a/test_models.py b/test_models.py index 653642f..a9ad961 100644 --- a/test_models.py +++ b/test_models.py @@ -10,6 +10,7 @@ from find_filename import find_output_filename from find_filename import find_dataset_filename from find_filename import find_model_filename +from train_models import ordering_choice_reinforcement # Check if 'dataset_manipulation' is installed if isinstance(importlib.util.find_spec('dataset_manipulation'), type(None)): from dataset_manipulation import augmentate_instance @@ -107,12 +108,15 @@ def test_model(ml_model, paradigm, testing_method='augmented'): with open(test_dataset_filename, 'rb') as test_dataset_file: testing_dataset = pickle.load(test_dataset_file) print("here") - if ml_model in ml_regressors: + if ml_model in ml_regressors and paradigm == 'regression': chosen_indices = [return_regressor_choice(model, features) for features in testing_dataset['features']] - else: + elif ml_model in ml_models: chosen_indices = [model.predict([features])[0] for features in testing_dataset['features']] + elif paradigm == 'reinforcement' and testing_method == 'Normal': + chosen_indices = [ordering_choice_reinforcement(model, projections) + for projections in testing_dataset['projections']] print(chosen_indices) print("here2") return compute_metrics(chosen_indices, diff --git a/train_models.py b/train_models.py index 7e73060..e195c72 100644 --- a/train_models.py +++ b/train_models.py @@ -73,26 +73,35 @@ def test_regression_model(method, regressor): # print(ml_reg) # test_regression_model('balanced', regressor) -def train_reinforcement_model(ml_model, method='Augmented'): +def train_reinforcement_model(ml_model, method='Normal'): train_data_filename = find_dataset_filename('Train', method=method) with open(train_data_filename, 'rb') as train_data_file: train_dataset = pickle.load(train_data_file) - hyperparams_file = find_hyperparams_filename(method, ml_model) - hyperparams = read_yaml_from_file(hyperparams_file) + # hyperparams_file = find_hyperparams_filename(method, ml_model) + # hyperparams = read_yaml_from_file(hyperparams_file) current_model = sklearn_models[ml_model] - model = current_model(**hyperparams) + # model = current_model(**hyperparams) + model = current_model() + first_polys = train_dataset['projections'][0][0][0] + first_features = get_vars_features(first_polys) + first_labels = [1]*len(first_features) + model.fit(first_features, first_labels) for projections, timings \ in zip(train_dataset['projections'], train_dataset['timings']): training_features, training_labels = \ - training_instances_reinforcement(model, projections) + training_instances_reinforcement(model, projections, timings) model.fit(training_features, training_labels) - + trained_model_filename = find_model_filename('reinforcement', ml_model) + with open(trained_model_filename, 'wb') as trained_model_file: + pickle.dump(model, trained_model_file) def training_instances_reinforcement(model, projections, timings): original_polynomials = projections[0][0] nvar = len(original_polynomials[0][0]) - 1 vars_features = get_vars_features(original_polynomials) + print(len(vars_features[0])) + print(model.predict([vars_features[0]])) evaluations = [model.predict([var_features])[0] for var_features in vars_features] timing = [] @@ -101,7 +110,7 @@ def training_instances_reinforcement(model, projections, timings): projected_polynomials = projections[var * math.factorial(nvar-1)][1] new_var = var_choice_reinforcement(model, projected_polynomials) ordering_chosen = new_var + var * math.factorial(nvar-1) - timing[var] = timings[ordering_chosen] + timing.append(timings[ordering_chosen]) # now compute which part of the difference between # evaluations[i]/evaluations[j] and timing[i]/timing[j] # corresponds to each evaluation @@ -122,22 +131,41 @@ def get_vars_features(polynomials): in the given set of polynomials''' vars_features = [] nvar = len(polynomials[0][0]) - 1 + print('number of variabels', nvar) unique_features_filename = find_other_filename("unique_features") - with open(unique_features_filename, 'wb') as unique_features_file: + with open(unique_features_filename, 'rb') as unique_features_file: unique_names = pickle.load(unique_features_file) for var in range(nvar): + print('variabel', var) var_features, var_names = \ compute_features_for_var(polynomials, var) + print('var_features', var_features) var_features = [feature for feature, name in zip(var_features, var_names) if name in unique_names] - vars_features += var_features + vars_features.append(var_features) return vars_features def var_choice_reinforcement(model, polynomials): - '''This function will return the next variable to project chosen by the model trained using reinforcement''' - vars_features = get_vars_features(model, polynomials) - evaluations = [model.predict([var_features])[0] - for var_features in vars_features] - return evaluations.index(min(evaluations)) + '''This function will return the next variable to project + chosen by the model trained using reinforcement''' + vars_features = get_vars_features(polynomials) + print(polynomials) + print(len(vars_features), len(vars_features[0]), '\n', vars_features) + evaluations = model.predict(vars_features) + return np.argmin(evaluations) + + +def ordering_choice_reinforcement(model, projections): + '''This function will return the ordering chosen by the RL model''' + nvar = len(projections[0]) + ordering = 0 + for level in range(nvar-1): + polynomials = projections[ordering][level] + next_var = var_choice_reinforcement(model, polynomials) + ordering += next_var * math.factorial(nvar-1-level) + return ordering + + +train_reinforcement_model('RFR')