run_for_paper added

This file allows us to compare all models and heuristics in a single run. I still have to add a line to save the results.
delriot · Sep 21, 2023 · f96c66f · f96c66f
1 parent 4d43ecb
commit f96c66f
Showing 14 changed files with 324 additions and 132 deletions.
diff --git a/choose_hyperparams.py b/choose_hyperparams.py
@@ -28,16 +28,16 @@ def k_folds_ml(x_train, y_train, model, random_state=0):
     return rf_cv.best_params_
 
 
-def choose_hyperparams(ml_model, method):
+def choose_hyperparams(model_name, paradigm, training_quality):
     """Given a ml_model and a method, a file with the hyperparameters
     chosen by cross validation is created"""
-    this_dataset_file = find_dataset_filename('Train', method=method)
+    this_dataset_file = find_dataset_filename('Train', dataset_quality=training_quality)
     with open(this_dataset_file, 'rb') as f:
         dataset = pickle.load(f)
-    hyperparams = k_folds_ml(dataset['features'], dataset['labels'], model=ml_model)
+    hyperparams = k_folds_ml(dataset['features'], dataset['labels'], model=model_name)
     print(hyperparams)
-    hyperparams_filename = find_hyperparams_filename(method, ml_model)
-    print(hyperparams_filename)
+    hyperparams_filename = find_hyperparams_filename(model_name, paradigm, training_quality)
+    print('new hyperparams_filename', hyperparams_filename)
     write_yaml_to_file(hyperparams, hyperparams_filename)
 
 

diff --git a/config/general_values.py b/config/general_values.py
@@ -1,6 +1,6 @@
 
 purposes = ['Train', 'Test']
-dataset_qualities = ['Normal', 'Balanced', 'Augmented']
+dataset_qualities = ['Biased', 'Balanced', 'Augmented']
 
 
 def aveg(given_list):

diff --git a/config/hyperparameters_grid.py b/config/hyperparameters_grid.py
@@ -2,9 +2,9 @@
 
 grid = dict()
 grid['RF-Classifier'] = {
-    'n_estimators': [200, 300, 400, 500],
+    'n_estimators': [200, 500],
     'max_features': ['sqrt', 'log2'],
-    'max_depth': [4, 5, 6, 7, 8],
+    'max_depth': [4, 6, 8],
     'criterion': ['gini', 'entropy']
 }
 grid['KNN-Classifier'] = {
@@ -15,8 +15,7 @@
     # 'p': range(1, 4, 1)
 }
 grid['MLP-Classifier'] = {
-    'hidden_layer_sizes': [(5, 5), (15, 15), (20, 20),
-                           (10, 10, 10), (20, 20, 20)],
+    'hidden_layer_sizes': [(30, 30), (10, 10, 10), (20, 20, 20)],
     'activation': ['tanh', 'relu'],
     'solver': ['sgd', 'adam'],
     'learning_rate': ['constant', 'adaptive'],
@@ -43,20 +42,30 @@
 }
 
 grid['RF-Regressor'] = {
-    'criterion': ['squared_error', 'friedman_mse'],
-    "max_depth": [1, 3, 7],
-    "min_samples_leaf": [1, 5, 10],
+    'n_estimators': [200, 500],
+    'max_features': ['sqrt', 'log2'],
+    'max_depth': [4, 6, 8],
+    'criterion': ['friedman_mse', 'squared_error']
+    # 'criterion': ['squared_error', 'friedman_mse'],
+    # "max_depth": [1, 3, 7],
+    # "min_samples_leaf": [1, 5, 10],
 }
 grid['KNN-Regressor'] = {
     'n_neighbors': [3, 5, 10],
     'weights': ['uniform', 'distance'],
     'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
 }
 grid['MLP-Regressor'] = {
-    'hidden_layer_sizes': [(100,), (20, 20), (10, 10, 10)],
-    'activation': ['logistic', 'tanh', 'relu'],
-    'solver': ['adam', 'sgd'],
-    'alpha': [0.0001, 0.001, 0.01]
+    'hidden_layer_sizes': [(30, 30), (10, 10, 10), (20, 20, 20)],
+    'activation': ['tanh', 'relu'],
+    'solver': ['sgd', 'adam'],
+    'learning_rate': ['constant', 'adaptive'],
+    'alpha': [0.05, 0.005],
+    'max_iter': [1000]
+    # 'hidden_layer_sizes': [(30, 30), (20, 20, 20), (10, 10, 10)],
+    # 'activation': ['logistic', 'tanh', 'relu'],
+    # 'solver': ['adam', 'sgd'],
+    # 'alpha': [0.0001, 0.001, 0.01]
 }
 grid['DT-Regressor'] = {
     "splitter": ["best", "random"],