Skip to content

Commit

Permalink
Adding the possibility to use previously tuned hyperparameters
Browse files Browse the repository at this point in the history
Tereso del Rio committed Jun 16, 2023
1 parent e181625 commit d42164a
Showing 26 changed files with 136 additions and 24 deletions.
3 changes: 3 additions & 0 deletions config/hyperparams/augmented_DT.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
criterion: gini
max_depth: 16
splitter: best
3 changes: 3 additions & 0 deletions config/hyperparams/augmented_KNN.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
algorithm: kd_tree
n_neighbors: 12
weights: distance
8 changes: 8 additions & 0 deletions config/hyperparams/augmented_MLP.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
activation: tanh
alpha: 0.005
hidden_layer_sizes: !!python/tuple
- 20
- 20
learning_rate: constant
max_iter: 1000
solver: adam
5 changes: 5 additions & 0 deletions config/hyperparams/augmented_RF.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
criterion: entropy
max_depth: 8
max_features: sqrt
n_estimators: 500
random_state: 18
4 changes: 4 additions & 0 deletions config/hyperparams/augmented_SVC.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
C: 100
gamma: auto
kernel: rbf
tol: 0.0316
3 changes: 3 additions & 0 deletions config/hyperparams/bal_DT.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
criterion: gini
max_depth: 7
splitter: best
3 changes: 3 additions & 0 deletions config/hyperparams/bal_KNN.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
algorithm: auto
n_neighbors: 1
weights: uniform
8 changes: 8 additions & 0 deletions config/hyperparams/bal_MLP.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
activation: tanh
alpha: 0.005
hidden_layer_sizes: !!python/tuple
- 20
- 20
learning_rate: adaptive
max_iter: 1000
solver: adam
3 changes: 3 additions & 0 deletions config/hyperparams/balanced_DT.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
criterion: gini
max_depth: 7
splitter: best
3 changes: 3 additions & 0 deletions config/hyperparams/balanced_KNN.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
algorithm: auto
n_neighbors: 1
weights: uniform
8 changes: 8 additions & 0 deletions config/hyperparams/balanced_MLP.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
activation: tanh
alpha: 0.005
hidden_layer_sizes: !!python/tuple
- 20
- 20
learning_rate: adaptive
max_iter: 1000
solver: adam
5 changes: 5 additions & 0 deletions config/hyperparams/balanced_RF.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
criterion: gini
max_depth: 8
max_features: sqrt
n_estimators: 500
random_state: 18
4 changes: 4 additions & 0 deletions config/hyperparams/balanced_SVC.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
C: 100
gamma: auto
kernel: rbf
tol: 0.0316
3 changes: 3 additions & 0 deletions config/hyperparams/basic_DT.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
criterion: entropy
max_depth: 19
splitter: random
3 changes: 3 additions & 0 deletions config/hyperparams/basic_KNN.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
algorithm: auto
n_neighbors: 7
weights: distance
8 changes: 8 additions & 0 deletions config/hyperparams/basic_MLP.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
activation: tanh
alpha: 0.05
hidden_layer_sizes: !!python/tuple
- 20
- 20
learning_rate: constant
max_iter: 1000
solver: adam
5 changes: 5 additions & 0 deletions config/hyperparams/basic_RF.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
criterion: entropy
max_depth: 8
max_features: sqrt
n_estimators: 200
random_state: 18
4 changes: 4 additions & 0 deletions config/hyperparams/basic_SVC.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
C: 100
gamma: auto
kernel: rbf
tol: 0.0316
3 changes: 3 additions & 0 deletions config/hyperparams/normal_DT.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
criterion: gini
max_depth: 13
splitter: random
3 changes: 3 additions & 0 deletions config/hyperparams/normal_KNN.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
algorithm: auto
n_neighbors: 7
weights: distance
8 changes: 8 additions & 0 deletions config/hyperparams/normal_MLP.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
activation: tanh
alpha: 0.005
hidden_layer_sizes: !!python/tuple
- 20
- 20
learning_rate: adaptive
max_iter: 1000
solver: adam
5 changes: 5 additions & 0 deletions config/hyperparams/normal_RF.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
criterion: entropy
max_depth: 8
max_features: sqrt
n_estimators: 200
random_state: 18
4 changes: 4 additions & 0 deletions config/hyperparams/normal_SVC.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
C: 100
gamma: auto
kernel: rbf
tol: 0.0316
12 changes: 6 additions & 6 deletions create_clean_dataset.py
Original file line number Diff line number Diff line change
@@ -29,12 +29,12 @@ def create_dataframe(dataset):
np.array(all_features), np.array(all_targets), np.array(all_timings)


dataset_filename = os.path.join(os.path.dirname(__file__),
'DatasetsBeforeProcessing',
'dataset_without_repetition_return_ncells.txt')
with open(dataset_filename, 'rb') as f:
dataset = pickle.load(f)
original_polys_list, names, features_list, targets_list, timings_list = create_dataframe(dataset)
# dataset_filename = os.path.join(os.path.dirname(__file__),
# 'DatasetsBeforeProcessing',
# 'dataset_without_repetition_return_ncells.txt')
# with open(dataset_filename, 'rb') as f:
# dataset = pickle.load(f)
# original_polys_list, names, features_list, targets_list, timings_list = create_dataframe(dataset)


def cleaning_dataset(dataset_filename, clean_dataset_filename):
35 changes: 24 additions & 11 deletions main.py
Original file line number Diff line number Diff line change
@@ -20,22 +20,35 @@
from choose_hyperparams import choose_hyperparams
from train_models import train_model
from test_models import test_results
from test_models import timings_in_test


# original_dataset_file = find_dataset_filename('unclean')
# clean_dataset_filename = find_dataset_filename('clean')
# cleaning_dataset(original_dataset_file, clean_dataset_filename)
# create_train_test_datasets()
# Hyperparameter tuning takes a very long time,
# so tune_hyperparameters is used to decide whether to tune them
# or to use previously tuned ones
tune_hyperparameters = False

# for ml_model in ml_models:
# for method in dataset_types:
# print(f"Choosing hyperparameters for {ml_model} in {method}")
# choose_hyperparams(ml_model, method)
original_dataset_file = find_dataset_filename('unclean')
clean_dataset_filename = find_dataset_filename('clean')
cleaning_dataset(original_dataset_file, clean_dataset_filename)
create_train_test_datasets()

if tune_hyperparameters:
for ml_model in ml_models:
for method in dataset_types:
print(f"Choosing hyperparameters for {ml_model} in {method}")
choose_hyperparams(ml_model, method)
for ml_model in ml_models:
print(f"Training {ml_model}")
for method in dataset_types:
print(f"for {method}")
train_model(ml_model, method)
for testing_method in dataset_types:
print(f"Testing {testing_method}")
test_results(testing_method)
for training_method in dataset_types:
print(f"Testing models trained in {training_method}")
test_results(training_method)

model = 'SVC'
testing_method = 'Augmented'
for training_method in dataset_types:
print(f"Testing models trained in {training_method}")
print(timings_in_test(model, testing_method, training_method))
7 changes: 0 additions & 7 deletions ml_results.csv

This file was deleted.

0 comments on commit d42164a

Please sign in to comment.