diff --git a/config/hyperparams/augmented_DT.yaml b/config/hyperparams/augmented_DT.yaml new file mode 100644 index 0000000..a9e3b69 --- /dev/null +++ b/config/hyperparams/augmented_DT.yaml @@ -0,0 +1,3 @@ +criterion: gini +max_depth: 16 +splitter: best diff --git a/config/hyperparams/augmented_KNN.yaml b/config/hyperparams/augmented_KNN.yaml new file mode 100644 index 0000000..80fcee0 --- /dev/null +++ b/config/hyperparams/augmented_KNN.yaml @@ -0,0 +1,3 @@ +algorithm: kd_tree +n_neighbors: 12 +weights: distance diff --git a/config/hyperparams/augmented_MLP.yaml b/config/hyperparams/augmented_MLP.yaml new file mode 100644 index 0000000..fca174d --- /dev/null +++ b/config/hyperparams/augmented_MLP.yaml @@ -0,0 +1,8 @@ +activation: tanh +alpha: 0.005 +hidden_layer_sizes: !!python/tuple +- 20 +- 20 +learning_rate: constant +max_iter: 1000 +solver: adam diff --git a/config/hyperparams/augmented_RF.yaml b/config/hyperparams/augmented_RF.yaml new file mode 100644 index 0000000..f7828fb --- /dev/null +++ b/config/hyperparams/augmented_RF.yaml @@ -0,0 +1,5 @@ +criterion: entropy +max_depth: 8 +max_features: sqrt +n_estimators: 500 +random_state: 18 diff --git a/config/hyperparams/augmented_SVC.yaml b/config/hyperparams/augmented_SVC.yaml new file mode 100644 index 0000000..505a4fd --- /dev/null +++ b/config/hyperparams/augmented_SVC.yaml @@ -0,0 +1,4 @@ +C: 100 +gamma: auto +kernel: rbf +tol: 0.0316 diff --git a/config/hyperparams/bal_DT.yaml b/config/hyperparams/bal_DT.yaml new file mode 100644 index 0000000..da4ceb5 --- /dev/null +++ b/config/hyperparams/bal_DT.yaml @@ -0,0 +1,3 @@ +criterion: gini +max_depth: 7 +splitter: best diff --git a/config/hyperparams/bal_KNN.yaml b/config/hyperparams/bal_KNN.yaml new file mode 100644 index 0000000..710b5f6 --- /dev/null +++ b/config/hyperparams/bal_KNN.yaml @@ -0,0 +1,3 @@ +algorithm: auto +n_neighbors: 1 +weights: uniform diff --git a/config/hyperparams/bal_MLP.yaml b/config/hyperparams/bal_MLP.yaml new file mode 100644 index 
0000000..a4fb4e4 --- /dev/null +++ b/config/hyperparams/bal_MLP.yaml @@ -0,0 +1,8 @@ +activation: tanh +alpha: 0.005 +hidden_layer_sizes: !!python/tuple +- 20 +- 20 +learning_rate: adaptive +max_iter: 1000 +solver: adam diff --git a/config/hyperparams/balanced_DT.yaml b/config/hyperparams/balanced_DT.yaml new file mode 100644 index 0000000..da4ceb5 --- /dev/null +++ b/config/hyperparams/balanced_DT.yaml @@ -0,0 +1,3 @@ +criterion: gini +max_depth: 7 +splitter: best diff --git a/config/hyperparams/balanced_KNN.yaml b/config/hyperparams/balanced_KNN.yaml new file mode 100644 index 0000000..710b5f6 --- /dev/null +++ b/config/hyperparams/balanced_KNN.yaml @@ -0,0 +1,3 @@ +algorithm: auto +n_neighbors: 1 +weights: uniform diff --git a/config/hyperparams/balanced_MLP.yaml b/config/hyperparams/balanced_MLP.yaml new file mode 100644 index 0000000..a4fb4e4 --- /dev/null +++ b/config/hyperparams/balanced_MLP.yaml @@ -0,0 +1,8 @@ +activation: tanh +alpha: 0.005 +hidden_layer_sizes: !!python/tuple +- 20 +- 20 +learning_rate: adaptive +max_iter: 1000 +solver: adam diff --git a/config/hyperparams/balanced_RF.yaml b/config/hyperparams/balanced_RF.yaml new file mode 100644 index 0000000..710fc36 --- /dev/null +++ b/config/hyperparams/balanced_RF.yaml @@ -0,0 +1,5 @@ +criterion: gini +max_depth: 8 +max_features: sqrt +n_estimators: 500 +random_state: 18 diff --git a/config/hyperparams/balanced_SVC.yaml b/config/hyperparams/balanced_SVC.yaml new file mode 100644 index 0000000..505a4fd --- /dev/null +++ b/config/hyperparams/balanced_SVC.yaml @@ -0,0 +1,4 @@ +C: 100 +gamma: auto +kernel: rbf +tol: 0.0316 diff --git a/config/hyperparams/basic_DT.yaml b/config/hyperparams/basic_DT.yaml new file mode 100644 index 0000000..7e09a7f --- /dev/null +++ b/config/hyperparams/basic_DT.yaml @@ -0,0 +1,3 @@ +criterion: entropy +max_depth: 19 +splitter: random diff --git a/config/hyperparams/basic_KNN.yaml b/config/hyperparams/basic_KNN.yaml new file mode 100644 index 0000000..d7863e4 --- /dev/null 
+++ b/config/hyperparams/basic_KNN.yaml @@ -0,0 +1,3 @@ +algorithm: auto +n_neighbors: 7 +weights: distance diff --git a/config/hyperparams/basic_MLP.yaml b/config/hyperparams/basic_MLP.yaml new file mode 100644 index 0000000..b3a62b5 --- /dev/null +++ b/config/hyperparams/basic_MLP.yaml @@ -0,0 +1,8 @@ +activation: tanh +alpha: 0.05 +hidden_layer_sizes: !!python/tuple +- 20 +- 20 +learning_rate: constant +max_iter: 1000 +solver: adam diff --git a/config/hyperparams/basic_RF.yaml b/config/hyperparams/basic_RF.yaml new file mode 100644 index 0000000..5359ae0 --- /dev/null +++ b/config/hyperparams/basic_RF.yaml @@ -0,0 +1,5 @@ +criterion: entropy +max_depth: 8 +max_features: sqrt +n_estimators: 200 +random_state: 18 diff --git a/config/hyperparams/basic_SVC.yaml b/config/hyperparams/basic_SVC.yaml new file mode 100644 index 0000000..505a4fd --- /dev/null +++ b/config/hyperparams/basic_SVC.yaml @@ -0,0 +1,4 @@ +C: 100 +gamma: auto +kernel: rbf +tol: 0.0316 diff --git a/config/hyperparams/normal_DT.yaml b/config/hyperparams/normal_DT.yaml new file mode 100644 index 0000000..5ed1b07 --- /dev/null +++ b/config/hyperparams/normal_DT.yaml @@ -0,0 +1,3 @@ +criterion: gini +max_depth: 13 +splitter: random diff --git a/config/hyperparams/normal_KNN.yaml b/config/hyperparams/normal_KNN.yaml new file mode 100644 index 0000000..d7863e4 --- /dev/null +++ b/config/hyperparams/normal_KNN.yaml @@ -0,0 +1,3 @@ +algorithm: auto +n_neighbors: 7 +weights: distance diff --git a/config/hyperparams/normal_MLP.yaml b/config/hyperparams/normal_MLP.yaml new file mode 100644 index 0000000..a4fb4e4 --- /dev/null +++ b/config/hyperparams/normal_MLP.yaml @@ -0,0 +1,8 @@ +activation: tanh +alpha: 0.005 +hidden_layer_sizes: !!python/tuple +- 20 +- 20 +learning_rate: adaptive +max_iter: 1000 +solver: adam diff --git a/config/hyperparams/normal_RF.yaml b/config/hyperparams/normal_RF.yaml new file mode 100644 index 0000000..5359ae0 --- /dev/null +++ b/config/hyperparams/normal_RF.yaml @@ -0,0 +1,5 @@ 
+criterion: entropy +max_depth: 8 +max_features: sqrt +n_estimators: 200 +random_state: 18 diff --git a/config/hyperparams/normal_SVC.yaml b/config/hyperparams/normal_SVC.yaml new file mode 100644 index 0000000..505a4fd --- /dev/null +++ b/config/hyperparams/normal_SVC.yaml @@ -0,0 +1,4 @@ +C: 100 +gamma: auto +kernel: rbf +tol: 0.0316 diff --git a/create_clean_dataset.py b/create_clean_dataset.py index 2b2d463..bacf348 100644 --- a/create_clean_dataset.py +++ b/create_clean_dataset.py @@ -29,12 +29,12 @@ def create_dataframe(dataset): np.array(all_features), np.array(all_targets), np.array(all_timings) -dataset_filename = os.path.join(os.path.dirname(__file__), - 'DatasetsBeforeProcessing', - 'dataset_without_repetition_return_ncells.txt') -with open(dataset_filename, 'rb') as f: - dataset = pickle.load(f) -original_polys_list, names, features_list, targets_list, timings_list = create_dataframe(dataset) +# dataset_filename = os.path.join(os.path.dirname(__file__), +# 'DatasetsBeforeProcessing', +# 'dataset_without_repetition_return_ncells.txt') +# with open(dataset_filename, 'rb') as f: +# dataset = pickle.load(f) +# original_polys_list, names, features_list, targets_list, timings_list = create_dataframe(dataset) def cleaning_dataset(dataset_filename, clean_dataset_filename): diff --git a/main.py b/main.py index 6982b59..b0afcae 100644 --- a/main.py +++ b/main.py @@ -20,22 +20,35 @@ from choose_hyperparams import choose_hyperparams from train_models import train_model from test_models import test_results +from test_models import timings_in_test -# original_dataset_file = find_dataset_filename('unclean') -# clean_dataset_filename = find_dataset_filename('clean') -# cleaning_dataset(original_dataset_file, clean_dataset_filename) -# create_train_test_datasets() +# Hyperparameter tuning takes a very long time, so +# tune_hyperparameters is used to decide whether to tune them +# or to use the previously tuned values +tune_hyperparameters = False -# for ml_model in ml_models: -# 
for method in dataset_types: -# print(f"Choosing hyperparameters for {ml_model} in {method}") -# choose_hyperparams(ml_model, method) +original_dataset_file = find_dataset_filename('unclean') +clean_dataset_filename = find_dataset_filename('clean') +cleaning_dataset(original_dataset_file, clean_dataset_filename) +create_train_test_datasets() + +if tune_hyperparameters: + for ml_model in ml_models: + for method in dataset_types: + print(f"Choosing hyperparameters for {ml_model} in {method}") + choose_hyperparams(ml_model, method) for ml_model in ml_models: print(f"Training {ml_model}") for method in dataset_types: print(f"for {method}") train_model(ml_model, method) -for testing_method in dataset_types: - print(f"Testing {testing_method}") - test_results(testing_method) +for training_method in dataset_types: + print(f"Testing models trained in {training_method}") + test_results(training_method) + +model = 'SVC' +testing_method = 'Augmented' +for training_method in dataset_types: + print(f"Testing models trained in {training_method}") + print(timings_in_test(model, testing_method, training_method)) diff --git a/ml_results.csv b/ml_results.csv deleted file mode 100644 index 5ae9217..0000000 --- a/ml_results.csv +++ /dev/null @@ -1,7 +0,0 @@ -Name,Normal,Balance data,Augment data -SVC,0.21,0.21,0.16 -DT,0.32,0.27,0.34 -KNN,0.29,0.39,0.51 -RF,0.42,0.5,0.61 -MPL,0.21,0.2,0.17 -my_mlp,0.36,0.33,0.38