From e2c35ce550097e9f0cff8c636c98b1c28a5f84d4 Mon Sep 17 00:00:00 2001 From: Tereso del Rio Date: Wed, 13 Sep 2023 13:38:17 +0200 Subject: [PATCH] Checkpoint, about to change train/test dataset generation --- choose_hyperparams.py | 4 ++-- main.py | 2 +- replicating_Dorians_features.py | 3 +++ test_train_datasets.py | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/choose_hyperparams.py b/choose_hyperparams.py index 9a5aa18..53e51c1 100644 --- a/choose_hyperparams.py +++ b/choose_hyperparams.py @@ -22,8 +22,8 @@ def k_folds_ml(x_train, y_train, model, random_state=0): rf_cv = GridSearchCV(estimator=current_classifier(), param_grid=current_grid, cv=5, - verbose=10 # to get updates - ) + verbose=10 # to get updates + ) rf_cv.fit(x_train, y_train) return rf_cv.best_params_ diff --git a/main.py b/main.py index fb90487..04571f1 100644 --- a/main.py +++ b/main.py @@ -33,7 +33,7 @@ paradigm = 'classification' # cleaning_dataset() -# create_train_test_datasets() +create_train_test_datasets() # if tune_hyperparameters: # for ml_model in ml_models: diff --git a/replicating_Dorians_features.py b/replicating_Dorians_features.py index c27c89c..3260c82 100644 --- a/replicating_Dorians_features.py +++ b/replicating_Dorians_features.py @@ -68,6 +68,7 @@ def extract_features(dataset): all_timings = [] all_original_polynomials = [] all_projections = [] + all_cells = [] for index, projections in enumerate(dataset[0]): all_projections.append(projections) original_polynomials = projections[0][0] @@ -76,6 +77,7 @@ def extract_features(dataset): all_original_polynomials.append(original_polynomials) all_targets.append(dataset[1][index]) all_timings.append(dataset[2][index]) + all_cells.append(dataset[3][index]) names, instance_features = features_from_set_of_polys( original_polynomials) all_features.append(instance_features) @@ -85,6 +87,7 @@ def extract_features(dataset): my_dataset['targets'] = np.array(all_targets) my_dataset['timings'] = np.array(all_timings) my_dataset['projections'] = np.array(all_projections) + my_dataset['cells'] = np.array(all_cells) return my_dataset diff --git a/test_train_datasets.py b/test_train_datasets.py index ed1b73c..fd53ce1 100644 --- a/test_train_datasets.py +++ b/test_train_datasets.py @@ -34,6 +34,7 @@ def create_train_test_datasets(): y = dict() # to keep the labels t = dict() # to keep the timings p = dict() # to keep the projections + c = dict() # to keep the number of cells # train and test sets are created random_state = 0 x['train_normal'], x['test_normal'], \