Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this user
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Resetting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
delriot
/
AugmentingMathematicalDataset
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
1
Code
Issues
0
Pull requests
0
Projects
0
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Projects
Security
Insights
Files
b1a0475
DatasetsBeforeProcessing
Heuristics
config
datasets
packages
README.md
basic_ml.py
choose_hyperparams.py
create_clean_dataset.py
find_filename.py
from_poly_set_to_features.py
main.py
main_heuristics.py
main_regression.py
make_plots.py
output.txt
preprocessing_Dorians_features.py
replicating_Dorians_features.py
test_models.py
test_train_datasets.py
train_models.py
yaml_tools.py
Breadcrumbs
AugmentingMathematicalDataset
/
choose_hyperparams.py
Blame
Blame
Latest commit
History
History
105 lines (94 loc) · 4.98 KB
Breadcrumbs
AugmentingMathematicalDataset
/
choose_hyperparams.py
Top
File metadata and controls
Code
Blame
105 lines (94 loc) · 4.98 KB
Raw
"""Choose model hyperparameters by cross-validated grid search.

The selected hyperparameters are written to a YAML file whose location is
resolved by ``find_hyperparams_filename``.
"""
import csv
import os
import pickle

from sklearn.model_selection import GridSearchCV

from config.ml_models import ml_models
from config.ml_models import sklearn_models
from config.general_values import dataset_qualities
from config.hyperparameters_grid import grid
from yaml_tools import write_yaml_to_file
from find_filename import find_dataset_filename
from find_filename import find_hyperparams_filename


def k_folds_ml(x_train, y_train, model, random_state=0):
    """Select hyperparameters for ``model`` with a 5-fold grid search.

    Despite its name, this function does not return a trained model: it
    returns the best parameter combination found by ``GridSearchCV``.

    Args:
        x_train: Training features, passed unchanged to ``GridSearchCV.fit``.
        y_train: Training labels, passed unchanged to ``GridSearchCV.fit``.
        model: Key used to look up the estimator class in ``sklearn_models``
            and the parameter grid in ``grid``.
        random_state: Seed given to the estimator when it exposes a
            ``random_state`` parameter, so that the search is reproducible.
            Estimators without such a parameter are left untouched.

    Returns:
        The ``best_params_`` attribute of the fitted ``GridSearchCV``.

    Raises:
        KeyError: If ``model`` is missing from ``sklearn_models`` or ``grid``.
    """
    estimator_class = sklearn_models[model]
    param_grid = grid[model]

    estimator = estimator_class()
    # Only seed estimators that actually accept a seed; others would raise
    # on an unknown parameter.
    if 'random_state' in estimator.get_params():
        estimator.set_params(random_state=random_state)

    search = GridSearchCV(
        estimator=estimator,
        param_grid=param_grid,
        cv=5,
        verbose=10,  # to get progress updates while the search runs
    )
    search.fit(x_train, y_train)
    return search.best_params_


def choose_hyperparams(ml_model, method):
    """Pick hyperparameters for ``ml_model`` on the ``method`` training set.

    The training dataset file is located with ``find_dataset_filename``,
    unpickled, and its ``'features'`` and ``'labels'`` entries are fed to
    ``k_folds_ml``. The chosen hyperparameters and the output filename are
    printed, and the hyperparameters are written with ``write_yaml_to_file``.

    Args:
        ml_model: Model key forwarded to ``k_folds_ml`` and
            ``find_hyperparams_filename``.
        method: Dataset variant forwarded to ``find_dataset_filename`` and
            ``find_hyperparams_filename``.
    """
    this_dataset_file = find_dataset_filename('Train', method=method)
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on dataset files produced by this project.
    with open(this_dataset_file, 'rb') as f:
        dataset = pickle.load(f)

    hyperparams = k_folds_ml(dataset['features'],
                             dataset['labels'],
                             model=ml_model)
    print(hyperparams)

    hyperparams_filename = find_hyperparams_filename(method, ml_model)
    print(hyperparams_filename)
    write_yaml_to_file(hyperparams, hyperparams_filename)


# NOTE: disabled legacy script, kept for reference only. The nesting below is
# a best-effort reconstruction and should be confirmed before re-enabling.
#
# test_balanced_dataset_file = os.path.join(os.path.dirname(__file__),
#                                           'datasets', 'test',
#                                           'balanced_test_dataset.txt')
# with open(test_balanced_dataset_file, 'rb') as g:
#     balanced_x_test, balanced_y_test = pickle.load(g)

# test_normal_dataset_file = os.path.join(os.path.dirname(__file__),
#                                         'datasets', 'test',
#                                         'normal_test_dataset.txt')
# with open(test_normal_dataset_file, 'rb') as g:
#     normal_x_test, normal_y_test = pickle.load(g)

# output_file_balanced = os.path.join(os.path.dirname(__file__),
#                                     'ml_results_k_fold_tested_in_balanced.csv')
# with open(output_file_balanced, 'w') as f_balanced:
#     writer_balanced = csv.writer(f_balanced)
#     writer_balanced.writerow(["Name"] + dataset_qualities)
#     output_file_normal = os.path.join(os.path.dirname(__file__),
#                                       'ml_results_k_fold_tested_in_normal.csv')
#     with open(output_file_normal, 'w') as f_normal:
#         writer_normal = csv.writer(f_normal)
#         writer_normal.writerow(["Name"] + dataset_qualities)
#         for ml_model in ml_models:
#             print(f"Model: {ml_model}")
#             acc_balanced = dict()
#             acc_normal = dict()
#             for method in dataset_qualities:
#                 this_dataset_file = os.path.join(os.path.dirname(__file__),
#                                                  'datasets', 'train',
#                                                  f'{method}_train_dataset.txt')
#                 with open(this_dataset_file, 'rb') as f:
#                     x_train, y_train, _ = pickle.load(f)
#                 hyperparams = k_folds_ml(x_train, y_train,
#                                          model=ml_model)
#                 write_yaml_to_file(hyperparams,
#                                    os.path.join(os.path.dirname(__file__),
#                                                 'config', 'hyperparams',
#                                                 f'{method}_{ml_model}'))
#                 current_classifier = sklearn_models[ml_model]
#                 clf = current_classifier(**hyperparams)
#                 clf.fit(x_train, y_train)
#                 acc_balanced[method] = clf.score(balanced_x_test,
#                                                  balanced_y_test)
#                 acc_normal[method] = clf.score(normal_x_test, normal_y_test)
#                 method_filename = os.path.join(os.path.dirname(__file__),
#                                                'config', 'models',
#                                                f'{method}_trained_model.txt')
#                 with open(method_filename, 'wb') as method_file:
#                     pickle.dump(clf, method_file)
#             round_accuracies_balanced = [round(acc, 2)
#                                          for acc in [acc_balanced[method_here]
#                                          for method_here in dataset_qualities]]
#             round_accuracies_normal = [round(acc, 2)
#                                        for acc in [acc_normal[method_here]
#                                        for method_here in dataset_qualities]]
#             writer_balanced.writerow([ml_model] + round_accuracies_balanced)
#             writer_normal.writerow([ml_model] + round_accuracies_normal)

# model = 'KNN'
# method = 'balanced'
# choose_hyperparams(model, method)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
You can’t perform that action at this time.