Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this user
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Resetting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
delriot
/
AugmentingMathematicalDataset
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
1
Code
Issues
0
Pull requests
0
Projects
0
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Projects
Security
Insights
Files
b1a0475
DatasetsBeforeProcessing
Heuristics
config
datasets
packages
README.md
basic_ml.py
choose_hyperparams.py
create_clean_dataset.py
find_filename.py
from_poly_set_to_features.py
main.py
main_heuristics.py
main_regression.py
make_plots.py
output.txt
preprocessing_Dorians_features.py
replicating_Dorians_features.py
test_models.py
test_train_datasets.py
train_models.py
yaml_tools.py
Breadcrumbs
AugmentingMathematicalDataset
/
main.py
Blame
Blame
Latest commit
History
History
95 lines (86 loc) · 3.9 KB
Breadcrumbs
AugmentingMathematicalDataset
/
main.py
Top
File metadata and controls
Code
Blame
95 lines (86 loc) · 3.9 KB
Raw
"""
The experiments in [1] are replicated with some changes.

The first change is that the testing data is balanced, so that all labels
are almost equally common.
Then we use three training sets; dataset as in [1], balanced dataset
and data augmentation dataset.

[1]Florescu, D., England, M. (2020). A Machine Learning Based Software
Pipeline to Pick the Variable Ordering for Algorithms with Polynomial Inputs.
Bigatti, A., Carette, J., Davenport, J., Joswig, M., de Wolff, T. (eds)
Mathematical Software, ICMS 2020. ICMS 2020. Lecture Notes in Computer Science,
vol 12097. Springer, Cham. https://doi.org/10.1007/978-3-030-52200-1_30
"""
import csv

from config.ml_models import ml_models
from config.general_values import dataset_qualities
from config.general_values import purposes
from find_filename import find_dataset_filename
from find_filename import find_model_filename
from create_clean_dataset import cleaning_dataset
from test_train_datasets import create_train_test_datasets
from choose_hyperparams import choose_hyperparams
from train_models import train_model
from test_models import test_results
from test_models import timings_in_test
from test_models import test_model


# Hyperparameter tuning takes a very long time, so `tune_hyperparameters`
# decides whether to tune them in this run or to reuse previously tuned ones.
tune_hyperparameters = False
# When False, the training step below is skipped entirely.
train_the_models = True
# NOTE(review): `paradigm` is never read below; `test_model` is called with
# `paradigm=training_method` instead. Confirm which value is intended.
paradigm = 'classification'

# Both preparation steps run unconditionally, before any tuning/training.
cleaning_dataset()
create_train_test_datasets()

if tune_hyperparameters:
    for ml_model in ml_models:
        for method in dataset_qualities:
            print(f"Choosing hyperparameters for {ml_model} in {method}")
            choose_hyperparams(ml_model, method)

if train_the_models:
    for ml_model in ml_models:
        print(f"Training {ml_model}")
        for method in dataset_qualities:
            print(f"for {method}")
            train_model(ml_model, method)

training_method = 'augmented'
testing_method = 'augmented'
output_file = "classification_output_acc_time.csv"

# Column order is fixed by the metrics of the first model tested; it stays
# None until then, which also marks that the header is still to be written.
keys = None
for ml_model in ml_models:
    print(f"Testing models trained in {training_method}")
    metrics = test_model(ml_model,
                         paradigm=training_method,
                         testing_method=testing_method)
    # The file is reopened in append mode for every model, so rows already
    # written are on disk even if a later model fails.
    with open(output_file, 'a', newline='') as f:
        writer = csv.writer(f)
        if keys is None:
            keys = list(metrics.keys())
            # The header goes through the same csv.writer as the data rows,
            # so delimiter and line terminator are consistent in the file.
            writer.writerow(['No hyperparameters'])
            writer.writerow(['Model'] + keys)
        writer.writerow([ml_model] + [metrics[key] for key in keys])


# Disabled timing experiment, kept for reference.
#
# timings = dict()
# testing_method = 'augmented'
# test_dataset_filename = find_dataset_filename('Test',
#                                               testing_method)

# with open("classification_output_timings.csv", 'w') as f:
#     f.write("model, Normal, Balanced, Augmented\n")
# for ml_model in ml_models:
#     for training_method in dataset_qualities:
#         trained_model_filename = find_model_filename(training_method,
#                                                      ml_model)
#         accuracy = test_model(trained_model_filename,
#                               test_dataset_filename)
#         timings[training_method] = timings_in_test(ml_model, testing_method,
#                                                    training_method)
#         total_time = sum(timings[training_method])
#         # with open("classification_output_acc_time.csv", 'a') as f:
#         #     f.write(f"{ml_model}, {accuracy}, {total_time}\n")
#     with open("classification_output_timings.csv", 'a') as f:
#         f.write(f"{ml_model}, {sum(timings['Normal'])}, \
#                 {sum(timings['Balanced'])}, {sum(timings['Augmented'])}\n")
#     timings['optimal'] = timings_in_test('optimal', testing_method)
#     print(sum(timings['optimal']))
#     from make_plots import survival_plot
#     survival_plot(timings, plot_name=f"survival_plot_{ml_model}")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
You can’t perform that action at this time.