Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this user
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Resetting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
delriot
/
AugmentingMathematicalDataset
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
1
Code
Issues
0
Pull requests
0
Projects
0
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Projects
Security
Insights
Files
b1a0475
DatasetsBeforeProcessing
Heuristics
config
datasets
packages
README.md
basic_ml.py
choose_hyperparams.py
create_clean_dataset.py
find_filename.py
from_poly_set_to_features.py
main.py
main_heuristics.py
main_regression.py
make_plots.py
output.txt
preprocessing_Dorians_features.py
replicating_Dorians_features.py
test_models.py
test_train_datasets.py
train_models.py
yaml_tools.py
Breadcrumbs
AugmentingMathematicalDataset
/
train_models.py
Blame
Blame
Latest commit
History
History
183 lines (163 loc) · 8.36 KB
Breadcrumbs
AugmentingMathematicalDataset
/
train_models.py
Top
File metadata and controls
Code
Blame
183 lines (163 loc) · 8.36 KB
Raw
import math
import pickle
import random
from yaml_tools import read_yaml_from_file
from config.ml_models import sklearn_models
from config.ml_models import ml_regressors
from find_filename import find_dataset_filename
from find_filename import find_hyperparams_filename
from find_filename import find_model_filename
from find_filename import find_other_filename
from dataset_manipulation import give_all_symmetries
import numpy as np
from sklearn import metrics
from itertools import combinations
from replicating_Dorians_features import compute_features_for_var
from test_models import compute_metrics


def train_model(ml_model, method):
    """Train a classifier and pickle it to disk.

    Loads the pickled training dataset for *method*, instantiates the
    sklearn model registered under *ml_model* with the hyperparameters
    previously tuned (read from the YAML hyperparams file), fits it on
    the dataset's 'features'/'labels', and writes the fitted model to
    the canonical model file for (method, ml_model).
    """
    train_data_filename = find_dataset_filename('Train', method=method)
    hyperparams_file = find_hyperparams_filename(method, ml_model)
    with open(train_data_filename, 'rb') as train_data_file:
        train_dataset = pickle.load(train_data_file)
    hyperparams = read_yaml_from_file(hyperparams_file)
    current_model = sklearn_models[ml_model]
    model = current_model(**hyperparams)
    model.fit(train_dataset['features'], train_dataset['labels'])
    trained_model_filename = find_model_filename(method, ml_model)
    with open(trained_model_filename, 'wb') as trained_model_file:
        pickle.dump(model, trained_model_file)


def train_regression_model(ml_model, method):
    """Train a regressor that predicts timings from features.

    Instances whose timing string starts with 'Over' (timed out) carry
    no usable numeric timing, so they are dropped before fitting.
    Returns the fitted regressor (it is not pickled to disk here).
    """
    train_data_filename = find_dataset_filename('Train', method=method)
    with open(train_data_filename, 'rb') as train_data_file:
        train_dataset = pickle.load(train_data_file)
    # hyperparams_file = find_hyperparams_filename(method, ml_model)
    # hyperparams = read_yaml_from_file(hyperparams_file)
    # Drop timed-out instances ('Over...') so timings can be cast to float.
    train_dataset['features'] = np.asarray(
        [x_t for x_t, t_t in zip(train_dataset['features'],
                                 train_dataset['timings'])
         if t_t[:4] != 'Over'], dtype=float)
    train_dataset['timings'] = np.asarray(
        [t_t for t_t in train_dataset['timings'] if t_t[:4] != 'Over'],
        dtype=float)
    ####
    # IS THIS REALLY DOING SOMETHING?
    # What if we used twice timelimit instead
    current_model = ml_regressors[ml_model]
    reg = current_model()  # **hyperparams)
    reg.fit(train_dataset['features'], train_dataset['timings'])
    # trained_model_filename = find_model_filename(method, ml_model, 'regression')
    # with open(trained_model_filename, 'wb') as trained_model_file:
    #     pickle.dump(reg, trained_model_file)
    return reg


def choose_using_regression(x_test, regressor):
    """Return the index of the symmetry of *x_test* with the smallest
    predicted timing, i.e. the ordering the regressor recommends."""
    timings = regressor.predict(give_all_symmetries(x_test, 0))
    return np.argmin(timings)


def test_regression_model(method, regressor):
    """Evaluate *regressor* on the test set for *method*.

    Timed-out instances ('Over...') are filtered out, each remaining
    instance is classified via ``choose_using_regression``, and the
    classification accuracy against ``y_test`` is returned.
    """
    test_data_filename = find_dataset_filename('Test', method=method)
    with open(test_data_filename, 'rb') as test_data_file:
        x_test, y_test, t_test = pickle.load(test_data_file)
    x_test = np.asarray([x_t for x_t, t_t in zip(x_test, t_test)
                         if t_t[:4] != 'Over'], dtype=float)
    y_test = np.asarray([y_t for y_t, t_t in zip(y_test, t_test)
                         if t_t[:4] != 'Over'], dtype=float)
    y_pred = [choose_using_regression(x_i, regressor) for x_i in x_test]
    # Fix: previously y_pred was computed and silently discarded; the
    # unused `metrics` import indicates scoring was the intent.
    return metrics.accuracy_score(y_test, y_pred)


def train_reinforcement_model(ml_model, method='Normal', n_iterations=30):
    """Train a variable-choice model by iterated self-improvement.

    The model is bootstrapped on random labels for the first instance's
    variable features, then repeatedly (``n_iterations`` rounds) used to
    generate corrected training instances via
    ``training_instances_reinforcement`` and refitted on the growing
    pool. The fitted model is pickled under the 'reinforcement' method.
    """
    train_data_filename = find_dataset_filename('Train', method=method)
    with open(train_data_filename, 'rb') as train_data_file:
        train_dataset = pickle.load(train_data_file)
    # hyperparams_file = find_hyperparams_filename(method, ml_model)
    # hyperparams = read_yaml_from_file(hyperparams_file)
    current_model = sklearn_models[ml_model]
    # model = current_model(**hyperparams)
    model = current_model()
    # Bootstrap: fit on random labels so the model can make an initial
    # (arbitrary) prediction in the first reinforcement round.
    first_polys = train_dataset['projections'][0][0][0]
    first_features = get_vars_features(first_polys)
    first_labels = [random.random() for _ in range(len(first_features))]
    model.fit(first_features, first_labels)
    training_features, training_labels = [], []
    for _ in range(n_iterations):
        for projections, timings \
                in zip(train_dataset['projections'],
                       train_dataset['timings']):
            new_training_features, new_training_labels = \
                training_instances_reinforcement(model, projections, timings)
            training_features += new_training_features
            training_labels += new_training_labels
        # Refit on the accumulated pool and report progress each round.
        model.fit(training_features, training_labels)
        print(test_reinforcement_model(model))
    trained_model_filename = find_model_filename('reinforcement', ml_model)
    with open(trained_model_filename, 'wb') as trained_model_file:
        pickle.dump(model, trained_model_file)


def training_instances_reinforcement(model, projections, timings):
    """Build corrected (features, labels) pairs for reinforcement.

    For each first-choice variable, the observed timing ratio between
    variable pairs is compared with the model's predicted ratio, and a
    multiplicative correction is split symmetrically between the two
    evaluations (hence the square root), producing labels that nudge
    the model's predictions toward the observed timings.
    """
    original_polynomials = projections[0][0]
    # assumes each monomial carries one exponent per variable plus a
    # coefficient slot — TODO confirm against the dataset format
    nvar = len(original_polynomials[0][0]) - 1
    vars_features = get_vars_features(original_polynomials)
    evaluations = [model.predict([var_features])[0]
                   for var_features in vars_features]
    timing = []
    for var in range(nvar):
        # returns the polynomials after projection wrt var
        projected_polynomials = projections[var * math.factorial(nvar-1)][1]
        new_var = var_choice_reinforcement(model, projected_polynomials)
        ordering_chosen = new_var + var * math.factorial(nvar-1)
        timing.append(timings[ordering_chosen])
    # now compute which part of the difference between
    # evaluations[i]/evaluations[j] and timing[i]/timing[j]
    # corresponds to each evaluation
    instances_features = []
    instances_labels = []
    pairs = list(combinations(range(nvar), 2))
    for i, j in pairs:
        correction_coefficient = \
            math.sqrt((timing[i]/timing[j])/(evaluations[i]/evaluations[j]))
        instances_features += [vars_features[i], vars_features[j]]
        instances_labels += [evaluations[i]*correction_coefficient,
                             evaluations[j]/correction_coefficient]
    return instances_features, instances_labels


def get_vars_features(polynomials):
    """Return the feature vector of each variable in *polynomials*.

    Features are computed per variable by ``compute_features_for_var``
    and filtered down to the pickled set of 'unique_features' names, so
    every variable yields the same feature layout.
    """
    vars_features = []
    nvar = len(polynomials[0][0]) - 1
    unique_features_filename = find_other_filename("unique_features")
    with open(unique_features_filename, 'rb') as unique_features_file:
        unique_names = pickle.load(unique_features_file)
    for var in range(nvar):
        var_features, var_names = \
            compute_features_for_var(polynomials, var)
        var_features = [feature for feature, name
                        in zip(var_features, var_names)
                        if name in unique_names]
        vars_features.append(var_features)
    return vars_features


def var_choice_reinforcement(model, polynomials):
    """Return the next variable to project, chosen by the model trained
    using reinforcement (the variable with the smallest prediction)."""
    vars_features = get_vars_features(polynomials)
    evaluations = model.predict(vars_features)
    return np.argmin(evaluations)


def ordering_choice_reinforcement(model, projections):
    """Return the full projection ordering chosen by the RL model.

    Greedily picks a variable at each projection level; the ordering
    index is built in a factorial number system, each level's choice
    weighted by (nvar-1-level)!.
    """
    nvar = len(projections[0])
    ordering = 0
    for level in range(nvar-1):
        polynomials = projections[ordering][level]
        next_var = var_choice_reinforcement(model, polynomials)
        ordering += next_var * math.factorial(nvar-1-level)
    return ordering


def test_reinforcement_model(ml_model, method='Normal', nvar=3):
    """Evaluate a reinforcement model on the test set.

    Returns the metrics dict from ``compute_metrics`` with every entry
    except 'Accuracy' and 'Markup' scaled by nvar! (presumably to put
    per-ordering counts on a per-instance scale — verify against
    ``compute_metrics``).
    """
    train_data_filename = find_dataset_filename('Test', method=method)
    with open(train_data_filename, 'rb') as train_data_file:
        testing_dataset = pickle.load(train_data_file)
    # trained_model_filename = find_model_filename('reinforcement', ml_model)
    # with open(trained_model_filename, 'rb') as trained_model_file:
    #     model = pickle.load(trained_model_file)
    model = ml_model
    chosen_indices = [ordering_choice_reinforcement(model, projections)
                      for projections in testing_dataset['projections']]
    # NOTE(review): 'reinfocement' is a typo but is passed through as a
    # runtime label — left unchanged in case consumers match on it.
    # Renamed local from `metrics` to avoid shadowing sklearn.metrics.
    raw_metrics = compute_metrics(chosen_indices, testing_dataset['labels'],
                                  testing_dataset['timings'],
                                  testing_dataset['cells'], 'reinfocement')
    augmented_metrics = {key: raw_metrics[key]
                         if key in ['Accuracy', 'Markup']
                         else math.factorial(nvar)*raw_metrics[key]
                         for key in raw_metrics}
    return augmented_metrics
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
You can’t perform that action at this time.