Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this user
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Reseting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
delriot
/
AugmentingMathematicalDataset
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
1
Code
Issues
0
Pull requests
0
Projects
0
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Projects
Security
Insights
Files
main
.github
Heuristics
__init__.py
best_features
create_graphs_heuristics.py
extra_metrics.py
heuristic_tools.py
heuristics_guess.py
heuristics_rules.py
config
datasets
packages
utils
README.md
basic_ml.py
choose_hyperparams.py
create_clean_dataset.py
find_filename.py
main.py
main_heuristics.py
main_regression.py
main_reinforcement.py
make_plots.py
output.txt
preprocessing_Dorians_features.py
replicating_Dorians_features.py
requirements.txt
run_for_paper.py
test_models.py
test_train_datasets.py
train_models.py
yaml_tools.py
Breadcrumbs
AugmentingMathematicalDataset
/
Heuristics
/
heuristic_tools.py
Blame
Blame
Latest commit
Tereso del Rio
Now metrics for heuristic, classification and regression can be computed
Sep 14, 2023
2c5d807
·
Sep 14, 2023
History
History
202 lines (163 loc) · 7.43 KB
Breadcrumbs
AugmentingMathematicalDataset
/
Heuristics
/
heuristic_tools.py
Top
File metadata and controls
Code
Blame
202 lines (163 loc) · 7.43 KB
Raw
''' This file contains a variety of functions useful for other functions. ''' import itertools import os import pickle import math import json from numpy import Inf all_heuristics = ['sumsignsumdeg','sotd', 'old_mods', 'mods', 'logmods', 'acc_logmods', 'greedy_sotd', 'brown', 'gmods', 'greedy_logmods', 'random', 'virtual_best'] for degree_type in ['svdeg', 'signdeg', 'deg']: for monomial_operation in ['sum','max','aveg']: for polynomial_operation in ['sum','max','aveg']: all_heuristics += [polynomial_operation + monomial_operation + degree_type] expensive_heuristics = ['sotd', 'old_mods', 'mods', 'logmods', 'acc_logmods'] greedy_heuristics = [heuristic for heuristic in all_heuristics if heuristic not in expensive_heuristics] not_heuristics = ['virtual_best', 'random'] def create_pseudorderings(nvar): ''' This function returns the possible orderings for CAD but written in a different fashion that makes more sense for the way projections are saved ''' l = [list(range(nvar-i)) for i in range(nvar)] return itertools.product(*l) def compute_real_timings(timings, choice_timings_including_str, virtual_best_timings, max_penalization_if_not_finished=Inf): ''' This function returns the timing that are used for computing the metrics ''' return [ choice_time if type(choice_time) is not str else min(max_penalization_if_not_finished*virtual_best_time,2*finding_time_limit(timing)) for timing, choice_time, virtual_best_time in zip(timings, choice_timings_including_str, virtual_best_timings)] def compute_markups(virtual_best_timings, real_timings, smoother = 1): ''' This function computes the markups of the chosen orderings with respect to the virtual_best orderings. The smoother is a parameter applied to avoid unreasonable markups. ''' # the especified smoother is used markups = [(choice_timing-virtual_best_timing)/(virtual_best_timing+smoother) for virtual_best_timing, choice_timing in zip(virtual_best_timings, real_timings)] return sum(markups)/len(markups) def compute_ncells_markup(ncells, guesses,ncell_markup_default = 10): ncells_markups = [ncell[guess]/min([nc for nc in ncell if type(nc)!=str])-1 if type(ncell[guess])!=str else ncell_markup_default for ncell, guess in zip(ncells, guesses)] ncells_markup = sum(ncells_markups)/len(ncells_markups) return ncells_markup def order_mate(order): '''Returns the order that shares the first variable projected with the given order''' if order == 0: return 1 elif order == 1: return 0 elif order == 2: return 3 elif order == 3: return 2 elif order == 4: return 5 elif order == 5: return 4 else: raise Exception('Order too big') def finding_time_limit(timings): ''' Returns the timelimit that was given. ''' if min([timing for timing in timings if type(timing) is not str])>30: return 60 else: return 30 def minimum_indices(given_list): ''' Returns the indices containing the minima of a list. Helpful function for the function above ''' minimum = min(given_list) return [index for index, value in enumerate(given_list) if value == minimum] def multiplyList(myList) : ''' Multiplies all the elements in a list ''' result = 1 for x in myList: result = result * x return result def all_combinations(l): ''' Returns all possible combinations of a given list. More concretely, all possible subsets ordered in all possible ways. ''' combs_with_order = [] for i in range(1,len(l)+1): combs=list(itertools.combinations(l,i)) for comb in combs: combs_with_order+=list(itertools.permutations(comb,i)) return combs_with_order def all_combinations_fixed_length(l, i): ''' Returns all possible combinations of a given list. More concretely, all possible subsets ordered in all possible ways. ''' combs_with_order = [] combs=list(itertools.combinations(l,i)) for comb in combs: combs_with_order+=list(itertools.permutations(comb,i)) return combs_with_order def trim_dataset(dataset, minimum_time_to_consider=0): ''' Returns the dataset containing only the problems that took at least 'minimum_time_to_consider' seconds to finish. ''' projections, targets, timings, heuristics_costs, ncells = dataset new_projections = [projection for projection, timing, target in zip(projections, timings, targets) if timing[target]>minimum_time_to_consider] new_targets = [target for target, timing in zip(targets, timings) if timing[target]>minimum_time_to_consider] new_timings = [timing for timing, target in zip(timings, targets) if timing[target]>minimum_time_to_consider] new_heuristics_costs = [heuristics_cost for heuristics_cost, timing, target in zip(heuristics_costs, timings, targets) if timing[target]>minimum_time_to_consider] new_ncells = [ncells for ncells, timing, target in zip(ncells, timings, targets) if timing[target]>minimum_time_to_consider] return new_projections, new_targets, new_timings, new_heuristics_costs, new_ncells def get_dataset(without_repetition=True, return_ncells=True, minimum_time_to_consider=0): ''' Uploads the desired dataset from its location ''' if without_repetition: aux_name = 'without_repetition' else: aux_name = 'with_repetition' if return_ncells: dataset_location = os.path.join(os.path.dirname(__file__), '..','Datasets','ThreeVariableSMTLIB2021','dataset_'+aux_name+'_return_ncells.txt') #dataset_location = 'C:\\Users\\delriot\\OneDrive - Coventry University\\03Repositories\\01DEWCADCoventry\\Datasets\\dataset_without_repetition_return_ncells.txt' else: dataset_location = os.path.join(os.path.dirname(__file__), '..','Datasets','dataset_'+aux_name+'.txt') f = open(dataset_location, 'rb') dataset = pickle.load(f) f.close() return trim_dataset(dataset, minimum_time_to_consider=minimum_time_to_consider) def aveg_of_not_zero(given_list): ''' Takes the average of a list without considering the elements that are 0.''' s= sum(given_list) if s>0: return s/sum([1 for elem in given_list if elem>0]) else: return 0 def substract_two_timings(time1, time2): '''time1 minus time2''' if type(time1) is str and type(time2) is str: return 0 elif type(time1) is str and type(time2) is not str: return 30 elif type(time1) is not str and type(time2) is str: return -30 elif type(time1) is not str and type(time2) is not str: return time1-time2 # This is how to save the best features file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'best_features') # best_features = ['summaxdeg', 'avegavegdeg', 'sumsumdeg', 'avegavegsigndeg' , 'sumsignsumdeg', 'summaxsvdeg']# , 'sumsumsigndeg', 'sumsumsvdeg' # with open(file_path, 'w') as file: # json.dump(best_features, file) # This is how to load the best features with open(file_path, 'r') as file: best_features = json.load(file) paper_all_pos = all_combinations(best_features) indices = list(range(len(best_features))) paper_all_indices = [str(elem).replace(', ','>').replace('(','').replace(')','') for elem in all_combinations(indices)] existing_heuristics = ['brown', 'mods', 36, 'random', 'virtual_best'] # 36 is gmods survival_plot_heuristics = ['virtual_best', 36, 'brown'] ml_models = []
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
You can’t perform that action at this time.