-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Now heuristics and regression also available
Tereso del Rio
committed
Sep 14, 2023
1 parent
2c5d807
commit 94aaa66
Showing
3 changed files
with
203 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import csv | ||
import math | ||
import pickle | ||
import random | ||
import numpy as np | ||
from Heuristics.heuristics_guess import not_greedy_heuristic_guess | ||
from Heuristics.heuristics_guess import choose_order_given_projections | ||
from find_filename import find_dataset_filename | ||
from test_models import compute_metrics | ||
|
||
# Experiment configuration.
nvar = 3  # number of variables; there are factorial(nvar) possible orderings
testing_method = 'Normal'
output_file = "heuristics_output_acc_time.csv"

# Load the pickled test dataset produced by the data-preparation pipeline.
test_dataset_filename = find_dataset_filename('Test',
                                              testing_method)
with open(test_dataset_filename, 'rb') as test_dataset_file:
    testing_dataset = pickle.load(test_dataset_file)
|
||
# Test heuristics that choose the whole variable ordering upfront.
# Each heuristic is averaged over `reps` runs (only 'random' is stochastic;
# the repetition keeps all rows comparable).
first_heuristic = 1
for heuristic in ['gmods', 'brown', 'random', 'virtual best']:
    reps = 100
    sum_metrics = dict()
    for i in range(reps):
        if heuristic == 'virtual best':
            # Hindsight lower bound: always pick the fastest ordering.
            chosen_indices = [np.argmin(timings)
                              for timings in testing_dataset['timings']]
        elif heuristic == 'random':
            # Uniform choice among all factorial(nvar) orderings
            # (equals randint(0, 5) for nvar == 3, as before).
            chosen_indices = [random.randint(0, math.factorial(nvar) - 1)
                              for timings in testing_dataset['timings']]
        else:
            chosen_indices = [not_greedy_heuristic_guess(projection[0][0], heuristic)
                              for projection in testing_dataset['projections']]
        metrics = compute_metrics(chosen_indices,
                                  testing_dataset['labels'],
                                  testing_dataset['timings'],
                                  testing_dataset['cells'])
        if not sum_metrics:
            sum_metrics = metrics
        else:
            sum_metrics = {key: metrics[key] + sum_metrics[key]
                           for key in metrics}
    aveg_metrics = {key: sum_metrics[key] / reps for key in sum_metrics}
    # Accuracy/Markup are ratios; the other metrics are per-ordering totals,
    # scaled by the number of possible orderings as in the original script.
    augmented_metrics = {key: aveg_metrics[key]
                         if key in ['Accuracy', 'Markup']
                         else math.factorial(nvar) * aveg_metrics[key]
                         for key in sum_metrics}

    print(heuristic, augmented_metrics)
    # Single append-mode open for both the one-off header and the data row
    # (previously the file was opened twice per heuristic).
    with open(output_file, 'a', newline='') as f:
        if first_heuristic == 1:
            first_heuristic = 0
            keys = list(augmented_metrics.keys())
            f.write('Choosing the whole ordering in the beginning\n')
            f.write(', '.join(['Model'] + keys) + '\n')
        writer = csv.writer(f)
        writer.writerow([heuristic] + [augmented_metrics[key] for key in keys])
|
||
# Test greedy heuristics that choose one variable at a time.
# BUG FIX: this section previously reused `first_heuristic`, which the earlier
# section had already set to 0, so the 'Now choosing greedily' header and its
# column names were never written.  An independent flag fixes that.
first_greedy = 1
for heuristic in ['brown', 'gmods', 'random', 'virtual best']:
    reps = 100
    sum_metrics = dict()
    for i in range(reps):
        if heuristic == 'virtual best':
            # Hindsight lower bound: always pick the fastest ordering.
            chosen_indices = [np.argmin(timings)
                              for timings in testing_dataset['timings']]
        elif heuristic == 'random':
            # Uniform choice among all factorial(nvar) orderings
            # (equals randint(0, 5) for nvar == 3, as before).
            chosen_indices = [random.randint(0, math.factorial(nvar) - 1)
                              for timings in testing_dataset['timings']]
        else:
            chosen_indices = [choose_order_given_projections(projection, heuristic)
                              for projection in testing_dataset['projections']]
        metrics = compute_metrics(chosen_indices,
                                  testing_dataset['labels'],
                                  testing_dataset['timings'],
                                  testing_dataset['cells'])
        if not sum_metrics:
            sum_metrics = metrics
        else:
            sum_metrics = {key: metrics[key] + sum_metrics[key]
                           for key in metrics}
    aveg_metrics = {key: sum_metrics[key] / reps for key in sum_metrics}
    # Accuracy/Markup are ratios; the other metrics are per-ordering totals,
    # scaled by the number of possible orderings as in the original script.
    augmented_metrics = {key: aveg_metrics[key]
                         if key in ['Accuracy', 'Markup']
                         else math.factorial(nvar) * aveg_metrics[key]
                         for key in sum_metrics}

    print(heuristic, augmented_metrics)
    # Single append-mode open for both the one-off header and the data row.
    with open(output_file, 'a', newline='') as f:
        if first_greedy == 1:
            first_greedy = 0
            keys = list(augmented_metrics.keys())
            f.write('Now choosing greedily \n')
            f.write(', '.join(['Model'] + keys) + '\n')
        writer = csv.writer(f)
        writer.writerow([heuristic] + [augmented_metrics[key] for key in keys])
# print(sum(min(timings) for timings in testing_dataset['timings']))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
""" | ||
The experiments in [1] are replicated with some changes. | ||
|
||
The first change is that the testing data is balanced, so that all labels | ||
are almost equally common. | ||
Then we use three training sets; dataset as in [1], balanced dataset | ||
and data augmentation dataset. | ||
|
||
[1]Florescu, D., England, M. (2020). A Machine Learning Based Software Pipeline | ||
to Pick the Variable Ordering for Algorithms with Polynomial Inputs. | ||
Bigatti, A., Carette, J., Davenport, J., Joswig, M., de Wolff, T. (eds) | ||
Mathematical Software, ICMS 2020. ICMS 2020. Lecture Notes in Computer Science, | ||
vol 12097. Springer, Cham. https://doi.org/10.1007/978-3-030-52200-1_30 | ||
""" | ||
import csv | ||
from config.ml_models import ml_regressors | ||
from create_clean_dataset import cleaning_dataset | ||
from test_train_datasets import create_train_test_datasets | ||
from test_train_datasets import create_regression_datasets | ||
from choose_hyperparams import choose_hyperparams | ||
from train_models import train_model | ||
# from test_models import test_regressor | ||
from test_models import test_model | ||
|
||
|
||
# Hyperparameter tuning takes a very long time;
# tune_hyperparameters decides whether to tune them now
# or to reuse previously tuned values.
tune_hyperparameters = False
# Whether to train/test on log-transformed timings.
taking_logarithms = False


# Earlier pipeline stages, kept for reference:
# cleaning_dataset()
# create_train_test_datasets()
create_regression_datasets(taking_logarithms=taking_logarithms)

paradigm = "regression"
# Tuning pass, disabled while tune_hyperparameters is False:
# if tune_hyperparameters:
#     for ml_model in ml_regressors:
#         print(f"Choosing hyperparameters for {ml_model} in {paradigm}")
#         choose_hyperparams(ml_model, paradigm)
# Train every regressor on the regression datasets created above.
for ml_model in ml_regressors:
    print(f"Training {ml_model}")
    print(f"for {paradigm}")
    train_model(ml_model, paradigm)
testing_method = 'augmented'
output_file = "regression_output_acc_time.csv"
# with open(output_file, 'a') as f:
#     f.write("Now without logarithms and without aveg_not_zero\n")
|
||
# Test every trained regressor and append its metrics to the CSV report.
first_time = 1
for ml_model in ml_regressors:
    # NOTE(review): KNNR previously required patching a local sklearn install
    # (X.shape[0] -> len(X) in neighbors/_regression.py) to run — confirm
    # whether this is still needed with the currently installed sklearn.
    print(f"Testing models trained in {ml_model}")
    metrics = test_model(ml_model, paradigm=paradigm,
                         testing_method=testing_method)
    # Single append-mode open for both the one-off header and the data row
    # (previously the file was opened twice on the first iteration).
    with open(output_file, 'a', newline='') as f:
        if first_time == 1:
            first_time = 0
            keys = list(metrics.keys())
            f.write('No more cheating; no taking logarithms also\n')
            f.write(', '.join(['Model'] + keys) + '\n')
        writer = csv.writer(f)
        writer.writerow([ml_model] + [metrics[key] for key in keys])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
"""Make some plots""" | ||
import os | ||
import numpy as np | ||
from numpy import sort | ||
import matplotlib | ||
import matplotlib.pyplot as plt | ||
from matplotlib.pyplot import cm | ||
# Global matplotlib configuration shared by every plot in this module.
fontsize = 15
desired_font = {'fontname': 'monospace'}
matplotlib.rcParams.update({
    'mathtext.rm': 'Bitstream Vera Sans',
    'mathtext.it': 'Bitstream Vera Sans:italic',
    'mathtext.bf': 'Bitstream Vera Sans:bold',
    'mathtext.fontset': 'cm',
    'font.family': 'STIXGeneral',
    'font.size': fontsize,
})
|
||
|
||
def survival_plot(timings: dict, plot_name="survival_plot", timeouts=(30, 60)):
    """Draw a survival plot comparing the methods in *timings*.

    Parameters
    ----------
    timings : dict
        Maps each method name to the list of times that method took on
        each problem.  Values equal to one of *timeouts* are treated as
        unfinished runs and excluded from the curve.
    plot_name : str
        Basename of the PNG written to the 'Art' subdirectory next to
        this file.
    timeouts : tuple
        Timing values that mark a timed-out problem (previously the
        hard-coded sentinels 30 and 60).
    """
    # One colour per method (one spare, matching the original palette size).
    color = cm.rainbow(np.linspace(0, 1, len(timings)+1))
    style = ['--'] * len(timings)
    # Distinct dash patterns for the first six methods, solid afterwards.
    dashes = [(1, 0), (5, 1), (5, 1, 1, 1), (2, 1, 2, 1), (1, 1), (5, 5)]\
        + [(1, 0)] * len(timings)

    for method, c, s, d in zip(timings, color, style, dashes):
        finished = sorted(t for t in timings[method] if t not in timeouts)
        # Cumulative time after solving the k easiest problems, k = 0..n.
        # Running total is O(n) — the previous sum-over-slices was O(n^2)
        # and omitted the final point, so the last finished problem was
        # never shown on the curve.
        elapsed = [0.0]
        for t in finished:
            elapsed.append(elapsed[-1] + t)
        plt.plot(elapsed, range(len(elapsed)),
                 s, color=c, label=method, dashes=d)
    plt.xlabel('Time', fontsize=fontsize)
    plt.ylabel('No. problems finished', fontsize=fontsize)
    plt.legend(prop={'family': 'monospace', 'size': fontsize-2},
               loc='lower right')
    figure_location = os.path.join(os.path.dirname(__file__), 'Art',
                                   f'{plot_name}.png')
    plt.savefig(figure_location)
    plt.cla()