Now heuristics and regression also available
Tereso del Rio committed Sep 14, 2023
1 parent 2c5d807 commit 94aaa66
Showing 3 changed files with 203 additions and 0 deletions.
87 changes: 87 additions & 0 deletions main_heuristics.py
@@ -0,0 +1,87 @@
import csv
import math
import pickle
import random
import numpy as np
from Heuristics.heuristics_guess import not_greedy_heuristic_guess
from Heuristics.heuristics_guess import choose_order_given_projections
from find_filename import find_dataset_filename
from test_models import compute_metrics

nvar = 3
testing_method = 'Normal'
test_dataset_filename = find_dataset_filename('Test',
                                              testing_method)
with open(test_dataset_filename, 'rb') as test_dataset_file:
    testing_dataset = pickle.load(test_dataset_file)
output_file = "heuristics_output_acc_time.csv"

# Testing heuristics that make the whole choice at once
first_heuristic = 1
for heuristic in ['gmods', 'brown', 'random', 'virtual best']:
    reps = 100
    sum_metrics = dict()
    for i in range(reps):
        if heuristic == 'virtual best':
            chosen_indices = [np.argmin(timings)
                              for timings in testing_dataset['timings']]
        elif heuristic == 'random':
            # nvar = 3, so there are 3! = 6 possible orderings (indices 0..5)
            chosen_indices = [random.randint(0, 5)
                              for timings in testing_dataset['timings']]
        else:
            chosen_indices = [not_greedy_heuristic_guess(projection[0][0], heuristic)
                              for projection in testing_dataset['projections']]
        metrics = compute_metrics(chosen_indices,
                                  testing_dataset['labels'],
                                  testing_dataset['timings'],
                                  testing_dataset['cells'])
        if len(sum_metrics) == 0:
            sum_metrics = metrics
        else:
            sum_metrics = {key: metrics[key] + sum_metrics[key]
                           for key in metrics}
    # Average over the repetitions; rescale everything except 'Accuracy'
    # and 'Markup' by the number of possible orderings (nvar!)
    aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics}
    augmented_metrics = {key: aveg_metrics[key] if key in ['Accuracy', 'Markup']
                         else math.factorial(nvar)*aveg_metrics[key]
                         for key in sum_metrics}

    print(heuristic, augmented_metrics)
    if first_heuristic == 1:
        first_heuristic = 0
        keys = list(augmented_metrics.keys())
        with open(output_file, 'a') as f:
            f.write('Choosing the whole ordering at the beginning\n')
            f.write(', '.join(['Model'] + keys) + '\n')
    with open(output_file, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([heuristic] + [augmented_metrics[key] for key in keys])

# Testing greedy heuristics
first_heuristic = 1  # reset so this section also gets its header row
for heuristic in ['brown', 'gmods', 'random', 'virtual best']:
    reps = 100
    sum_metrics = dict()
    for i in range(reps):
        if heuristic == 'virtual best':
            chosen_indices = [np.argmin(timings)
                              for timings in testing_dataset['timings']]
        elif heuristic == 'random':
            chosen_indices = [random.randint(0, 5)
                              for timings in testing_dataset['timings']]
        else:
            chosen_indices = [choose_order_given_projections(projection, heuristic)
                              for projection in testing_dataset['projections']]
        metrics = compute_metrics(chosen_indices,
                                  testing_dataset['labels'],
                                  testing_dataset['timings'],
                                  testing_dataset['cells'])
        if len(sum_metrics) == 0:
            sum_metrics = metrics
        else:
            sum_metrics = {key: metrics[key] + sum_metrics[key]
                           for key in metrics}
    aveg_metrics = {key: sum_metrics[key]/reps for key in sum_metrics}
    augmented_metrics = {key: aveg_metrics[key] if key in ['Accuracy', 'Markup']
                         else math.factorial(nvar)*aveg_metrics[key]
                         for key in sum_metrics}

    print(heuristic, augmented_metrics)
    if first_heuristic == 1:
        first_heuristic = 0
        keys = list(augmented_metrics.keys())
        with open(output_file, 'a') as f:
            f.write('Now choosing greedily\n')
            f.write(', '.join(['Model'] + keys) + '\n')
    with open(output_file, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([heuristic] + [augmented_metrics[key] for key in keys])
# print(sum(min(timings) for timings in testing_dataset['timings']))
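
The random baseline above draws indices in [0, 5] because with nvar = 3
variables there are 3! = 6 possible orderings. A minimal, self-contained
sketch of one index-to-ordering correspondence (the mapping actually used by
Heuristics.heuristics_guess may differ):

import math
from itertools import permutations

orderings = list(permutations(['x1', 'x2', 'x3']))
assert len(orderings) == math.factorial(3)  # 6 orderings, indices 0..5
print(orderings[5])  # ('x3', 'x2', 'x1') under lexicographic enumeration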
68 changes: 68 additions & 0 deletions main_regression.py
@@ -0,0 +1,68 @@
"""
The experiments in [1] are replicated with some changes.

The first change is that the testing data is balanced, so that all labels
are almost equally common.
We then use three training sets: the dataset as in [1], a balanced dataset,
and a data-augmentation dataset.

[1] Florescu, D., England, M. (2020). A Machine Learning Based Software Pipeline
to Pick the Variable Ordering for Algorithms with Polynomial Inputs.
Bigatti, A., Carette, J., Davenport, J., Joswig, M., de Wolff, T. (eds)
Mathematical Software, ICMS 2020. ICMS 2020. Lecture Notes in Computer Science,
vol 12097. Springer, Cham. https://doi.org/10.1007/978-3-030-52200-1_30
"""
import csv
from config.ml_models import ml_regressors
from create_clean_dataset import cleaning_dataset
from test_train_datasets import create_train_test_datasets
from test_train_datasets import create_regression_datasets
from choose_hyperparams import choose_hyperparams
from train_models import train_model
# from test_models import test_regressor
from test_models import test_model


# Hyperparameter tuning takes a very long time, so tune_hyperparameters
# decides whether to tune them anew or to use previously tuned values.
tune_hyperparameters = False
taking_logarithms = False


# cleaning_dataset()
# create_train_test_datasets()
create_regression_datasets(taking_logarithms=taking_logarithms)

paradigm = "regression"
# if tune_hyperparameters:
#     for ml_model in ml_regressors:
#         print(f"Choosing hyperparameters for {ml_model} in {paradigm}")
#         choose_hyperparams(ml_model, paradigm)
for ml_model in ml_regressors:
    print(f"Training {ml_model}")
    print(f"for {paradigm}")
    train_model(ml_model, paradigm)
testing_method = 'augmented'
output_file = "regression_output_acc_time.csv"
# with open(output_file, 'a') as f:
# f.write("Now without logarithms and without aveg_not_zero\n")

first_time = 1
for ml_model in ml_regressors:
    # For KNNR to run properly, X.shape[0] has been changed to len(X)
    # on line 240 of
    # C:\Software\Python37\Lib\site-packages\sklearn\neighbors\_regression.py
    print(f"Testing models trained with {ml_model}")
    metrics = test_model(ml_model, paradigm=paradigm,
                         testing_method=testing_method)
    if first_time == 1:
        first_time = 0
        keys = list(metrics.keys())
        with open(output_file, 'a') as f:
            f.write('No more cheating; also not taking logarithms\n')
            f.write(', '.join(['Model'] + keys) + '\n')
    with open(output_file, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([ml_model] + [metrics[key] for key in keys])
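
Since taking_logarithms toggles a log transform during dataset creation, here
is a minimal sketch of the usual pattern it refers to (an assumption: the toy
data, the choice of regressor, and the idea that timings are the regression
targets are all illustrative, not the repository's API):

import numpy as np
from sklearn.linear_model import LinearRegression

X_train = np.array([[1.0], [2.0], [3.0]])   # toy features
y_train = np.array([0.1, 1.0, 30.0])        # toy timings in seconds
model = LinearRegression()
model.fit(X_train, np.log1p(y_train))       # fit on log(1 + t) to compress the heavy tail
y_pred = np.expm1(model.predict(X_train))   # map predictions back to seconds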
48 changes: 48 additions & 0 deletions make_plots.py
@@ -0,0 +1,48 @@
"""Make some plots"""
import os
import numpy as np
from numpy import sort
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
matplotlib.rcParams['mathtext.rm'] = 'Bitstream Vera Sans'
matplotlib.rcParams['mathtext.it'] = 'Bitstream Vera Sans:italic'
matplotlib.rcParams['mathtext.bf'] = 'Bitstream Vera Sans:bold'
matplotlib.rcParams['mathtext.fontset'] = 'cm'
matplotlib.rcParams['font.family'] = 'STIXGeneral'

fontsize = 15
desired_font = {'fontname': 'monospace'}
matplotlib.rcParams.update({'font.size': fontsize})


def survival_plot(timings: dict, plot_name="survival_plot"):
    """Receive a dictionary mapping the name of each method
    to the list of timings it took on each of the problems."""
    color = cm.rainbow(np.linspace(0, 1, len(timings)+1))
    # color[4]=[0.8,0.8,0.2,1]
    # color[3]=[0.65,0.42,0.42,1]
    # color[2]=[0.00,1,0.5,1]
    # color = ['0','0.5','0','0.5','0','0.5']
    style = ['--'] * len(timings)
    dashes = [(1, 0), (5, 1), (5, 1, 1, 1), (2, 1, 2, 1), (1, 1), (5, 5)]\
        + [(1, 0)] * len(timings)

    for method, c, s, d in zip(timings, color, style, dashes):
        # Discard problems that hit the 30s or 60s timeout
        not_timeout_timings = [timing for timing in timings[method]
                               if timing != 30 and timing != 60]
        sorted_timings = sort(not_timeout_timings)
        # Cumulative time needed to finish the i fastest problems
        accumulative_timings = [sum(sorted_timings[:i])
                                for i in range(len(sorted_timings))]
        plt.plot(accumulative_timings, list(range(len(accumulative_timings))),
                 s, color=c, label=method, dashes=d)
    plt.xlabel('Time', fontsize=fontsize)
    plt.ylabel('No. problems finished', fontsize=fontsize)
    plt.legend(prop={'family': 'monospace', 'size': fontsize-2},
               loc='lower right')
    figure_location = os.path.join(os.path.dirname(__file__), 'Art',
                                   f'{plot_name}.png')
    plt.savefig(figure_location)
    plt.cla()
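
For instance, a hypothetical call (method names and timings invented for
illustration; 30 and 60 are the timeout values the function filters out):

survival_plot({'gmods': [0.5, 2.1, 3.4, 30],
               'brown': [0.7, 1.8, 3.2, 60],
               'random': [1.5, 4.0, 30, 30]},
              plot_name='heuristics_survival')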
