Some unused code removed
Tereso del Rio committed Sep 23, 2023
1 parent c33c4a6 commit d287c98
Showing 8 changed files with 74 additions and 502 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
@@ -4,7 +4,7 @@ on: [push]

 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: windows-latest

     steps:
       - name: Checkout code
29 changes: 14 additions & 15 deletions create_clean_dataset.py
@@ -11,25 +11,24 @@
     from dataset_manipulation import remove_notunique_features
 else:
     from packages.dataset_manipulation import remove_notunique_features
-from from_poly_set_to_features import poly_set_feature_extractor
 from find_filename import find_dataset_filename
 from find_filename import find_other_filename


-def create_dataframe(dataset):
-    all_features = []
-    all_labels = dataset[1][:]
-    all_timings = dataset[2][:]
-    all_original_polynomials = []
-    for index, all_projections in enumerate(dataset[0]):
-        original_polynomials = all_projections[0][0]
-        all_original_polynomials.append(original_polynomials)
-    names, all_features =\
-        poly_set_feature_extractor(all_original_polynomials,
-                                   determine_standarization=True,
-                                   determine_unique_features=True)
-    return np.array(all_original_polynomials), np.array(names),\
-        np.array(all_features), np.array(all_labels), np.array(all_timings)
+# def create_dataframe(dataset):
+#     all_features = []
+#     all_labels = dataset[1][:]
+#     all_timings = dataset[2][:]
+#     all_original_polynomials = []
+#     for index, all_projections in enumerate(dataset[0]):
+#         original_polynomials = all_projections[0][0]
+#         all_original_polynomials.append(original_polynomials)
+#     names, all_features =\
+#         poly_set_feature_extractor(all_original_polynomials,
+#                                    determine_standarization=True,
+#                                    determine_unique_features=True)
+#     return np.array(all_original_polynomials), np.array(names),\
+#         np.array(all_features), np.array(all_labels), np.array(all_timings)
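# Illustrative note (an assumption, not part of this commit), reading the
# indexing above: dataset[0] appears to hold one projection table per
# instance, dataset[1] the labels, dataset[2] the timings, and
# all_projections[0][0] the original, unprojected polynomial set.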


# dataset_filename = os.path.join(os.path.dirname(__file__),
125 changes: 0 additions & 125 deletions from_poly_set_to_features.py

This file was deleted.

11 changes: 0 additions & 11 deletions packages/dataset_manipulation/dataset_manipulation.py
@@ -2,8 +2,6 @@
 import numpy as np
 import math
 import random
-from .exploit_symmetries import give_all_symmetries
-from .exploit_symmetries import augmentate_timings
 from itertools import permutations
 # from sklearn.preprocessing import normalize

@@ -106,15 +104,6 @@ def name_unique_features(names, features):
     return new_names


-def get_unique_feature_names(unique_names, names, features):
-    """Return the features corresponding to a name in 'unique_names'."""
-    unique_features = []
-    for index, feature in enumerate(zip(*features)):
-        if names[index] in unique_names:
-            unique_features.append(feature)
-    return np.transpose(unique_features)
-
-
 def remove_notunique_features(names, features, nvar=3):
     # creating some targets and timings because the function requires them
     timings = [list(range(math.factorial(nvar)))]*len(features)
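# Illustrative note (an assumption, not part of this commit): the dummy
# timings give every instance math.factorial(nvar) entries, one per variable
# ordering, which seems to be the shape the downstream helpers expect;
# for nvar = 3 that is [0, 1, 2, 3, 4, 5] repeated for each feature row.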
88 changes: 0 additions & 88 deletions packages/dataset_manipulation/exploit_symmetries.py

This file was deleted.

6 changes: 1 addition & 5 deletions replicating_Dorians_features.py
@@ -1,11 +1,7 @@
-"""
-IS THIS BEING USED?
-YES, IT IS!
-"""

 import itertools
-# from xml.sax.handler import all_features
 import numpy as np

 from config.general_values import operations


111 changes: 15 additions & 96 deletions test_models.py
@@ -1,16 +1,11 @@
import csv
import math
import pickle
import importlib.util
import numpy as np
from sklearn import metrics
from config.general_values import dataset_qualities
from config.ml_models import all_models


from config.ml_models import regressors
from config.ml_models import classifiers
from config.ml_models import heuristics
from find_filename import find_output_filename
from find_filename import find_dataset_filename
from find_filename import find_model_filename
from main_heuristics import ordering_choices_heuristics
# from train_models import ordering_choice_reinforcement
@@ -22,97 +17,21 @@
from packages.dataset_manipulation.dataset_manipulation import augmentate_instance


# def test_model(trained_model_filename, test_dataset_filename):
# def test_model(ml_model, paradigm, testing_method='Augmented'):
#     test_dataset_filename = find_dataset_filename('Test',
#                                                   testing_method)
#     with open(test_dataset_filename, 'rb') as test_dataset_file:
#         testing_dataset = pickle.load(test_dataset_file)
#     trained_model_filename = find_model_filename(paradigm,
#                                                  ml_model)
#     with open(trained_model_filename, 'rb') as trained_model_file:
#         model = pickle.load(trained_model_file)
#     with open(test_dataset_filename, 'rb') as test_dataset_file:
#         x_test, y_test, _ = pickle.load(test_dataset_file)
#     y_pred = model.predict(x_test)
#     return metrics.accuracy_score(y_test, y_pred)


def test_results(training_method):
    output_filename = find_output_filename(training_method)
    with open(output_filename, 'w') as output_file:
        writer_balanced = csv.writer(output_file)
        writer_balanced.writerow(["Name"] + dataset_qualities)
        for ml_model in all_models:
            trained_model_filename = find_model_filename(training_method,
                                                         ml_model)
            accuracy = dict()
            for testing_method in dataset_qualities:
                test_dataset_filename = find_dataset_filename('Test',
                                                              testing_method)
                accuracy[testing_method] = test_model(trained_model_filename,
                                                      test_dataset_filename)
            round_accuracies = [round(acc, 2)
                                for acc in [accuracy[method]
                                            for method in dataset_qualities]]
            writer_balanced.writerow([ml_model + "-" + training_method] +
                                     round_accuracies)


def test_classifier(ml_model, testing_method='Augmented'):
    trained_model_filename = find_model_filename('Classification',
                                                 ml_model)
    test_dataset_filename = find_dataset_filename('Test',
                                                  testing_method)
    with open(trained_model_filename, 'rb') as trained_model_file:
        model = pickle.load(trained_model_file)
    with open(test_dataset_filename, 'rb') as test_dataset_file:
        x_test, y_test, all_timings = pickle.load(test_dataset_file)
    chosen_indices = [return_regressor_choice(model, features) for features in x_test]
    return compute_metrics(chosen_indices, y_test, all_timings)


def timings_in_test(model, testing_method='Augmented', training_method=None):
    test_dataset_filename = find_dataset_filename('Test',
                                                  testing_method)
    with open(test_dataset_filename, 'rb') as test_dataset_file:
        x_test, _, all_timings = pickle.load(test_dataset_file)
    if model == 'optimal':
        t_pred = [min(timings) for timings in all_timings]
    else:
        trained_model_filename = find_model_filename(training_method,
                                                     model)
        with open(trained_model_filename, 'rb') as trained_model_file:
            model = pickle.load(trained_model_file)
        y_pred = model.predict(x_test)
        # This doesn't work because augmented and balanced
        # only return one timing, not 6
        t_pred = [timings[y] for timings, y in zip(all_timings, y_pred)]
    return t_pred
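# A possible guard for the caveat noted above (a hedged sketch, not part of
# this commit; it assumes augmented/balanced test sets store a single timing
# per instance rather than one per ordering):
#     t_pred = [timings[y] if hasattr(timings, '__len__') else timings
#               for timings, y in zip(all_timings, y_pred)]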


def test_regressor(ml_model):
    trained_model_filename = find_model_filename('Regression',
                                                 ml_model)
    test_dataset_filename = find_dataset_filename('Test',
                                                  'Regression')
    with open(trained_model_filename, 'rb') as trained_model_file:
        model = pickle.load(trained_model_file)
    with open(test_dataset_filename, 'rb') as test_dataset_file:
        x_test, y_test, all_timings = pickle.load(test_dataset_file)
    y_pred = model.predict(x_test)
    avg_error = sum([abs(p-t) for p, t in zip(y_pred, y_test)])/len(y_pred)
    print(f"{ml_model} gave {avg_error}")


def test_model(ml_model, paradigm, testing_method='Augmented'):
    test_dataset_filename = find_dataset_filename('Test',
                                                  testing_method)
    with open(test_dataset_filename, 'rb') as test_dataset_file:
        testing_dataset = pickle.load(test_dataset_file)
    trained_model_filename = find_model_filename(paradigm,
                                                 ml_model)
    with open(trained_model_filename, 'rb') as trained_model_file:
        model = pickle.load(trained_model_file)
    chosen_indices = choose_indices(model, testing_dataset)
    return compute_metrics(chosen_indices,
                           testing_dataset['labels'],
                           testing_dataset['timings'],
                           testing_dataset['cells'],
                           ml_model)
    # chosen_indices = choose_indices(model, testing_dataset)
    # return compute_metrics(chosen_indices,
    #                        testing_dataset['labels'],
    #                        testing_dataset['timings'],
    #                        testing_dataset['cells'],
    #                        ml_model)


def choose_indices(model_name, testing_dataset, paradigm='', training_quality='Augmented'):
204 changes: 43 additions & 161 deletions train_models.py
@@ -1,18 +1,17 @@
 import math

 import pickle
 import random
 from yaml_tools import read_yaml_from_file
 from config.ml_models import all_models
 from find_filename import find_dataset_filename
 from find_filename import find_hyperparams_filename
 from find_filename import find_model_filename
-from find_filename import find_other_filename
-from dataset_manipulation import give_all_symmetries
-import numpy as np
+# from find_filename import find_other_filename
+# from dataset_manipulation import give_all_symmetries
+# import numpy as np
 # from sklearn import metrics
-from itertools import combinations
-from replicating_Dorians_features import compute_features_for_var
-from test_models import compute_metrics
+# from itertools import combinations
+# from replicating_Dorians_features import compute_features_for_var
+# from test_models import compute_metrics


@@ -33,156 +32,39 @@ def train_model(model_name, paradigm, training_quality):
     return model


-def train_regression_model(model_name, method):
-    train_data_filename = find_dataset_filename('Train', method=method)
-    with open(train_data_filename, 'rb') as train_data_file:
-        train_dataset = pickle.load(train_data_file)
-    # hyperparams_file = find_hyperparams_filename(method, model_name)
-    # hyperparams = read_yaml_from_file(hyperparams_file)
-    train_dataset['features'] = np.asarray([x_t for x_t, t_t in zip(train_dataset['features'], train_dataset['timings'])
-                                            if t_t[:4] != 'Over'], dtype=float)
-    train_dataset['timings'] = np.asarray([t_t for t_t in train_dataset['timings']
-                                           if t_t[:4] != 'Over'], dtype=float)
-    ####
-    # IS THIS REALLY DOING SOMETHING?
-    # What if we used twice the time limit instead?
-    current_model = ml_regressors[model_name]
-    reg = current_model()  # **hyperparams)
-    reg.fit(train_dataset['features'], train_dataset['timings'])
-    # trained_model_filename = find_model_filename(method, model_name, 'regression')
-    # with open(trained_model_filename, 'wb') as trained_model_file:
-    #     pickle.dump(reg, trained_model_file)
-    return reg
-
-
-def choose_using_regression(x_test, regressor):
-    timings = regressor.predict(give_all_symmetries(x_test, 0))
-    return np.argmin(timings)
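# Illustrative note (an assumption, not part of this commit):
# give_all_symmetries appears to expand one instance into the
# math.factorial(nvar) feature vectors obtained by permuting the variables,
# one per ordering, so the np.argmin over the predicted timings is directly
# the index of the chosen variable ordering.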


-def test_regression_model(method, regressor):
-    test_data_filename = find_dataset_filename('Test', method=method)
-    with open(test_data_filename, 'rb') as test_data_file:
-        x_test, y_test, t_test = pickle.load(test_data_file)
-    x_test = np.asarray([x_t for x_t, t_t in zip(x_test, t_test)
-                         if t_t[:4] != 'Over'], dtype=float)
-    y_test = np.asarray([y_t for y_t, t_t in zip(y_test, t_test)
-                         if t_t[:4] != 'Over'], dtype=float)
-    y_pred = [choose_using_regression(x_i, regressor) for x_i in x_test]
-
-
-def train_reinforcement_model(model_name, method='Normal'):
-    train_data_filename = find_dataset_filename('Train', method=method)
-    with open(train_data_filename, 'rb') as train_data_file:
-        train_dataset = pickle.load(train_data_file)
-    # hyperparams_file = find_hyperparams_filename(method, model_name)
-    # hyperparams = read_yaml_from_file(hyperparams_file)
-    current_model = all_models[model_name]
-    # model = current_model(**hyperparams)
-    model = current_model()
-    first_polys = train_dataset['projections'][0][0][0]
-    first_features = get_vars_features(first_polys)
-    first_labels = [random.random() for _ in range(len(first_features))]
-    model.fit(first_features, first_labels)
-    training_features, training_labels = [], []
-    for i in range(30):
-        for projections, timings \
-                in zip(train_dataset['projections'], train_dataset['timings']):
-            new_training_features, new_training_labels = \
-                training_instances_reinforcement(model, projections, timings)
-            training_features += new_training_features
-            training_labels += new_training_labels
-        model.fit(training_features, training_labels)
-        print(test_reinforcement_model(model))
-    trained_model_filename = find_model_filename('reinforcement', model_name)
-    with open(trained_model_filename, 'wb') as trained_model_file:
-        pickle.dump(model, trained_model_file)
-
-
-def training_instances_reinforcement(model, projections, timings):
-    original_polynomials = projections[0][0]
-    nvar = len(original_polynomials[0][0]) - 1
-    vars_features = get_vars_features(original_polynomials)
-    evaluations = [model.predict([var_features])[0]
-                   for var_features in vars_features]
-    timing = []
-    for var in range(nvar):
-        # returns the polynomials after projection wrt var
-        projected_polynomials = projections[var * math.factorial(nvar-1)][1]
-        new_var = var_choice_reinforcement(model, projected_polynomials)
-        ordering_chosen = new_var + var * math.factorial(nvar-1)
-        timing.append(timings[ordering_chosen])
-    # now compute which part of the difference between
-    # evaluations[i]/evaluations[j] and timing[i]/timing[j]
-    # corresponds to each evaluation
-    instances_features = []
-    instances_labels = []
-    pairs = list(combinations(range(nvar), 2))
-    for i, j in pairs:
-        correction_coefficient = \
-            math.sqrt((timing[i]/timing[j])/(evaluations[i]/evaluations[j]))
-        instances_features += [vars_features[i], vars_features[j]]
-        instances_labels += [evaluations[i]*correction_coefficient,
-                             evaluations[j]/correction_coefficient]
-    return instances_features, instances_labels
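# Illustrative check (not part of this commit) of the correction step above:
# c = sqrt((t_i/t_j)/(e_i/e_j)) rescales a pair of evaluations so that their
# ratio matches the observed timing ratio, splitting the adjustment evenly:
#     (e_i*c) / (e_j/c) = (e_i/e_j) * c**2 = t_i/t_j
# e.g. e_i, e_j = 2.0, 1.0 and t_i, t_j = 8.0, 1.0 give c = 2.0, so the
# corrected labels 4.0 and 0.5 reproduce the timing ratio 8.0 exactly.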


-def get_vars_features(polynomials):
-    '''Will return the features of each variable
-    in the given set of polynomials'''
-    vars_features = []
-    nvar = len(polynomials[0][0]) - 1
-    unique_features_filename = find_other_filename("unique_features")
-    with open(unique_features_filename, 'rb') as unique_features_file:
-        unique_names = pickle.load(unique_features_file)
-    for var in range(nvar):
-        var_features, var_names = \
-            compute_features_for_var(polynomials, var)
-        var_features = [feature for feature, name
-                        in zip(var_features, var_names)
-                        if name in unique_names]
-        vars_features.append(var_features)
-    return vars_features
-
-
-def var_choice_reinforcement(model, polynomials):
-    '''This function will return the next variable to project
-    chosen by the model trained using reinforcement'''
-    vars_features = get_vars_features(polynomials)
-    evaluations = model.predict(vars_features)
-    min_value = np.min(evaluations)
-    min_indices = np.where(evaluations == min_value)[0]
-    # Randomly select one of the minimal indices
-    return np.random.choice(min_indices)
-
-
-def ordering_choice_reinforcement(model, projections):
-    '''This function will return the ordering chosen by the RL model'''
-    nvar = len(projections[0])
-    ordering = 0
-    for level in range(nvar-1):
-        polynomials = projections[ordering][level]
-        next_var = var_choice_reinforcement(model, polynomials)
-        ordering += next_var * math.factorial(nvar-1-level)
-    return ordering
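# Worked example (not part of this commit), assuming next_var indexes the
# variables still unprojected at each level: with nvar = 3, choosing
# variable 1 at level 0 and then the remaining variable 0 at level 1 gives
#     ordering = 1 * 2! + 0 * 1! = 2
# an index into the math.factorial(nvar) = 6 possible orderings.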


-def test_reinforcement_model(model_name, method='Normal', nvar=3):
-    train_data_filename = find_dataset_filename('Test', method=method)
-    with open(train_data_filename, 'rb') as train_data_file:
-        testing_dataset = pickle.load(train_data_file)
-    # trained_model_filename = find_model_filename('reinforcement', model_name)
-    # with open(trained_model_filename, 'rb') as trained_model_file:
-    #     model = pickle.load(trained_model_file)
-    model = model_name
-    chosen_indices = [ordering_choice_reinforcement(model, projections)
-                      for projections in testing_dataset['projections']]
-    metrics = compute_metrics(chosen_indices,
-                              testing_dataset['labels'],
-                              testing_dataset['timings'],
-                              testing_dataset['cells'],
-                              'reinforcement')
-    augmented_metrics = {key: metrics[key] if key in ['Accuracy', 'Markup']
-                         else math.factorial(nvar)*metrics[key]
-                         for key in metrics}
-    return augmented_metrics
+# def train_regression_model(model_name, method):
+#     train_data_filename = find_dataset_filename('Train', method=method)
+#     with open(train_data_filename, 'rb') as train_data_file:
+#         train_dataset = pickle.load(train_data_file)
+#     # hyperparams_file = find_hyperparams_filename(method, model_name)
+#     # hyperparams = read_yaml_from_file(hyperparams_file)
+#     train_dataset['features'] = np.asarray([x_t for x_t, t_t in zip(train_dataset['features'], train_dataset['timings'])
+#                                             if t_t[:4] != 'Over'], dtype=float)
+#     train_dataset['timings'] = np.asarray([t_t for t_t in train_dataset['timings']
+#                                            if t_t[:4] != 'Over'], dtype=float)
+#     ####
+#     # IS THIS REALLY DOING SOMETHING?
+#     # What if we used twice the time limit instead?
+#     current_model = ml_regressors[model_name]
+#     reg = current_model()  # **hyperparams)
+#     reg.fit(train_dataset['features'], train_dataset['timings'])
+#     # trained_model_filename = find_model_filename(method, model_name, 'regression')
+#     # with open(trained_model_filename, 'wb') as trained_model_file:
+#     #     pickle.dump(reg, trained_model_file)
+#     return reg
+
+
+# def choose_using_regression(x_test, regressor):
+#     timings = regressor.predict(give_all_symmetries(x_test, 0))
+#     return np.argmin(timings)
+
+
+# def test_regression_model(method, regressor):
+#     test_data_filename = find_dataset_filename('Test', method=method)
+#     with open(test_data_filename, 'rb') as test_data_file:
+#         x_test, y_test, t_test = pickle.load(test_data_file)
+#     x_test = np.asarray([x_t for x_t, t_t in zip(x_test, t_test)
+#                          if t_t[:4] != 'Over'], dtype=float)
+#     y_test = np.asarray([y_t for y_t, t_t in zip(y_test, t_test)
+#                          if t_t[:4] != 'Over'], dtype=float)
+#     y_pred = [choose_using_regression(x_i, regressor) for x_i in x_test]
