# testConsistency.py
import numpy as np
import pandas as pd
from collections import defaultdict
from datetime import datetime

from GPy_ABCD.Models.modelSearch import *
from GPy_ABCD.Util.dataAndPlottingUtil import *
from synthetic_datasets import *


def get_model_round(bm, tested_models):
    '''Return the index of the search round in which a model's kernel expression was tested.'''
    for i in range(len(tested_models)):
        if bm.kernel_expression in [m.kernel_expression for m in tested_models[i]]: return i
    raise ValueError('Somehow this model is not among the tested models; equality checking failure?')


def one_run_statistics(sorted_models, tested_models, tested_k_exprs, top_n):
    '''Collect the kernel expression, utility value and discovery round of the top_n models from one search run.'''
    res = defaultdict(list)
    for m in sorted_models[:top_n]:
        res['kex'].append(m.kernel_expression)
        res['utility'].append(m.cached_utility_function)
        res['round'].append(get_model_round(m, tested_models))
    return res


def get_and_save_stats(n_runs_stats, dataset_name = dataset, expected_best = correct_k):
    '''Aggregate per-run statistics across runs and save them to a timestamped file in ./Stats.

    The default dataset_name and expected_best are presumed to come from the synthetic_datasets star-import.
    '''
    n_iterations = len(n_runs_stats)
    final_stats = defaultdict(list)
    for i in range(n_iterations):
        final_stats['correct'].append(str(expected_best))
        # Fraction of runs in which the expected kernel came 1st or appeared within the top 2 or 3
        final_stats['correct_1st'].append(sum([expected_best == n_runs_stats[j]['kex'][0] for j in range(n_iterations)]) / n_iterations)
        final_stats['correct_in_2'].append(sum([expected_best in n_runs_stats[j]['kex'][:2] for j in range(n_iterations)]) / n_iterations)
        final_stats['correct_in_3'].append(sum([expected_best in n_runs_stats[j]['kex'][:3] for j in range(n_iterations)]) / n_iterations)
        # This run's best model, and how often it comes 1st or appears within the top 2 or 3 across all runs
        final_stats['1st'].append(str(n_runs_stats[i]['kex'][0]))
        final_stats['1st_round'].append(str(n_runs_stats[i]['round'][0]))
        final_stats['1st_utility'].append(str(n_runs_stats[i]['utility'][0]))
        final_stats['1st_ratio'].append(sum([k == n_runs_stats[i]['kex'][0] for k in [n_runs_stats[j]['kex'][0] for j in range(n_iterations)]]) / n_iterations)
        final_stats['1st_in_2'].append(sum([n_runs_stats[i]['kex'][0] in n_runs_stats[j]['kex'][:2] for j in range(n_iterations)]) / n_iterations)
        final_stats['1st_in_3'].append(sum([n_runs_stats[i]['kex'][0] in n_runs_stats[j]['kex'][:3] for j in range(n_iterations)]) / n_iterations)
        # Same for this run's 2nd-best model
        final_stats['2nd'].append(str(n_runs_stats[i]['kex'][1]))
        final_stats['2nd_round'].append(str(n_runs_stats[i]['round'][1]))
        final_stats['2nd_utility'].append(str(n_runs_stats[i]['utility'][1]))
        final_stats['2nd_ratio'].append(sum([k == n_runs_stats[i]['kex'][1] for k in [n_runs_stats[j]['kex'][1] for j in range(n_iterations)]]) / n_iterations)
        final_stats['2nd_in_3'].append(sum([n_runs_stats[i]['kex'][1] in n_runs_stats[j]['kex'][:3] for j in range(n_iterations)]) / n_iterations)
    final_stats = pd.DataFrame(final_stats)
    final_stats.to_csv(f'./Stats/{dataset_name}_{n_iterations}_runs_{datetime.now().strftime("%d-%m-%Y_%H-%M-%S")}')
    return final_stats


def save_one_run(dataset_name, expected_best, best_mods, all_mods, all_exprs):
    '''Convenience wrapper: compute and save the statistics of a single search run (top 5 models).'''
    return get_and_save_stats([one_run_statistics(best_mods, all_mods, all_exprs, 5)], dataset_name, expected_best)


# if __name__ == '__main__':
#     # np.seterr(all='raise') # Raise exceptions instead of RuntimeWarnings. The exceptions can then be caught by the debugger
#
#     n_iterations = 3
#     n_runs_stats = []
#     for i in range(n_iterations):
#         best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y,
#             start_kernels = start_kernels['Default'], p_rules = production_rules['Default'], utility_function = BIC,
#             rounds = 2, beam = 3, restarts = 4,
#             model_list_fitter = fit_mods_parallel_processes, optimiser = GPy_optimisers[0], verbose = True)
#         n_runs_stats.append(one_run_statistics(best_mods, all_mods, all_exprs, 5))
#         print(f'{i+1} runs done')
#
#     get_and_save_stats(n_runs_stats)
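

# A minimal single-run sketch of how save_one_run above could be used. This is an illustrative
# assumption, not part of the original script: it presumes that X, Y, dataset and correct_k are
# provided by the synthetic_datasets star-import, and it reuses the same explore_model_space
# arguments as the commented block above.
#
# if __name__ == '__main__':
#     best_mods, all_mods, all_exprs, expanded, not_expanded = explore_model_space(X, Y,
#         start_kernels = start_kernels['Default'], p_rules = production_rules['Default'], utility_function = BIC,
#         rounds = 2, beam = 3, restarts = 4,
#         model_list_fitter = fit_mods_parallel_processes, optimiser = GPy_optimisers[0], verbose = True)
#     save_one_run(dataset, correct_k, best_mods, all_mods, all_exprs)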