learning_curves.py
#!/usr/bin/env python2.6
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Written (W) 2011-2012 Christian Widmer
# Copyright (C) 2011-2012 Max-Planck-Society
"""
Created on 09.12.2011
@author: Christian Widmer
@summary: Script for computing learning curves
"""
from __future__ import division

#import matplotlib as mpl
#mpl.use('Agg')

from collections import defaultdict
import sys
import time
import traceback

import numpy as np
#import ipdb  # for debugging

import dcd
from data import get_data, coshuffle
import helper

def learning_curve(data_name, solvers):
    """
    call different solvers, compare objectives

    available solvers:
    - finite_diff_primal
    - cvxopt_dual_solver
    - finite_diff_dual
    - dcd
    - dcd_shrinking
    - dcd_shogun
    - mtk_shogun
    """

    num_runs = 10
    # log-spaced fractions of the training data, from 10% to 100%
    #fractions = np.linspace(0.1, 1.0, num_runs)
    fractions = [float(c) for c in np.exp(np.linspace(np.log(0.1), np.log(1.0), num_runs))]

    # keep track of training set size and training time per solver
    num_xt = np.zeros(num_runs)
    train_times = np.zeros((len(solvers), num_runs))

    for run_id, fraction_data in enumerate(fractions):

        data, task_sim = get_data(data_name)
        data_subset = defaultdict(dict)
        num_xt[run_id] = 0

        # draw a random subset of each task's examples
        for task_name in data:
            num_total = len(data[task_name]["xt"])
            num_subset = int(float(num_total) * fraction_data)
            xt, lt = coshuffle(data[task_name]["xt"], data[task_name]["lt"])
            data_subset[task_name]["xt"] = xt[0:num_subset]
            data_subset[task_name]["lt"] = lt[0:num_subset]
            num_xt[run_id] += num_subset

        # train each solver on the subset and time it
        for s_idx, solver in enumerate(solvers):
            eps = 1e-3
            start_time = time.time()
            dcd.train_mtl_svm(data_subset, task_sim, solver, eps, 0, 0)
            ttime = time.time() - start_time
            print "training time:", ttime, "seconds"
            train_times[s_idx, run_id] = ttime

            # write progress to file
            fn = "results/learning_curve_" + data_name + "_" + solver + ".txt"
            txt_file = open(fn, "a")
            txt_file.write("num_xt:\t%i\ttime:\t%f\n" % (num_xt[run_id], ttime))
            txt_file.close()

    # save results
    fn = "results/learning_curve_" + data_name + ".pickle"
    helper.save(fn, {"num_xt": num_xt, "time": train_times})
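
# coshuffle is provided by the local data module. A minimal sketch of the
# behavior assumed above (illustrative only, not the implementation in
# data.py): shuffle several parallel lists with one shared permutation so
# that example/label pairs stay aligned.
def coshuffle_sketch(*lists):
    """jointly shuffle parallel lists with a single shared permutation"""
    import random
    perm = list(range(len(lists[0])))
    random.shuffle(perm)
    return tuple([lst[i] for i in perm] for lst in lists)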

def main():
    """
    runs experiment in different settings
    """

    solvers = ["dcd_shogun", "mtk_shogun"]
    #solvers = ["mtk_shogun"]

    #learning_curve("landmine", solvers)
    #learning_curve("splicing", solvers)
    #learning_curve("toy", solvers)
    #learning_curve("cancer", solvers)
    learning_curve("mnist", solvers)

if __name__ == '__main__':
    # enable post-mortem debugging on uncaught exceptions
    try:
        main()
    except:
        etype, value, tb = sys.exc_info()
        traceback.print_exc()
        import ipdb
        ipdb.post_mortem(tb)

# allow the script to be run through pyreport as well
if __name__ == "pyreport.main":
    main()