Commit a5d8148 (0 parents): 11 changed files with 906 additions and 0 deletions.
@@ -0,0 +1,125 @@
#import random
import torch
from torch.autograd import grad
import torch.nn.functional as F
from torchvision import datasets, transforms
import preconditioned_stochastic_gradient_descent as psgd
import utilities as U

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../data', train=True, download=True,
                     transform=transforms.Compose([
                         transforms.RandomCrop(32, padding=4),
                         transforms.RandomHorizontalFlip(),
                         transforms.ToTensor()])),
    batch_size=32, shuffle=True)  # too slow with larger batch size on my machine
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor()])),
    batch_size=32, shuffle=True)

"""Test error rates of a few runs: 8.5%, 8.85%, 8.86%"""
# each W holds a flattened 3x3 conv kernel with one extra bias row (used as bias=W[-1] below);
# the last output class of W10 is the NULL/background class
dim1, dim2, dim3 = 128, 192, 256
W1 = torch.tensor(torch.randn(3*3*3+1, dim1)/(3*3*3)**0.5, requires_grad=True, device=device)
W2 = torch.tensor(torch.randn(dim1*3*3+1, dim1)/(dim1*3*3)**0.5, requires_grad=True, device=device)
W3 = torch.tensor(torch.randn(dim1*3*3+1, dim1)/(dim1*3*3)**0.5, requires_grad=True, device=device)
# decimation, i.e., stride=2
W4 = torch.tensor(torch.randn(dim1*3*3+1, dim2)/(dim1*3*3)**0.5, requires_grad=True, device=device)
W5 = torch.tensor(torch.randn(dim2*3*3+1, dim2)/(dim2*3*3)**0.5, requires_grad=True, device=device)
W6 = torch.tensor(torch.randn(dim2*3*3+1, dim2)/(dim2*3*3)**0.5, requires_grad=True, device=device)
# decimation, i.e., stride=2
W7 = torch.tensor(torch.randn(dim2*3*3+1, dim3)/(dim2*3*3)**0.5, requires_grad=True, device=device)
W8 = torch.tensor(torch.randn(dim3*3*3+1, dim3)/(dim3*3*3)**0.5, requires_grad=True, device=device)
W9 = torch.tensor(torch.randn(dim3*3*3+1, dim3)/(dim3*3*3)**0.5, requires_grad=True, device=device)
# detection layer
W10 = torch.tensor(torch.randn(dim3*3*3+1, 10+1)/(dim3*3*3)**0.5, requires_grad=True, device=device)
# pure CNN model
def model(x):
    x = F.leaky_relu(F.conv2d(x, W1[:-1].view(dim1,3,3,3), bias=W1[-1], padding=1), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W2[:-1].view(dim1,dim1,3,3), bias=W2[-1], padding=1), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W3[:-1].view(dim1,dim1,3,3), bias=W3[-1], padding=1), negative_slope=0.1)
    #print(x.shape)
    x = F.leaky_relu(F.conv2d(x, W4[:-1].view(dim2,dim1,3,3), bias=W4[-1], padding=1, stride=2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W5[:-1].view(dim2,dim2,3,3), bias=W5[-1], padding=1), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W6[:-1].view(dim2,dim2,3,3), bias=W6[-1], padding=1), negative_slope=0.1)
    #print(x.shape)
    x = F.leaky_relu(F.conv2d(x, W7[:-1].view(dim3,dim2,3,3), bias=W7[-1], padding=1, stride=2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W8[:-1].view(dim3,dim3,3,3), bias=W8[-1], padding=1), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W9[:-1].view(dim3,dim3,3,3), bias=W9[-1]), negative_slope=0.1)
    #print(x.shape)
    x = F.conv2d(x, W10[:-1].view(11,dim3,3,3), bias=W10[-1])
    #print(x.shape)
    return x

def train_loss(images, labels):
    y = model(images)
    y = F.log_softmax(y, 1).double()
    loss = -torch.sum(U.log_prb_1l_per_smpl(y, labels))
    return loss/y.shape[0]/y.shape[2]/y.shape[3]

def test_loss_approx():
    # detection with approximate probability; returns the test-set error rate
    num_errs = 0
    with torch.no_grad():
        for data, target in test_loader:
            y = model(data.to(device))
            y = F.log_softmax(y, 1)
            y = y[:,:-1]  # remove NULL class
            y = torch.exp(y)  # this is the likelihood
            y = torch.sum(y, dim=[2,3])  # accumulate likelihood over all positions
            _, pred = torch.max(y, dim=1)  # make a traditional decision
            num_errs += torch.sum(pred!=target.to(device))
    return num_errs.item()/len(test_loader.dataset)

def test_loss_exact():
    # detection with exact probability; returns the test-set error rate
    num_errs = 0
    with torch.no_grad():
        for data, target in test_loader:
            y = model(data.to(device))
            y = F.log_softmax(y, 1)
            y = torch.exp(y)
            y = y[:,:-1] + y[:,-1:]  # P(class c or NULL) at each position
            y = torch.sum(torch.log(y), dim=[2,3])
            _, pred = torch.max(y, dim=1)  # make a traditional decision
            num_errs += torch.sum(pred!=target.to(device))
    return num_errs.item()/len(test_loader.dataset)

# train and test our model; use PSGD-Newton for optimization (virtually tuning free)
Ws = [W1,W2,W3,W4,W5,W6,W7,W8,W9,W10]
Qs = [[torch.eye(W.shape[0], device=device), torch.eye(W.shape[1], device=device)] for W in Ws]
step_size = 0.01
num_epochs = 64
grad_norm_clip_thr = 0.1*sum(W.shape[0]*W.shape[1] for W in Ws)**0.5
TrainLoss, TestLossApprox, TestLossExact = [], [], []
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        #new_size = random.randint(28, 36)#random height rescaling
        #data = data[:,:,(torch.arange(new_size)*(32-1)/(new_size-1)).long()]
        #new_size = random.randint(28, 36)#random width rescaling
        #data = data[:,:,:,(torch.arange(new_size)*(32-1)/(new_size-1)).long()]

        loss = train_loss(data.to(device), target.to(device))

        grads = grad(loss, Ws, create_graph=True)
        TrainLoss.append(loss.item())
        if batch_idx%100==0:
            print('Epoch: {}; batch: {}; train loss: {}'.format(epoch, batch_idx, TrainLoss[-1]))

        v = [torch.randn(W.shape, device=device) for W in Ws]
        Hv = grad(grads, Ws, v)  # just let Hv=grads if using whitened gradients
        with torch.no_grad():
            Qs = [psgd.update_precond_kron(q[0], q[1], dw, dg) for (q, dw, dg) in zip(Qs, v, Hv)]
            pre_grads = [psgd.precond_grad_kron(q[0], q[1], g) for (q, g) in zip(Qs, grads)]
            grad_norm = torch.sqrt(sum([torch.sum(g*g) for g in pre_grads]))
            step_adjust = min(grad_norm_clip_thr/(grad_norm + 1.2e-38), 1.0)
            for i in range(len(Ws)):
                Ws[i] -= step_adjust*step_size*pre_grads[i]

    TestLossApprox.append(test_loss_approx())
    TestLossExact.append(test_loss_exact())
    print('Epoch: {}; best test error rate (approximate): {}; best test error rate (exact): {}'.format(epoch, min(TestLossApprox), min(TestLossExact)))

    if epoch+1 == int(num_epochs/2):
        step_size *= 0.1
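
The two psgd calls above, update_precond_kron and precond_grad_kron, together with one Hessian-vector product per step, are the whole optimizer interface used in this commit. A minimal, self-contained sketch of the same pattern on a toy least-squares problem, assuming only the call signatures already visible in this file and nothing else about the psgd module, looks like this:

# Minimal sketch of the PSGD-Kron training pattern used above, applied to a
# toy least-squares problem; assumes only the signatures already used here:
# psgd.update_precond_kron(Ql, Qr, dW, dG) and psgd.precond_grad_kron(Ql, Qr, grad).
import torch
from torch.autograd import grad
import preconditioned_stochastic_gradient_descent as psgd

A, b = torch.randn(100, 20), torch.randn(100, 5)
W = torch.randn(20, 5, requires_grad=True)   # one matrix parameter
Q = [torch.eye(20), torch.eye(5)]            # Kronecker-factored preconditioner
step_size = 0.1
for _ in range(100):
    loss = torch.sum((A @ W - b)**2)
    g, = grad(loss, [W], create_graph=True)  # gradient, kept in the graph
    v = torch.randn_like(W)                  # random probe vector
    Hv, = grad(g, [W], v)                    # Hessian-vector product
    with torch.no_grad():
        Q = psgd.update_precond_kron(Q[0], Q[1], v, Hv)
        W -= step_size * psgd.precond_grad_kron(Q[0], Q[1], g)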
@@ -0,0 +1,60 @@
import pickle
import random
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
import utilities as U

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor()])),
    batch_size=10, shuffle=True)

#with open('mnist_model', 'rb') as f:
with open('mnist_model_test_error_rate_0p0032', 'rb') as f:
    Ws = pickle.load(f)
W1,W2,W3,W4,W5,W6 = Ws
device = W1.device

def model(x):
    x = F.leaky_relu(F.conv2d(x, W1[:-1].view(64,1,5,5), bias=W1[-1], padding=2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W2[:-1].view(64,64,5,5), bias=W2[-1], padding=2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W3[:-1].view(64,64,5,5), bias=W3[-1], padding=2, stride=2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W4[:-1].view(64,64,5,5), bias=W4[-1], padding=1), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W5[:-1].view(64,64,5,5), bias=W5[-1]), negative_slope=0.1)
    x = F.conv2d(x, W6[:-1].view(11,64,5,5), bias=W6[-1])
    return x

plt_cnt = 0
plt.figure()
for batch_idx, (data, target) in enumerate(test_loader):
    random_size = random.randint(128, 192)
    new_data = U.nest_images(data, random_size, random_size)  # nest the batch of digits into one random_size x random_size canvas
    y = model(new_data[None,:,:,:].to(device))[0]

    plt_cnt += 1
    if plt_cnt<3:
        plt.subplot(2,2,2*plt_cnt)
    else:
        break

    for i in range(y.shape[1]):
        for j in range(y.shape[2]):
            _, label = torch.max(y[:,i,j], dim=0)
            if label < 10:  # class 10 is the NULL/background class
                plt.text(j/y.shape[2], 1-i/y.shape[1], str(label.item()))

    if plt_cnt==1:
        plt.title('recognition results')
    plt.gca().set_aspect('equal', adjustable='box')
    plt.axis('off')

    plt.subplot(2,2,2*plt_cnt-1)
    plt.imshow(new_data[0])
    if plt_cnt==1:
        plt.title('input images')
    plt.axis('off')

#plt.savefig('test.eps', dpi=150)
plt.show()
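
U.nest_images comes from utilities.py, which is part of this commit but not rendered on this page. From its use here it appears to take a batch of 28x28 MNIST digits and return a single height x width canvas with the digits placed on it. The stand-in below is only a hypothetical illustration of that assumed behaviour, not the repository's actual helper:

# Hypothetical stand-in for utilities.nest_images, written only to illustrate the
# assumed behaviour (place a batch of digits at random positions on one canvas);
# the real helper lives in utilities.py and may differ.
import torch

def nest_images_sketch(images, height, width):
    # images: [N, 1, 28, 28] -> returns a [1, height, width] canvas
    canvas = torch.zeros(1, height, width)
    for img in images:
        i = torch.randint(0, height - 28 + 1, (1,)).item()
        j = torch.randint(0, width - 28 + 1, (1,)).item()
        canvas[:, i:i+28, j:j+28] = torch.maximum(canvas[:, i:i+28, j:j+28], img)
    return canvas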
@@ -0,0 +1,102 @@
import torch
from torch.autograd import grad
import torch.nn.functional as F
from torchvision import datasets, transforms
import preconditioned_stochastic_gradient_descent as psgd
import utilities as U

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor()])),
    batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor()])),
    batch_size=64, shuffle=True)

"""Test error rates from a few runs: 0.41%, 0.51%, 0.52%, 0.5%, 0.46%"""
W1 = torch.tensor(torch.randn(1*5*5+1, 64)/(1*5*5)**0.5, requires_grad=True, device=device)
W2 = torch.tensor(torch.randn(64*5*5+1, 64)/(64*5*5)**0.5, requires_grad=True, device=device)
W3 = torch.tensor(torch.randn(64*5*5+1, 64)/(64*5*5)**0.5, requires_grad=True, device=device)
W4 = torch.tensor(torch.randn(64*5*5+1, 64)/(64*5*5)**0.5, requires_grad=True, device=device)
W5 = torch.tensor(torch.randn(64*5*5+1, 64)/(64*5*5)**0.5, requires_grad=True, device=device)
W6 = torch.tensor(torch.randn(64*5*5+1, 10+1)/(64*5*5)**0.5, requires_grad=True, device=device)
def model(x):
    x = F.leaky_relu(F.conv2d(x, W1[:-1].view(64,1,5,5), bias=W1[-1], padding=2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W2[:-1].view(64,64,5,5), bias=W2[-1], padding=2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W3[:-1].view(64,64,5,5), bias=W3[-1], padding=2, stride=2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W4[:-1].view(64,64,5,5), bias=W4[-1], padding=1), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W5[:-1].view(64,64,5,5), bias=W5[-1]), negative_slope=0.1)
    x = F.conv2d(x, W6[:-1].view(11,64,5,5), bias=W6[-1])
    #print(x.shape)
    return x

def train_loss(images, labels):
    y = model(images)
    y = F.log_softmax(y, 1)
    loss = -torch.sum(U.log_prb_1l_per_smpl(y, labels))
    return loss/y.shape[0]/y.shape[2]/y.shape[3]

def test_loss_approx():
    # detection with approximate probability; returns the test-set error rate
    num_errs = 0
    with torch.no_grad():
        for data, target in test_loader:
            y = model(data.to(device))
            y = F.log_softmax(y, 1)
            y = y[:,:-1]  # remove NULL class
            y = torch.exp(y)  # this is the likelihood
            y = torch.sum(y, dim=[2,3])  # accumulate likelihood over all positions
            _, pred = torch.max(y, dim=1)  # make a traditional decision
            num_errs += torch.sum(pred!=target.to(device))
    return num_errs.item()/len(test_loader.dataset)

def test_loss_exact():
    # detection with exact probability; returns the test-set error rate
    num_errs = 0
    with torch.no_grad():
        for data, target in test_loader:
            y = model(data.to(device))
            y = F.log_softmax(y, 1)
            y = torch.exp(y)
            y = y[:,:-1] + y[:,-1:]  # P(class c or NULL) at each position
            y = torch.sum(torch.log(y), dim=[2,3])
            _, pred = torch.max(y, dim=1)  # make a traditional decision
            num_errs += torch.sum(pred!=target.to(device))
    return num_errs.item()/len(test_loader.dataset)

# train and test our model; use PSGD-Newton for optimization (virtually tuning free)
Ws = [W1,W2,W3,W4,W5,W6]
Qs = [[torch.eye(W.shape[0], device=device), torch.eye(W.shape[1], device=device)] for W in Ws]
step_size = 0.02
num_epochs = 20
grad_norm_clip_thr = 0.1*sum(W.shape[0]*W.shape[1] for W in Ws)**0.5
TrainLoss, TestLossApprox, TestLossExact = [], [], []
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        loss = train_loss(data.to(device), target.to(device))

        grads = grad(loss, Ws, create_graph=True)
        TrainLoss.append(loss.item())
        if batch_idx%100==0:
            print('Epoch: {}; batch: {}; train loss: {}'.format(epoch, batch_idx, TrainLoss[-1]))

        v = [torch.randn(W.shape, device=device) for W in Ws]
        Hv = grad(grads, Ws, v)  # just let Hv=grads if using whitened gradients
        with torch.no_grad():
            Qs = [psgd.update_precond_kron(q[0], q[1], dw, dg) for (q, dw, dg) in zip(Qs, v, Hv)]
            pre_grads = [psgd.precond_grad_kron(q[0], q[1], g) for (q, g) in zip(Qs, grads)]
            grad_norm = torch.sqrt(sum([torch.sum(g*g) for g in pre_grads]))
            step_adjust = min(grad_norm_clip_thr/(grad_norm + 1.2e-38), 1.0)
            for i in range(len(Ws)):
                Ws[i] -= step_adjust*step_size*pre_grads[i]

    TestLossApprox.append(test_loss_approx())
    TestLossExact.append(test_loss_exact())
    print('Epoch: {}; best test error rate (approximate): {}; best test error rate (exact): {}'.format(epoch, min(TestLossApprox), min(TestLossExact)))

    if epoch+1 == int(num_epochs/2):
        step_size *= 0.1
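
The demo script earlier in this commit reloads the trained weight list with pickle (the commented-out 'mnist_model' path above it shows the expected file), but the save step is not shown in this training script. A short sketch of how the list could be dumped after training, with the file name purely as an example, is:

# Example only: save the trained weight list so the demo script can pickle.load it.
import pickle
with open('mnist_model', 'wb') as f:
    pickle.dump(Ws, f)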
@@ -0,0 +1,67 @@
import pickle
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import utilities as U

test_images, test_labels = U.read_svhn_mat('./svhn/test.mat')
len_test = len(test_images)
rp = np.random.permutation(len_test)
test_images = test_images[rp]
test_labels = test_labels[rp]

with open('svhn_model_ks_5_dims_96_128_192', 'rb') as f:
    Ws = pickle.load(f)
W1,W2,W3,W4,W5,W6,W7,W8,W9,W10 = Ws
# recover the kernel size and layer widths from the saved weight shapes
ks, dim1, dim2, dim3 = int((W3.shape[0]/W3.shape[1])**0.5), W3.shape[1], W6.shape[1], W9.shape[1]
dim0 = 3  # RGB images
device = W1.device
def model(x):
    x = F.leaky_relu(F.conv2d(x, W1[:-1].view(dim1,dim0,ks,ks), bias=W1[-1], padding=ks//2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W2[:-1].view(dim1,dim1,ks,ks), bias=W2[-1], padding=ks//2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W3[:-1].view(dim1,dim1,ks,ks), bias=W3[-1], padding=ks//2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W4[:-1].view(dim2,dim1,ks,ks), bias=W4[-1], padding=ks//2, stride=2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W5[:-1].view(dim2,dim2,ks,ks), bias=W5[-1], padding=ks//2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W6[:-1].view(dim2,dim2,ks,ks), bias=W6[-1], padding=ks//2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W7[:-1].view(dim3,dim2,ks,ks), bias=W7[-1], padding=ks//2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W8[:-1].view(dim3,dim3,ks,ks), bias=W8[-1], padding=ks//2), negative_slope=0.1)
    x = F.leaky_relu(F.conv2d(x, W9[:-1].view(dim3,dim3,ks,ks), bias=W9[-1], padding=ks//2), negative_slope=0.1)
    x = F.conv2d(x, W10[:-1].view(10+1,dim3,ks,ks), bias=W10[-1])
    return x
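
The kernel size and widths are inferred from the pickled shapes rather than hard-coded. Using the values in the file name (ks = 5, dim1 = 96) as an example: W3 stores a (dim1*ks*ks + 1) x dim1 matrix, i.e. 96*25 + 1 = 2401 rows by 96 columns, so W3.shape[0]/W3.shape[1] is about 25.01 and int(sqrt(25.01)) = 5; the single extra bias row is too small to disturb the rounded result.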

plt_cnt = 0
plt.figure()
for im_cnt, im in enumerate(test_images):
    # if len(test_labels[im_cnt])<3:
    #     continue

    image = torch.tensor(im/256, dtype=torch.float, device=device)
    y = model(image[None])[0]

    plt_cnt += 1
    if plt_cnt<=8:
        plt.subplot(8,2,2*plt_cnt)
    else:
        break

    for i in range(y.shape[1]):
        for j in range(y.shape[2]):
            _, label = torch.max(y[:,i,j], dim=0)
            if label < 10:  # class 10 is the NULL/background class
                plt.text(j/y.shape[2], 1-i/y.shape[1], str(label.item()))

    if plt_cnt==1:
        plt.title('recognition results')
    #plt.gca().set_aspect('equal', adjustable='box')
    plt.axis('off')

    plt.subplot(8,2,2*plt_cnt-1)
    plt.imshow(np.transpose(im, [1,2,0]))
    #plt.title('(a) label: '+str(test_labels[im_cnt]))
    if plt_cnt==1:
        plt.title('input images')
    plt.axis('off')

#plt.savefig('test.eps')
plt.show()