Leo's Home page -- Github Page -- License: CC BY-SA 4.0
Sometimes we need to go back to basics; this is the place I chose for that.
Here I'll experiment with different networks on MNIST and its variant datasets, trying to find architectures that reduce the number of parameters compared with a Fully Connected (FC) network.
Later on, I might try other datasets that are small enough for my GTX 1080.
Yes, I know: for images this problem is already solved by Convolutional Networks, but that is not what I want to look at. Instead, I want to understand how fully connected layers can be replaced by other types of connections in order to minimize the number of parameters. This is exploratory work to get a deeper understanding of Neural Networks (NNs), and it will at least give me some fun time.
Although training and evaluation time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
%matplotlib inline
# Network modules to try
from network_modules import *
from net_utils import *
# Global compute device: CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def transform_mnist():
    """Build the preprocessing pipeline applied to every MNIST image.

    Only ``ToTensor`` is active. Grayscale conversion, resizing (only
    meaningful when the source image is larger than the target size) and
    normalization with ``Normalize(0.5, 0.25)`` were tried and are
    deliberately left out of the pipeline.

    Returns:
        A ``transforms.Compose`` wrapping the active steps.
    """
    steps = [transforms.ToTensor()]
    return transforms.Compose(steps)
# MNIST train/test splits, stored under ./data (downloaded when requested via
# download=True). Each split gets its own freshly built transform pipeline.
mnist_trainset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform_mnist(),
)
mnist_testset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform_mnist(),
)
# Sketch of how the sparsity masks will be created: draw a random 10x14 matrix
# of probabilities and sample a 0/1 mask from it. In the real version the
# sparsity distribution should be chosen beforehand instead of being random.
# NOTE(review): expand(10, 14) targets the shape the tensor already has, so it
# looks redundant here -- confirm before reusing this snippet.
torch.bernoulli(torch.rand(10, 14)).expand(10, 14).clone()
tensor([[1., 0., 0., 1., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1.], [1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.], [0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0.], [1., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0.], [1., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1.], [1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0.], [0., 1., 0., 1., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1.], [1., 0., 0., 1., 1., 0., 0., 1., 1., 1., 0., 0., 0., 1.], [1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.], [1., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 1., 0., 1.]])
# Take the first training sample as an (image, label) pair for a sanity check.
img0, lbl0 = mnist_trainset[0]
# lbl0.tolist()
# Display the image tensor's shape (last expression of the notebook cell).
img0.shape
torch.Size([1, 28, 28])
# Drop the size-1 leading dimension of the [1, 28, 28] tensor so imshow gets a 2-D image.
plt.imshow(img0.squeeze())
<matplotlib.image.AxesImage at 0x7f7ab8367d60>
def train(model, mname, device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
          num_epochs=100, batch_size=128, learning_rate=0.001, weight_decay=1e-5,
          log_every=20):
    """Train ``model`` on the MNIST training split and print its progress.

    Every image batch is flattened to 784 features before being fed to the
    model. The loss is ``F.nll_loss`` and the optimizer is Adam.

    NOTE(review): ``F.nll_loss`` expects log-probabilities, so the model is
    assumed to end in a log-softmax -- confirm against the network modules.

    Args:
        model: Network to train; it is moved to ``device`` in place.
        mname: Human-readable model name used in the final report line.
        device: Device on which the model and the batches are placed.
        num_epochs: Number of passes over the training loader.
        batch_size: Batch size handed to ``get_loaders``.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay.
        log_every: Print the loss every ``log_every`` epochs (must be > 0).

    Returns:
        None. Progress and the final loss are printed to stdout. The reported
        loss is the loss of the last batch processed, not an epoch average.
    """
    model.to(device)
    # Make sure training-only layers (dropout, batch norm, ...) are active.
    model.train()

    criterion = F.nll_loss
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 weight_decay=weight_decay)

    # get_loaders also returns a test loader; it is not used while training.
    train_loader, _ = get_loaders(batch_size, transform_mnist())

    # Stays NaN when no batch is ever processed, so reporting cannot fail on
    # an unbound variable (empty loader or num_epochs == 0).
    last_loss = float("nan")
    for epoch in range(num_epochs):
        for img, labels in train_loader:
            labels = labels.to(device)
            img = img.to(device).view((-1, 784))

            # Forward pass.
            output = model(img)
            loss = criterion(output, labels)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Keep only a detached reference; converting to a Python float
            # is deferred to the log lines to avoid a device sync per batch.
            last_loss = loss.detach()

        if epoch % log_every == 0:
            print('epoch [{}/{}], loss:{:.6f}'.format(epoch + 1, num_epochs, float(last_loss)))

    print('########################################################')
    print('Final performance of model {} epoch [{}/{}], loss:{:.8f}'.format(mname, num_epochs, num_epochs, float(last_loss)))
    print('--------------------------------------------------------')
# Layer widths of the fully connected baselines. Every entry starts with the
# 784 input features and ends with the 10 outputs; the values in between are
# the hidden layer widths.
fcnets_layers = [
    # one hidden layer
    [784, 500, 10],
    [784, 1000, 10],
    [784, 1500, 10],
    # two hidden layers
    [784, 500, 500, 10],
    [784, 1000, 500, 10],
    [784, 1000, 1000, 10],
    # three hidden layers
    [784, 500, 500, 500, 10],
    [784, 1000, 500, 500, 10],
    [784, 1000, 1000, 500, 10],
    [784, 1000, 1000, 1000, 10],
]

# Standalone instance of the first configuration (separate from models[0]).
model1 = FCNet(fcnets_layers[0], "relu")

# One ReLU network per configuration, in the same order as fcnets_layers.
models = [FCNet(widths, "relu") for widths in fcnets_layers]
model1
FCNet( (fcnet): FCModule( (layers): ModuleList( (0): Linear(in_features=784, out_features=500, bias=True) (1): Linear(in_features=500, out_features=10, bias=True) ) ) )
%%time
# Train every baseline in turn; the stringified layer list is the model's
# name in the printed report. models was built from fcnets_layers, so the
# two sequences are walked in lockstep.
for layer_widths, model in zip(fcnets_layers, models):
    mname = str(layer_widths)
    train(model, mname)
epoch [1/100], loss:0.174584 epoch [21/100], loss:0.009200 epoch [41/100], loss:0.000172 epoch [61/100], loss:0.001544 epoch [81/100], loss:0.012820 ######################################################## Final performance of model [784, 500, 10] epoch [100/100], loss:0.00139014 -------------------------------------------------------- epoch [1/100], loss:0.157315 epoch [21/100], loss:0.007848 epoch [41/100], loss:0.000401 epoch [61/100], loss:0.000455 epoch [81/100], loss:0.000409 ######################################################## Final performance of model [784, 1000, 10] epoch [100/100], loss:0.00037554 -------------------------------------------------------- epoch [1/100], loss:0.135338 epoch [21/100], loss:0.001302 epoch [41/100], loss:0.009797 epoch [61/100], loss:0.010531 epoch [81/100], loss:0.005287 ######################################################## Final performance of model [784, 1500, 10] epoch [100/100], loss:0.00096606 -------------------------------------------------------- epoch [1/100], loss:0.060347 epoch [21/100], loss:0.005659 epoch [41/100], loss:0.000409 epoch [61/100], loss:0.002260 epoch [81/100], loss:0.000005 ######################################################## Final performance of model [784, 500, 500, 10] epoch [100/100], loss:0.00005414 -------------------------------------------------------- epoch [1/100], loss:0.106350 epoch [21/100], loss:0.001355 epoch [41/100], loss:0.034748 epoch [61/100], loss:0.001284 epoch [81/100], loss:0.000321 ######################################################## Final performance of model [784, 1000, 500, 10] epoch [100/100], loss:0.00028331 -------------------------------------------------------- epoch [1/100], loss:0.255634 epoch [21/100], loss:0.000093 epoch [41/100], loss:0.000121 epoch [61/100], loss:0.000139 epoch [81/100], loss:0.000041 ######################################################## Final performance of model [784, 1000, 1000, 10] epoch [100/100], loss:0.00031134 
-------------------------------------------------------- epoch [1/100], loss:0.146664 epoch [21/100], loss:0.001849 epoch [41/100], loss:0.011287 epoch [61/100], loss:0.000064 epoch [81/100], loss:0.000249 ######################################################## Final performance of model [784, 500, 500, 500, 10] epoch [100/100], loss:0.00001985 -------------------------------------------------------- epoch [1/100], loss:0.096296 epoch [21/100], loss:0.000183 epoch [41/100], loss:0.015776 epoch [61/100], loss:0.000026 epoch [81/100], loss:0.000497 ######################################################## Final performance of model [784, 1000, 500, 500, 10] epoch [100/100], loss:0.00003355 -------------------------------------------------------- epoch [1/100], loss:0.173830 epoch [21/100], loss:0.020729 epoch [41/100], loss:0.000395 epoch [61/100], loss:0.001489 epoch [81/100], loss:0.075340 ######################################################## Final performance of model [784, 1000, 1000, 500, 10] epoch [100/100], loss:0.00001235 -------------------------------------------------------- epoch [1/100], loss:0.106391 epoch [21/100], loss:0.003898 epoch [41/100], loss:0.010115 epoch [61/100], loss:0.044211 epoch [81/100], loss:0.056438 ######################################################## Final performance of model [784, 1000, 1000, 1000, 10] epoch [100/100], loss:0.00018007 -------------------------------------------------------- CPU times: user 24min 17s, sys: 2min 20s, total: 26min 37s Wall time: 1h 26min 52s