Leo's Home page -- Github Page -- License: CC BY-SA 4.0
Sometimes we need to go back to basics, and this is the place I chose for that.
Here I'll experiment with different networks on MNIST and its variants, looking for architectures that reduce the number of parameters compared with a Fully Connected (FC) network.
Later on, I might try other datasets that are small enough for my GTX 1080.
Yes, I know, this problem is already solved for images by Convolutional Networks, but that is not what I want to look at. Instead, I want to understand ways in which fully connected layers can be replaced by other kinds of connections that use fewer parameters. This is exploratory work aimed at getting a deeper understanding of Neural Networks (NNs), and at the very least it will give me some fun time.
ColumnNet experiments
A neural network built from several independent sub-networks ("columns"): each column is its own fully connected stack, and each column can use a different activation unit.
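The actual ColumnNet class lives in network_modules, which is not reproduced in this post. To make the idea concrete, here is a minimal sketch of what such a module could look like, assuming each column is a plain FC stack described by a list of layer sizes plus one activation name, with the per-column logits averaged and returned as log-probabilities (so it matches the F.nll_loss criterion used below). The class name and internals are my assumptions, not the real implementation.

import torch
import torch.nn as nn
import torch.nn.functional as F

class ColumnNetSketch(nn.Module):
    """Hypothetical stand-in for network_modules.ColumnNet: one FC stack per column."""
    def __init__(self, columns, activations, num_classes=10):
        super().__init__()
        assert len(columns) == len(activations)
        self.activations = activations
        self.columns = nn.ModuleList()
        self.heads = nn.ModuleList()
        for sizes in columns:
            # hidden layers of one column, e.g. [784, 100, 50]
            layers = nn.ModuleList(
                [nn.Linear(n_in, n_out) for n_in, n_out in zip(sizes[:-1], sizes[1:])]
            )
            self.columns.append(layers)
            self.heads.append(nn.Linear(sizes[-1], num_classes))

    def forward(self, x):
        outs = []
        for layers, head, act_name in zip(self.columns, self.heads, self.activations):
            act = getattr(F, act_name)  # e.g. F.relu, F.elu, F.logsigmoid
            h = x
            for layer in layers:
                h = act(layer(h))
            outs.append(head(h))
        # average the per-column logits and return log-probabilities for F.nll_loss
        return F.log_softmax(torch.stack(outs).mean(dim=0), dim=1)

With the lists defined later in the notebook, such a model would be built as ColumnNetSketch([[784, 50, 50]] * 6, activations).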
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
%matplotlib inline
# Network modules to try
from network_modules import *
from net_utils import *
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
device(type='cuda')
def transform_mnist():
    return transforms.Compose([
        # transforms.Grayscale(),
        # transforms.Resize((w, h)),  # this should be used ONLY if the image is bigger than this size
        transforms.ToTensor()
        # transforms.Normalize(0.5, 0.25)
    ])
# Datasets:
# mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform_mnist())
# mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform_mnist())
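get_loaders comes from net_utils, which is also not shown in this post. Judging by how it is called in train() below, a plausible minimal version would simply wrap the two MNIST datasets from the commented-out cell above into DataLoaders; the implementation here is an assumption, not the original helper.

from torch.utils.data import DataLoader
from torchvision import datasets

def get_loaders(batch_size, transformation, root='./data'):
    """Hypothetical stand-in for net_utils.get_loaders: MNIST train/test DataLoaders."""
    trainset = datasets.MNIST(root=root, train=True, download=True, transform=transformation)
    testset = datasets.MNIST(root=root, train=False, download=True, transform=transformation)
    train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader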
def train(model, mname, device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
    model.to(device)
    num_epochs = 100
    batch_size = 128
    # learning_rate = 0.0001
    learning_rate = 0.001
    # criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    # criterion = nn.MSELoss()
    criterion = F.nll_loss
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    transformation = transform_mnist()
    train_loader, test_loader = get_loaders(batch_size, transformation)
    for epoch in range(num_epochs):
        for i, (img, labels) in enumerate(train_loader):
            labels = labels.to(device)
            img = img.to(device).view((-1, 784))  # flatten 28x28 images into 784-dim vectors
            # ===================forward=====================
            output = model(img)
            loss = criterion(output, labels)
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # ===================log=========================
        if epoch % 20 == 0:
            print('epoch [{}/{}], loss:{:.6f}'.format(epoch + 1, num_epochs, loss.item()))
        # if epoch % 10 == 0:
        #     pic = to_img(output.cpu().data)
        #     in_pic = to_img(img.cpu().data)
        #     save_image(pic, './results/2x2-out_image_{}.png'.format(epoch))
        #     save_image(in_pic, './results/2x2-in_image_{}.png'.format(epoch))
        # if loss.item() < 0.015:  # arbitrary number because I saw that it works well enough
        #     print("loss < 0.015, breaking")
        #     break
    # model.save_model(mname, "model")
    print('########################################################')
    print('Final performance of model {} epoch [{}/{}], loss:{:.8f}'.format(mname, epoch + 1, num_epochs, loss.item()))
    print('--------------------------------------------------------')
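The loop above only tracks the training loss, so it says nothing about generalization yet. As a possible follow-up (not in the original notebook), a small helper along these lines could report test-set accuracy, reusing get_loaders, transform_mnist and the same 784-dimensional flattening:

import torch

def evaluate(model, batch_size=128,
             device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
    """Classification accuracy on the MNIST test split (sketch, not part of the original run)."""
    model.to(device).eval()
    _, test_loader = get_loaders(batch_size, transform_mnist())
    correct = total = 0
    with torch.no_grad():
        for img, labels in test_loader:
            img = img.to(device).view((-1, 784))
            labels = labels.to(device)
            preds = model(img).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total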
fcnets_layers = [
[784,50,50],
[784,100,50],
[784,150,50],
[784,200,50],
[784,50,50,50],
[784,100,50,50],
[784,100,100,50],
[784,200,50,50],
[784,200,100,50],
[784,200,150,50],
[784,200,200,50],
[784,50,50,50,50],
[784,100,50,50,50],
[784,100,100,50,50],
[784,100,100,100,50],
[784,200,100,100,50],
]
activations = [
"relu",
"relu6",
"sigmoid",
"elu",
"leaky_relu",
"logsigmoid",
]
columns = [[fc]*len(activations) for fc in fcnets_layers]
models = [ColumnNet(c, activations) for c in columns]
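Since the goal is to compare parameter budgets against a plain FC network, it helps to count the parameters of each ColumnNet explicitly. This snippet is my addition, using only standard PyTorch calls:

def count_parameters(model):
    """Total number of trainable parameters in a model."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

for layers, model in zip(fcnets_layers, models):
    print("{}: {:,} trainable parameters".format(layers, count_parameters(model)))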
%%time
for i in range(len(fcnets_layers)):
    mname = str(fcnets_layers[i])
    model = models[i]
    train(model, mname)
epoch [1/100], loss:1.098784
epoch [21/100], loss:0.206056
epoch [41/100], loss:0.000080
epoch [61/100], loss:0.032990
epoch [81/100], loss:0.001306
########################################################
Final performance of model [784, 50, 50] epoch [100/100], loss:0.00123214
--------------------------------------------------------
epoch [1/100], loss:1.367187
epoch [21/100], loss:0.029596
epoch [41/100], loss:0.000118
epoch [61/100], loss:0.001094
epoch [81/100], loss:0.001255
########################################################
Final performance of model [784, 100, 50] epoch [100/100], loss:0.00013174
--------------------------------------------------------
epoch [1/100], loss:0.403242
epoch [21/100], loss:0.012076
epoch [41/100], loss:0.000257
epoch [61/100], loss:0.004873
epoch [81/100], loss:0.000759
########################################################
Final performance of model [784, 150, 50] epoch [100/100], loss:0.00014679
--------------------------------------------------------
epoch [1/100], loss:1.438411
epoch [21/100], loss:1.175742
epoch [41/100], loss:0.677915
epoch [61/100], loss:0.024253
epoch [81/100], loss:0.000468
########################################################
Final performance of model [784, 200, 50] epoch [100/100], loss:0.00005428
--------------------------------------------------------
epoch [1/100], loss:1.736917
epoch [21/100], loss:0.207412
epoch [41/100], loss:0.000413
epoch [61/100], loss:0.000363
epoch [81/100], loss:0.005977
########################################################
Final performance of model [784, 50, 50, 50] epoch [100/100], loss:0.00000984
--------------------------------------------------------
epoch [1/100], loss:0.321369
epoch [21/100], loss:0.281001
epoch [41/100], loss:0.000223
epoch [61/100], loss:0.000319
epoch [81/100], loss:0.000864
########################################################
Final performance of model [784, 100, 50, 50] epoch [100/100], loss:0.00385269
--------------------------------------------------------
epoch [1/100], loss:1.114298
epoch [21/100], loss:0.752717
epoch [41/100], loss:0.264392
epoch [61/100], loss:0.055476
epoch [81/100], loss:0.000323
########################################################
Final performance of model [784, 100, 100, 50] epoch [100/100], loss:0.01610803
--------------------------------------------------------
epoch [1/100], loss:1.075133
epoch [21/100], loss:0.508439
epoch [41/100], loss:0.001142
epoch [61/100], loss:0.000106
epoch [81/100], loss:0.000110
########################################################
Final performance of model [784, 200, 50, 50] epoch [100/100], loss:0.00054766
--------------------------------------------------------
epoch [1/100], loss:1.336636
epoch [21/100], loss:0.015125
epoch [41/100], loss:0.018807
epoch [61/100], loss:0.000254
epoch [81/100], loss:0.018729
########################################################
Final performance of model [784, 200, 100, 50] epoch [100/100], loss:0.00002173
--------------------------------------------------------
epoch [1/100], loss:0.787781
epoch [21/100], loss:0.338957
epoch [41/100], loss:0.064825
epoch [61/100], loss:0.000097
epoch [81/100], loss:0.004881
########################################################
Final performance of model [784, 200, 150, 50] epoch [100/100], loss:0.00381731
--------------------------------------------------------
epoch [1/100], loss:1.230452
epoch [21/100], loss:0.891845
epoch [41/100], loss:0.651148
epoch [61/100], loss:0.145539
epoch [81/100], loss:0.013066
########################################################
Final performance of model [784, 200, 200, 50] epoch [100/100], loss:0.00077140
--------------------------------------------------------
epoch [1/100], loss:1.977042
epoch [21/100], loss:1.251442
epoch [41/100], loss:0.000248
epoch [61/100], loss:0.012526
epoch [81/100], loss:0.001485
########################################################
Final performance of model [784, 50, 50, 50, 50] epoch [100/100], loss:0.04552653
--------------------------------------------------------
epoch [1/100], loss:1.453422
epoch [21/100], loss:1.047088
epoch [41/100], loss:1.175504
epoch [61/100], loss:0.336584
epoch [81/100], loss:0.000342
########################################################
Final performance of model [784, 100, 50, 50, 50] epoch [100/100], loss:0.00134811
--------------------------------------------------------
epoch [1/100], loss:0.296155
epoch [21/100], loss:0.002681
epoch [41/100], loss:0.000084
epoch [61/100], loss:0.000112
epoch [81/100], loss:0.000180
########################################################
Final performance of model [784, 100, 100, 50, 50] epoch [100/100], loss:0.00054182
--------------------------------------------------------
epoch [1/100], loss:1.094167
epoch [21/100], loss:0.589513
epoch [41/100], loss:0.360399
epoch [61/100], loss:0.599690
epoch [81/100], loss:0.000083
########################################################
Final performance of model [784, 100, 100, 100, 50] epoch [100/100], loss:0.00003289
--------------------------------------------------------
epoch [1/100], loss:0.415148
epoch [21/100], loss:0.063293
epoch [41/100], loss:0.038689
epoch [61/100], loss:0.001151
epoch [81/100], loss:0.003858
########################################################
Final performance of model [784, 200, 100, 100, 50] epoch [100/100], loss:0.00005394
--------------------------------------------------------
CPU times: user 1h 59min, sys: 4min 34s, total: 2h 3min 35s
Wall time: 2h 16min 28s