Leo's Home page -- Github Page -- License: CC BY-SA 4.0
Sometimes we need to go back to basics; this is the place I chose for that.
Here I'll experiment with different networks on MNIST and its variant datasets, trying to find structures that let me reduce the number of parameters compared with a Fully Connected (FC) network.
Later on, I might try other datasets that are small enough for my GTX 1080.
Yes, I know, the issue is already solved for images with Convolutional Networks, but that is not what I want to look at. Instead, I want to understand ways in which fully connected networks can be replaced by other types of connections to minimize their number of parameters. This is exploratory work to get a deeper understanding of Neural Networks (NNs) that will, at the very least, give me some fun time.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
%matplotlib inline
# Network modules to try
from network_modules import *
from net_utils import *
# Default compute device for the notebook: the GPU when CUDA is usable, the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
def transform_mnist():
    """Build the preprocessing pipeline applied to each MNIST image.

    Returns:
        A ``transforms.Compose`` whose only active step is ``transforms.ToTensor()``.
    """
    # Steps that are intentionally left disabled, kept here as a reminder:
    #   transforms.Grayscale()
    #   transforms.Resize((w, h))        # ONLY if the image is bigger than this size
    #   transforms.Normalize(0.5, 0.25)
    steps = [transforms.ToTensor()]
    return transforms.Compose(steps)
def train(model, mname, device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
          num_epochs=100, batch_size=128, learning_rate=0.001, loaders=None):
    """Train ``model`` as a classifier on images flattened to 784 features.

    Args:
        model: Network to optimise; it is moved to ``device`` in place.
            Assumed to return one score per class for each input row
            (shape ``(batch, classes)``) -- confirm against the model definition.
        mname: Label for the model, used only in the final log message.
        device: Device on which the model and every batch are placed.
        num_epochs: Number of passes over the training loader.
        batch_size: Batch size forwarded to ``get_loaders`` when ``loaders``
            is not supplied.
        learning_rate: Learning rate for the Adam optimiser.
        loaders: Optional ``(train_loader, test_loader)`` pair. When ``None``
            the pair is built with ``get_loaders(batch_size, transform_mnist())``.
            Only the training loader is consumed here.

    Returns:
        The loss of the last processed batch as a ``float`` (``nan`` if no
        batch was processed).
    """
    model.to(device)

    # F.nll_loss expects log-probabilities. The runs recorded with it showed an
    # increasingly negative, unbounded loss, which means the model output is not
    # log-normalised. F.cross_entropy applies log-softmax itself, and gives the
    # same value if the model already ends in log-softmax (log-softmax is
    # idempotent), so it is correct in both cases.
    criterion = F.cross_entropy
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

    if loaders is None:
        loaders = get_loaders(batch_size, transform_mnist())
    train_loader, _test_loader = loaders

    loss = None  # stays None when there are no epochs or the loader is empty
    for epoch in range(num_epochs):
        for img, labels in train_loader:
            labels = labels.to(device)
            # Flatten every image to a 784-long row vector for the dense layers.
            img = img.to(device).view((-1, 784))
            # ===================forward=====================
            output = model(img)
            loss = criterion(output, labels)
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # ===================log========================
        if epoch % 20 == 0 and loss is not None:
            print('epoch [{}/{}], loss:{:.6f}'.format(epoch+1, num_epochs, loss.item()))

    final_loss = loss.item() if loss is not None else float("nan")
    print('########################################################')
    print('Final performance of model {} epoch [{}/{}], loss:{:.8f}'.format(mname, num_epochs, num_epochs, final_loss))
    print('--------------------------------------------------------')
    return final_loss
# Activation names available for the sweep.
activations = ["relu", "relu6", "sigmoid", "elu", "leaky_relu", "logsigmoid"]

# Layer layouts to compare: every layout starts with 784 units and ends with 10,
# only the hidden widths in between vary.
fcnets_layers = [
    [784, *hidden, 10]
    for hidden in (
        (500,),
        (1000,),
        (1500,),
        (500, 500),
        (1000, 500),
        (1000, 1000),
        (500, 500, 500),
        (1000, 500, 500),
        (1000, 1000, 500),
        (1000, 1000, 1000),
    )
]

# Sparsity levels swept so far; 0.5, 0.4, 0.3, 0.2 and 0.1 are not enabled yet.
sparsities = [0.9, 0.8, 0.7, 0.6]
# Build one SparseNet per (layer layout, sparsity, activation) combination,
# named "<layout>_<sparsity>_<activation>".
models = []
for layer_sizes in fcnets_layers:
    for sparsity in sparsities:
        # Only "relu" is swept for now; loop over `activations` to try them all.
        for activation in ("relu",):
            mname = f"{layer_sizes}_{sparsity}_{activation}"
            net = SparseNet(layer_sizes, sparsity=sparsity, activation=activation, name=mname)
            models.append(net)
# Notebook cell value: how many models were created.
len(models)
40
# Train every model that was built. The earlier loop was bounded by
# len(fcnets_layers) while indexing `models`, so only the first
# len(fcnets_layers) models were trained and the rest were skipped.
for model in models:
    mname = model.name
    train(model, mname)
epoch [1/100], loss:-20891.966797 epoch [21/100], loss:-6133424.000000 epoch [41/100], loss:-19577186.000000 epoch [61/100], loss:-41361500.000000 epoch [81/100], loss:-73920136.000000 ######################################################## Final performance of model [784, 500, 10]_0.9_relu epoch [100/100], loss:-113872960.00000000 -------------------------------------------------------- epoch [1/100], loss:-18592.039062 epoch [21/100], loss:-5453422.500000 epoch [41/100], loss:-18066410.000000 epoch [61/100], loss:-38785812.000000 epoch [81/100], loss:-65402916.000000 ######################################################## Final performance of model [784, 500, 10]_0.8_relu epoch [100/100], loss:-99117352.00000000 -------------------------------------------------------- epoch [1/100], loss:-16703.435547 epoch [21/100], loss:-5007824.000000 epoch [41/100], loss:-17129934.000000 epoch [61/100], loss:-37054696.000000 epoch [81/100], loss:-56878508.000000 ######################################################## Final performance of model [784, 500, 10]_0.7_relu epoch [100/100], loss:-91349248.00000000 -------------------------------------------------------- epoch [1/100], loss:-15320.723633 epoch [21/100], loss:-4618579.000000 epoch [41/100], loss:-15034589.000000 epoch [61/100], loss:-30653106.000000 epoch [81/100], loss:-54229072.000000 ######################################################## Final performance of model [784, 500, 10]_0.6_relu epoch [100/100], loss:-76519448.00000000 -------------------------------------------------------- epoch [1/100], loss:-44628.816406 epoch [21/100], loss:-12222195.000000 epoch [41/100], loss:-41137748.000000 epoch [61/100], loss:-88938472.000000 epoch [81/100], loss:-150824304.000000 ######################################################## Final performance of model [784, 1000, 10]_0.9_relu epoch [100/100], loss:-221163344.00000000 -------------------------------------------------------- epoch [1/100], loss:-37582.246094 epoch 
[21/100], loss:-10890504.000000 epoch [41/100], loss:-36614332.000000 epoch [61/100], loss:-78083320.000000 epoch [81/100], loss:-139106496.000000 ######################################################## Final performance of model [784, 1000, 10]_0.8_relu epoch [100/100], loss:-202615760.00000000 -------------------------------------------------------- epoch [1/100], loss:-33153.589844 epoch [21/100], loss:-10460612.000000 epoch [41/100], loss:-32208406.000000 epoch [61/100], loss:-68113896.000000 epoch [81/100], loss:-112439144.000000 ######################################################## Final performance of model [784, 1000, 10]_0.7_relu epoch [100/100], loss:-172079392.00000000 -------------------------------------------------------- epoch [1/100], loss:-26603.984375 epoch [21/100], loss:-8168572.000000 epoch [41/100], loss:-26347406.000000 epoch [61/100], loss:-53424112.000000 epoch [81/100], loss:-95889032.000000 ######################################################## Final performance of model [784, 1000, 10]_0.6_relu epoch [100/100], loss:-143723952.00000000 -------------------------------------------------------- epoch [1/100], loss:-69771.195312 epoch [21/100], loss:-19515738.000000 epoch [41/100], loss:-68503256.000000 epoch [61/100], loss:-126409976.000000 epoch [81/100], loss:-230093120.000000 ######################################################## Final performance of model [784, 1500, 10]_0.9_relu epoch [100/100], loss:-342650624.00000000 -------------------------------------------------------- epoch [1/100], loss:-60908.652344 epoch [21/100], loss:-16855800.000000 epoch [41/100], loss:-57882336.000000 epoch [61/100], loss:-113473672.000000 epoch [81/100], loss:-201333488.000000 ######################################################## Final performance of model [784, 1500, 10]_0.8_relu epoch [100/100], loss:-320420576.00000000 --------------------------------------------------------