import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


import matplotlib.pyplot as plt

%matplotlib inline


# Network modules  to try
from network_modules import *
from net_utils import *


# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def transform_mnist():
    """Build the preprocessing pipeline applied to each MNIST image.

    Only the ``ToTensor`` conversion is active at the moment; the grayscale,
    resize and normalisation steps are currently disabled.

    Returns:
        A ``transforms.Compose`` wrapping the active steps.
    """
    # Disabled steps, kept for reference:
    #   transforms.Grayscale()
    #   transforms.Resize((w, h))  -- use ONLY if the image is bigger than this size
    #   transforms.Normalize(0.5, 0.25)
    steps = [transforms.ToTensor()]
    return transforms.Compose(steps)
# Datasets: MNIST train and test splits stored under ./data (downloaded on demand).
mnist_trainset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform_mnist(),
)
mnist_testset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform_mnist(),
)


# Sketch of how the sparsity masks will be created: sample a 0/1 mask from
# per-entry Bernoulli probabilities. Here the probabilities are simply drawn at
# random; in the real setup the sparsity distribution should be chosen beforehand.
# NOTE(review): .expand(10, 14) leaves the shape unchanged here, since the
# sampled tensor is already (10, 14).
torch.bernoulli(torch.rand(10, 14)).expand(10, 14).clone()

tensor([[1., 0., 0., 1., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1.],
        [1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0.],
        [1., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1.],
        [1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0.],
        [0., 1., 0., 1., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1.],
        [1., 0., 0., 1., 1., 0., 0., 1., 1., 1., 0., 0., 0., 1.],
        [1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 1., 0., 1.]])


# Take the first (image, label) sample from the training set for a quick look.
img0, lbl0 = mnist_trainset[0]


# lbl0.tolist()


# Check the tensor layout of a single sample.
img0.shape

torch.Size([1, 28, 28])


# squeeze() drops the size-1 leading dimension so imshow receives a 2-D image.
plt.imshow(img0.squeeze())

<matplotlib.image.AxesImage at 0x7f7ab8367d60>


def train(model, mname, device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
          *, num_epochs=100, batch_size=128, learning_rate=0.001,
          weight_decay=1e-5, log_every=20):
    """Train ``model`` on the MNIST training loader with Adam and NLL loss.

    Args:
        model: Network to optimise in place. Each batch is flattened to
            ``(batch, features)`` before being passed to it.
            NOTE(review): ``F.nll_loss`` expects log-probabilities, so this
            assumes the model ends in a log-softmax -- confirm against FCNet.
        mname: Label printed in the final summary line.
        device: Device the model and every batch are moved to.
        num_epochs: Number of full passes over the training loader.
        batch_size: Batch size handed to ``get_loaders``.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay.
        log_every: Print the running loss every ``log_every`` epochs
            (starting with the first); ``0`` or ``None`` disables it.

    Returns:
        The loss of the last processed batch as a Python float, or ``nan``
        if no batch was processed.
    """
    model.to(device)
    # Make sure mode-dependent layers (dropout, batch norm, ...) are in training mode.
    model.train()

    criterion = F.nll_loss
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay
    )

    # get_loaders is expected to come from the project helpers imported at the
    # top of the file; only the training loader is needed here.
    train_loader, _ = get_loaders(batch_size, transform_mnist())

    loss = None
    last_loss = float("nan")
    epochs_done = 0
    for epoch in range(num_epochs):
        for img, labels in train_loader:
            labels = labels.to(device)
            img = img.to(device)
            # Flatten per sample so the batch dimension always matches the labels.
            img = img.view(img.size(0), -1)

            # ===================forward=====================
            output = model(img)
            loss = criterion(output, labels)
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        epochs_done = epoch + 1
        if loss is not None:
            # Convert once per epoch to avoid a device sync on every batch.
            last_loss = loss.item()
        # ===================log========================
        if log_every and epoch % log_every == 0:
            print('epoch [{}/{}], loss:{:.6f}'.format(epoch + 1, num_epochs, last_loss))

    print('########################################################')
    print('Final performance of model {} epoch [{}/{}], loss:{:.8f}'.format(mname, epochs_done, num_epochs, last_loss))
    print('--------------------------------------------------------')
    return last_loss


# Hidden-layer widths of every fully connected architecture to compare; each
# network goes from 784 inputs to 10 outputs.
_hidden_layouts = (
    (500,),
    (1000,),
    (1500,),
    (500, 500),
    (1000, 500),
    (1000, 1000),
    (500, 500, 500),
    (1000, 500, 500),
    (1000, 1000, 500),
    (1000, 1000, 1000),
)
fcnets_layers = [[784, *hidden, 10] for hidden in _hidden_layouts]


# Stand-alone network built from the first layout, kept separately for inspection.
model1 = FCNet(fcnets_layers[0], "relu")


# One network per candidate layout, in the same order as fcnets_layers.
models = [FCNet(layer_sizes, "relu") for layer_sizes in fcnets_layers]


# Display the module structure of the stand-alone network.
model1

FCNet(
  (fcnet): FCModule(
    (layers): ModuleList(
      (0): Linear(in_features=784, out_features=500, bias=True)
      (1): Linear(in_features=500, out_features=10, bias=True)
    )
  )
)


%%time
# Train every architecture in turn, labelling each run with its layer sizes.
for layer_sizes, model in zip(fcnets_layers, models):
    mname = str(layer_sizes)
    train(model, mname)

epoch [1/100], loss:0.174584
epoch [21/100], loss:0.009200
epoch [41/100], loss:0.000172
epoch [61/100], loss:0.001544
epoch [81/100], loss:0.012820
########################################################
Final performance of model [784, 500, 10] epoch [100/100], loss:0.00139014
--------------------------------------------------------
epoch [1/100], loss:0.157315
epoch [21/100], loss:0.007848
epoch [41/100], loss:0.000401
epoch [61/100], loss:0.000455
epoch [81/100], loss:0.000409
########################################################
Final performance of model [784, 1000, 10] epoch [100/100], loss:0.00037554
--------------------------------------------------------
epoch [1/100], loss:0.135338
epoch [21/100], loss:0.001302
epoch [41/100], loss:0.009797
epoch [61/100], loss:0.010531
epoch [81/100], loss:0.005287
########################################################
Final performance of model [784, 1500, 10] epoch [100/100], loss:0.00096606
--------------------------------------------------------
epoch [1/100], loss:0.060347
epoch [21/100], loss:0.005659
epoch [41/100], loss:0.000409
epoch [61/100], loss:0.002260
epoch [81/100], loss:0.000005
########################################################
Final performance of model [784, 500, 500, 10] epoch [100/100], loss:0.00005414
--------------------------------------------------------
epoch [1/100], loss:0.106350
epoch [21/100], loss:0.001355
epoch [41/100], loss:0.034748
epoch [61/100], loss:0.001284
epoch [81/100], loss:0.000321
########################################################
Final performance of model [784, 1000, 500, 10] epoch [100/100], loss:0.00028331
--------------------------------------------------------
epoch [1/100], loss:0.255634
epoch [21/100], loss:0.000093
epoch [41/100], loss:0.000121
epoch [61/100], loss:0.000139
epoch [81/100], loss:0.000041
########################################################
Final performance of model [784, 1000, 1000, 10] epoch [100/100], loss:0.00031134
--------------------------------------------------------
epoch [1/100], loss:0.146664
epoch [21/100], loss:0.001849
epoch [41/100], loss:0.011287
epoch [61/100], loss:0.000064
epoch [81/100], loss:0.000249
########################################################
Final performance of model [784, 500, 500, 500, 10] epoch [100/100], loss:0.00001985
--------------------------------------------------------
epoch [1/100], loss:0.096296
epoch [21/100], loss:0.000183
epoch [41/100], loss:0.015776
epoch [61/100], loss:0.000026
epoch [81/100], loss:0.000497
########################################################
Final performance of model [784, 1000, 500, 500, 10] epoch [100/100], loss:0.00003355
--------------------------------------------------------
epoch [1/100], loss:0.173830
epoch [21/100], loss:0.020729
epoch [41/100], loss:0.000395
epoch [61/100], loss:0.001489
epoch [81/100], loss:0.075340
########################################################
Final performance of model [784, 1000, 1000, 500, 10] epoch [100/100], loss:0.00001235
--------------------------------------------------------
epoch [1/100], loss:0.106391
epoch [21/100], loss:0.003898
epoch [41/100], loss:0.010115
epoch [61/100], loss:0.044211
epoch [81/100], loss:0.056438
########################################################
Final performance of model [784, 1000, 1000, 1000, 10] epoch [100/100], loss:0.00018007
--------------------------------------------------------
CPU times: user 24min 17s, sys: 2min 20s, total: 26min 37s
Wall time: 1h 26min 52s

MNIST Experiments - Playing With Different Architectures

Index

Datasets to use:

Experiments I intend to run:

KPIs I intend to measure:

Things I would like to analyse later:

Fully Connected