WUT_Computer_Science/lab5/code/main.py

309 lines
9.8 KiB
Python
Raw Normal View History

2023-05-13 15:41:16 +02:00
""" Implementation of a network analyzing MNIST dataset """
2023-05-13 12:45:54 +02:00
import torch
2023-05-13 15:41:16 +02:00
from torch import nn
from torch import optim
2023-05-13 13:47:48 +02:00
from torchvision import datasets, transforms
2023-05-13 14:21:56 +02:00
import matplotlib.pyplot as plt
2023-05-16 19:42:24 +02:00
import time
2023-05-16 17:52:14 +02:00
2023-05-13 15:41:16 +02:00
def set_hyperparameters():
    """Build the fixed hyperparameters shared by every training run.

    Returns:
        A new dict with the number of training epochs (``num_epochs``), the
        flattened input size (``init_input_size``) and the number of output
        classes (``num_classes``).
    """
    hyperparameters = dict(
        num_epochs=5,
        init_input_size=28 * 28,  # each MNIST image is 28x28 pixels
        num_classes=10,
    )
    return hyperparameters
def load_datasets():
    """Load the MNIST train and test splits.

    Both splits use ``./data`` as their root directory, are requested with
    ``download=True`` and use ``transforms.ToTensor()`` as their transform.

    Returns:
        Tuple ``(train_dataset, test_dataset)``.
    """
    splits = []
    for is_train in (True, False):
        splits.append(
            datasets.MNIST(
                root="./data",
                train=is_train,
                transform=transforms.ToTensor(),
                download=True,
            )
        )
    train_dataset, test_dataset = splits
    return train_dataset, test_dataset
2023-05-16 17:52:14 +02:00
def create_data_loaders(train_dataset, test_dataset, batch_size=None):
    """Create train and test data loaders.

    Args:
        train_dataset: Dataset the training loader draws from (shuffled).
        test_dataset: Dataset the test loader draws from (not shuffled).
        batch_size: Number of samples per batch for both loaders. When
            omitted, the module-level ``BATCH_SIZE`` is used, which is how
            the experiment script configures a run.

    Returns:
        Tuple ``(train_loader, test_loader)``.
    """
    if batch_size is None:
        # Existing callers pass only the datasets and rely on the
        # module-level setting; keep that behaviour as the default.
        batch_size = BATCH_SIZE
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True
    )
    test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset, batch_size=batch_size, shuffle=False
    )
    return train_loader, test_loader
2023-05-13 14:21:56 +02:00
# Module-level metric histories, appended to while training:
#   loss_values      - loss of every optimisation step
#   train_acc_values - train-set accuracy (in %) after each epoch
#   val_acc_values   - validation-set accuracy (in %) after each epoch
loss_values, train_acc_values, val_acc_values = [], [], []
2023-05-13 15:41:16 +02:00
def define_model(hyperparameters, num_hidden_layers=None, width=None):
    """Define the multilayer perceptron model.

    The model flattens its input, applies ``num_hidden_layers`` blocks of
    ``Linear -> ReLU`` with ``width`` units each, and ends with a linear
    output layer.

    Args:
        hyperparameters: Dict providing ``init_input_size`` (size of the
            flattened input) and ``num_classes`` (size of the output layer).
        num_hidden_layers: Number of hidden ``Linear``/``ReLU`` blocks. When
            omitted, the module-level ``NUM_HIDDEN_LAYERS`` is used.
        width: Number of units in every hidden layer. When omitted, the
            module-level ``WIDTH`` is used.

    Returns:
        An ``nn.Sequential`` whose children are named ``flatten``,
        ``linear{i}``/``relu{i}`` for each hidden block, and ``output``.
    """
    # Existing callers pass only the hyperparameters and rely on the
    # module-level settings; keep that behaviour as the default.
    if num_hidden_layers is None:
        num_hidden_layers = NUM_HIDDEN_LAYERS
    if width is None:
        width = WIDTH

    model = nn.Sequential()
    model.add_module('flatten', nn.Flatten())
    input_size = hyperparameters['init_input_size']
    for i in range(num_hidden_layers):
        model.add_module(f'linear{i}', nn.Linear(input_size, width))
        model.add_module(f'relu{i}', nn.ReLU())
        # Every layer after the first takes the previous hidden width.
        input_size = width
    model.add_module(
        'output', nn.Linear(input_size, hyperparameters['num_classes'])
    )
    return model
2023-05-16 17:52:14 +02:00
def get_optimizer(model, optimizer_type=None, learning_rate=None):
    """Return the optimizer for ``model``'s parameters.

    Args:
        model: Module whose ``parameters()`` will be optimised.
        optimizer_type: One of ``'SGD'``, ``'SGD_Momentum'`` (SGD with
            momentum 0.9) or ``'Adam'``. When omitted, the module-level
            ``OPTIMIZER_TYPE`` is used.
        learning_rate: Learning rate passed to the optimizer. When omitted,
            the module-level ``LEARNING_RATE`` is used.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: If ``optimizer_type`` is not one of the supported names.
    """
    # Existing callers pass only the model and rely on the module-level
    # settings; keep that behaviour as the default.
    if optimizer_type is None:
        optimizer_type = OPTIMIZER_TYPE
    if learning_rate is None:
        learning_rate = LEARNING_RATE

    if optimizer_type == 'SGD':
        return optim.SGD(model.parameters(), lr=learning_rate)
    if optimizer_type == 'SGD_Momentum':
        return optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    if optimizer_type == 'Adam':
        return optim.Adam(model.parameters(), lr=learning_rate)
    raise ValueError("Unsupported optimizer type!")
2023-05-16 17:52:14 +02:00
2023-05-13 15:41:16 +02:00
def initial_configuration():
    """Prepare everything a training run needs.

    Seeds torch's random number generator, then builds the hyperparameters,
    the MNIST data loaders, the model, the loss function and the optimizer.

    Returns:
        Tuple ``(hyperparameters, train_loader, test_loader, model,
        criterion, optimizer)``.
    """
    torch.manual_seed(42)  # fixed seed for reproducibility

    hyperparameters = set_hyperparameters()
    train_loader, test_loader = create_data_loaders(*load_datasets())
    model = define_model(hyperparameters)
    criterion = nn.CrossEntropyLoss()  # loss function
    optimizer = get_optimizer(model)

    return (
        hyperparameters,
        train_loader,
        test_loader,
        model,
        criterion,
        optimizer,
    )
def single_train_iteration(
    data, training_parameters, targets, batch_idx, epoch
):
    """Run one optimisation step on a single batch.

    The batch is flattened to ``(batch, -1)``, passed through the model, and
    the loss against ``targets`` is back-propagated before the optimizer
    takes a step. The scalar loss is appended to the module-level
    ``loss_values`` list.

    ``batch_idx`` and ``epoch`` are accepted but not used: the per-step
    progress print that consumed them is disabled.

    Returns:
        Tuple of the flattened batch and the optimizer taken from
        ``training_parameters``.
    """
    model = training_parameters['model']
    criterion = training_parameters['criterion']
    optimizer = training_parameters['optimizer']

    # Forward pass on the flattened batch.
    flat_data = data.view(data.size(0), -1)
    loss = criterion(model(flat_data), targets)

    # Backward pass and parameter update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Record the loss of this learning step.
    loss_values.append(loss.item())
    return flat_data, optimizer
def set_loaders(train_loader, test_loader):
    """Bundle the train and test loaders into a single dict.

    Returns:
        Dict with the keys ``train_loader`` and ``test_loader``.
    """
    return dict(train_loader=train_loader, test_loader=test_loader)
def set_training_parameters(hyperparameters, loaders, model, criterion, optimizer):
    """Collect every object needed for training into a single dict.

    The two loaders are copied into a fresh ``loaders`` sub-dict rather than
    reusing the ``loaders`` argument itself.

    Returns:
        Dict with the keys ``hyperparameters``, ``loaders``, ``model``,
        ``criterion`` and ``optimizer``.
    """
    loader_keys = ('train_loader', 'test_loader')
    training_parameters = {'hyperparameters': hyperparameters}
    training_parameters['loaders'] = {key: loaders[key] for key in loader_keys}
    training_parameters['model'] = model
    training_parameters['criterion'] = criterion
    training_parameters['optimizer'] = optimizer
    return training_parameters
def training_loop(training_parameters, print_info=True):
    """Train the network for all epochs.

    For every epoch, each batch of the train loader goes through
    ``single_train_iteration``; afterwards the train-set and validation-set
    accuracies are computed for that epoch.

    Args:
        training_parameters: Dict providing ``hyperparameters`` (with
            ``num_epochs``), ``loaders`` (with ``train_loader``), and the
            objects the per-batch and accuracy helpers read.
        print_info: Forwarded to the accuracy helpers to control printing.

    Returns:
        Tuple ``(epoch, train_loader)`` where ``epoch`` is the zero-based
        index of the last epoch run, or ``-1`` when ``num_epochs`` is 0.
    """
    epochs_num = training_parameters["hyperparameters"]["num_epochs"]
    train_loader = training_parameters['loaders']['train_loader']

    # Pre-bind the loop variable: with zero epochs the loop body never runs
    # and the return below would otherwise raise UnboundLocalError.
    epoch = -1
    for epoch in range(epochs_num):
        for batch_idx, (data, targets) in enumerate(train_loader):
            _, training_parameters['optimizer'] = single_train_iteration(
                data, training_parameters, targets, batch_idx, epoch
            )
        calculate_accuracy_epoch(training_parameters, epoch, print_info)
        calculate_validation_set_accuracy(
            training_parameters, epoch, print_info)
    return epoch, train_loader
def calculate_accuracy_epoch(training_parameters, epoch, print_info=True):
    """Calculate accuracy on the train set after an epoch.

    The accuracy (in percent) is appended to the module-level
    ``train_acc_values`` list and optionally printed.

    Args:
        training_parameters: Dict providing ``model`` and
            ``loaders['train_loader']``.
        epoch: Zero-based epoch index, used only in the printed message.
        print_info: When true, print the accuracy.
    """
    model = training_parameters['model']
    correct = 0
    total = 0
    # Evaluation only needs predictions: disable autograd so no graph is
    # built for these forward passes.
    with torch.no_grad():
        for data, targets in training_parameters['loaders']['train_loader']:
            data = data.view(data.size(0), -1)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    # An empty loader yields 0% instead of a ZeroDivisionError.
    train_accuracy = 100 * correct / total if total else 0.0
    if print_info:
        print(
            f"Accuracy on Train Set after Epoch {epoch+1}: {train_accuracy:.2f}%")
    train_acc_values.append(train_accuracy)
2023-05-13 15:41:16 +02:00
2023-05-13 12:45:54 +02:00
def calculate_validation_set_accuracy(training_parameters, epoch, print_info=True):
    """Calculate accuracy on the validation set after an epoch.

    The accuracy (in percent) is appended to the module-level
    ``val_acc_values`` list and optionally printed, followed by a ``---``
    separator line.

    Args:
        training_parameters: Dict providing ``model`` and
            ``loaders['test_loader']``, which serves as the validation set.
        epoch: Zero-based epoch index, used only in the printed message.
        print_info: When true, print the accuracy and the separator.
    """
    model = training_parameters['model']
    correct = 0
    total = 0
    # Evaluation only needs predictions: disable autograd so no graph is
    # built for these forward passes.
    with torch.no_grad():
        for data, targets in training_parameters['loaders']['test_loader']:
            data = data.view(data.size(0), -1)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # An empty loader yields 0% instead of a ZeroDivisionError.
    validation_accuracy = 100 * correct / total if total else 0.0
    if print_info:
        print(
            f"Accuracy on Validation Set after Epoch {epoch+1}: {validation_accuracy:.2f}%"
        )
        print("---")
    val_acc_values.append(validation_accuracy)
2023-05-13 15:41:16 +02:00
2023-05-13 12:45:54 +02:00
2023-05-16 19:42:24 +02:00
def _save_plot(values, x_label, y_label, title, show_plot):
    """Plot ``values``, save the figure to the file ``title``, then close it."""
    plt.plot(values)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.savefig(title)
    if show_plot:
        plt.show()
    # Close the figure so the next plot starts from an empty canvas.
    plt.close()


def main_part(show_plot=True):
    """Run one complete experiment with the current module-level settings.

    Builds the data loaders, model, loss and optimizer, trains the network,
    appends a summary (run tag and execution time) to ``results.txt`` and
    saves three plots named after the run's settings: per-step loss,
    per-epoch train accuracy and per-epoch validation accuracy.

    Args:
        show_plot: When true, each plot is also displayed; the same flag is
            forwarded to ``training_loop`` as its ``print_info`` argument.
    """
    # The metric histories are module-level lists shared by every call.
    # Empty them so this run's plots contain only this run's values instead
    # of accumulating the data of all previous runs.
    loss_values.clear()
    train_acc_values.clear()
    val_acc_values.clear()

    (
        hyperparameters,
        train_loader,
        test_loader,
        model,
        criterion,
        optimizer,
    ) = initial_configuration()

    start_time = time.time()
    loaders = set_loaders(train_loader, test_loader)
    training_parameters = set_training_parameters(
        hyperparameters, loaders, model, criterion, optimizer)
    training_loop(training_parameters, show_plot)

    run_tag = f"lr{LEARNING_RATE}-bs{BATCH_SIZE}-hl{NUM_HIDDEN_LAYERS}-w{WIDTH}-{OPTIMIZER_TYPE}"
    separator = "-------------------------------------------------------------------------------------"

    # Use a context manager so the results file is always flushed and closed.
    with open("results.txt", "a", encoding="utf-8") as file:
        file.write(separator + "\n")
        file.write(f"loss-{run_tag}" + "\n")
        file.write(f"Execution time: {(time.time() - start_time)}" + "\n")
        file.write(separator + "\n")

    # The loss value for every learning step.
    _save_plot(
        loss_values, 'Learning Step', 'Loss',
        f'loss-{run_tag}.png', show_plot)
    # The accuracy on the train set after each epoch.
    _save_plot(
        train_acc_values, 'Epoch', 'Train Accuracy',
        f'trainAccuracy-{run_tag}.png', show_plot)
    # The accuracy on the validation set after each epoch.
    _save_plot(
        val_acc_values, 'Epoch', 'Validation Accuracy',
        f'validationAccuracy-{run_tag}.png', show_plot)
2023-05-13 14:21:56 +02:00
2023-05-16 19:42:24 +02:00
if __name__ == "__main__":
    # Baseline configuration. Each sweep below varies exactly one setting,
    # runs one experiment per candidate value and then restores the baseline.
    LEARNING_RATE = 0.001
    BATCH_SIZE = 64
    NUM_HIDDEN_LAYERS = 2
    WIDTH = 128
    OPTIMIZER_TYPE = 'Adam'

    MAX_TESTS = 17  # total number of candidate values across all sweeps

    # (setting name, candidate values, baseline value to restore afterwards)
    sweeps = (
        ('LEARNING_RATE', [0.1, 0.01, 0.001], 0.001),
        ('BATCH_SIZE', [64, 128, 256], 64),
        ('NUM_HIDDEN_LAYERS', [1, 2, 3], 2),
        ('WIDTH', [64, 128, 256, 512, 1024], 128),
        ('OPTIMIZER_TYPE', ['SGD', 'SGD_Momentum', 'Adam'], 'Adam'),
    )

    tests_run = 0
    for setting, candidates, baseline in sweeps:
        for candidate in candidates:
            # The training helpers read these settings as module globals.
            globals()[setting] = candidate
            main_part(False)
            tests_run += 1
            print(f"Test {tests_run}/{MAX_TESTS} ran")
        globals()[setting] = baseline