# build network
class Neural_Network(nn.Module):
    '''
    The neural network object sits a level above the classifier to
    store relevant properties and values. The classifier uses nn.LogSoftmax so use the
    negative log likelihood loss criterion nn.NLLLoss
    Args:
        inputs (int): The number of inputs.
        hidden_sizes (list of ints): The hidden layer sizes. The list is copied, never modified.
        outputs (int): The number of outputs.
        hidden_activation (str): The hidden layer activation function ('relu', 'sigmoid' or 'tanh').
            Any other value builds the hidden layers without an activation function.
        device (str): The gpu or the cpu.
        dropout (float): The dropout rate, value to randomly drop input units through training.
        learn_rate (float): The learning rate value, used along with the gradient to update the weights,
            small values ensure that the weight update steps are small enough.
        optimizer_name (str or None): The optimizer name ('sgd' or 'adam') to update the weights.
            When None (the default) no optimizer is created and one has to be assigned
            to the optimizer attribute before calling train_network.
    Attributes:
        inputs (int): This is where we store the input count,
        hidden_sizes (list of int): This is where we store the hidden layer sizes,
        outputs (int): This is where we store the output size,
        hidden_activation (str): This is where we store the hidden activation type,
        dropout (float): This is where we store the random input unit dropout rate,
        learn_rate (float): This is where we store the learn rate value,
        processing_device (str): This is where we store the device to calculate the results,
        linear_layers (list): This is where we store the values to sequentially build the classifier,
        data (list of int): All layer sizes in order: inputs, hidden sizes, outputs,
        model (torch.nn.module or torchvision model): Where either the generated classifier or the loaded model is stored,
        optimizer (torch.optim): This is where we store the optimizer used,
        criterion (torch.nn.module.loss): This is where we store the loss function type,
        device (str): This is where we store the device,
        epochs_completed (int): This is where we store how many total epochs of training this model has.
    '''

    def __init__(self, inputs, hidden_sizes,
                 outputs, hidden_activation, device,
                 dropout=0.3, learn_rate=0.002, optimizer_name=None):
        super().__init__()
        # Props
        self.inputs = inputs
        # Copy the sizes: the caller's list must stay untouched and the
        # stored hidden sizes must not pick up the input/output counts.
        self.hidden_sizes = list(hidden_sizes)
        self.outputs = outputs
        self.hidden_activation = hidden_activation
        self.dropout = dropout
        self.learn_rate = learn_rate
        self.processing_device = device
        # Layers
        self.linear_layers = []
        self.data = [inputs, *self.hidden_sizes, outputs]
        # Model Stuff
        self.model, self.optimizer = None, None
        self.criterion = nn.NLLLoss()
        self.device = device
        self.epochs_completed = 0
        self.generate_classifier()
        # The optimizer needs the parameters of the freshly built classifier,
        # so it can only be created after generate_classifier().
        if optimizer_name is not None:
            self.optimizer = self._build_optimizer(optimizer_name)

    def _build_optimizer(self, optimizer_name):
        '''Creates the optimizer for the current model parameters.
        Args:
            optimizer_name (str): Either 'sgd' or 'adam' (case insensitive).
        Raises:
            ValueError: If the optimizer name is not supported.
        Returns:
            torch.optim.Optimizer: The optimizer using the stored learn rate.
        '''
        name = str(optimizer_name).lower()
        if name == 'sgd':
            return torch.optim.SGD(self.model.parameters(), lr=self.learn_rate)
        if name == 'adam':
            return torch.optim.Adam(self.model.parameters(), lr=self.learn_rate)
        raise ValueError("Unsupported optimizer '{}', use 'sgd' or 'adam'".format(optimizer_name))

    def generate_classifier(self):
        '''Generates the nn.module container Sequential classifier as the default for this class.
        Every hidden layer is a Linear layer followed by the hidden activation (if the
        name is known) and Dropout, the output layer is a Linear layer followed by LogSoftmax.
        Note that this replaces the model, an optimizer created for the previous model
        keeps pointing at the old parameters.
        Args:
            None.
        Raises:
            TODO: Update exceptions with error_handling class.
        Returns:
            None.
        '''
        activations = {'relu': nn.ReLU, 'sigmoid': nn.Sigmoid, 'tanh': nn.Tanh}
        # Unknown names give None, in which case no activation layer is added
        activation_cls = activations.get(self.hidden_activation)
        layers = []
        last_index = len(self.data) - 2
        # walk over consecutive (in, out) size pairs
        for index, (size_in, size_out) in enumerate(zip(self.data, self.data[1:])):
            layers.append(nn.Linear(size_in, size_out))
            if index != last_index:
                if activation_cls is not None:
                    layers.append(activation_cls())
                layers.append(nn.Dropout(self.dropout))
        layers.append(nn.LogSoftmax(dim=1))
        self.linear_layers = layers
        # expand the list into sequential args
        self.model = nn.Sequential(*layers)

    def train_network(self, train_data, validation_data, epochs=1, load_best_params=False, plot=False):
        '''Trains the model, requires the criterion and optimizer to be set on the class before hand.
        After every epoch the model is validated and the losses are printed.
        Args:
            train_data (torch.utils.data.dataloader.DataLoader): The training torch data loader.
            validation_data (torch.utils.data.dataloader.DataLoader): The validation torch data loader.
            epochs (int): The number of epochs for training.
            load_best_params (bool): If true then we will load the model_state_dict from the highest accuracy iteration
            plot (bool): If true we plot both losses.
        Raises:
            RuntimeError: If the optimizer or the criterion is None.
        Returns:
            None.
        '''
        if self.optimizer is None or self.criterion is None:
            raise RuntimeError('Both optimizer and criterion have to be set before training.')
        # move the model to whatever device we have
        self.model.to(self.device)
        # if we loaded the model in eval mode and want to train switch it
        self.model.train()
        highest_accuracy, high_acc_iter, high_acc_epoch = 0, 0, 0
        training_loss_set, validation_loss_set = [], []
        best_params = None
        for epoch in range(epochs):
            running_loss = 0
            batch_iteration = 0
            for x, y_labels in train_data:
                # move to whatever device we have
                x, y_labels = x.to(self.device), y_labels.to(self.device)
                # zero out the gradients
                self.optimizer.zero_grad()
                # forward pass - get the log probabilities (logits / scores)
                output = self.model(x)
                # calculate the loss
                loss = self.criterion(output, y_labels)
                # backprop - calculate the gradients for the parameters
                loss.backward()
                # parameter update based on gradient
                self.optimizer.step()
                # update stats
                running_loss += loss.item()
                batch_iteration += 1
            # Validation Process
            validation_loss, accuracy = self.validate_network(validation_data)
            # average over the batches seen, an empty loader gives a loss of 0
            training_loss = running_loss / batch_iteration if batch_iteration else 0.0
            print('Model has a total of {} training epochs completed.'.format(self.epochs_completed))
            print('Active session Epoch {} out of {}'.format(epoch + 1, epochs))
            print('Currently model has Accuracy of {}% \nCurrent training loss is {} '
                  '\nCurrent validation loss is {}'.format(accuracy,
                                                           training_loss, validation_loss))
            training_loss_set.append(training_loss)
            validation_loss_set.append(validation_loss)
            print('-------------')
            # Track best run
            if accuracy > highest_accuracy:
                highest_accuracy = accuracy
                high_acc_iter = batch_iteration
                high_acc_epoch = epoch + 1
                if load_best_params:
                    best_params = copy.deepcopy(self.model.state_dict())
            # Set the model back to train mode, enable dropout again
            self.model.train()
            self.epochs_completed += 1
        t_slope, v_slope = self.check_overfitting(training_loss_set, validation_loss_set, plot)
        print('Slope of linear reg training curve fit is {} \nSlope of linear reg Validation curve fit is {}'.format(t_slope,
                                                                                                                      v_slope))
        print('Training session highest accuracy was {} on epoch {} batch iteration {}'.format(highest_accuracy,
                                                                                               high_acc_epoch,
                                                                                               high_acc_iter))
        # best_params stays None when no epoch ever beat an accuracy of 0
        if load_best_params and best_params is not None:
            self.model.load_state_dict(best_params)
            print('Params from {} epoch, {} batch iteration were loaded'.format(high_acc_epoch, high_acc_iter))

    def validate_network(self, data):
        '''Validate our model to check the loss and accuracy.
        The model is left in eval mode when this returns.
        Args:
            data (torch.utils.data.dataloader.DataLoader): The data we want to validate as torch data loader.
        Raises:
            TODO: Add exceptions.
        Returns:
            loss,accuracy (tuple): The mean batch loss and the mean batch accuracy in percent,
            (0.0, 0.0) when the loader yields no batches.
        '''
        # enable eval mode, turn off dropout
        self.model.eval()
        batch_loss = 0
        batch_accuracy = 0
        batch_count = 0
        # turn off the gradients since we are not updating params
        with torch.no_grad():
            # validation pass
            for x, y_labels in data:
                # move to device
                x, y_labels = x.to(self.device), y_labels.to(self.device)
                output = self.model(x)
                # update loss and extract tensor as python float
                batch_loss += self.criterion(output, y_labels).item()
                # calculate the probability
                probability = torch.exp(output)
                # get the top n indexes and values
                _, top_class = probability.topk(1, dim=1)
                # reshape the labels to match top class and check if the prediction matches label
                equals = top_class == y_labels.view(*top_class.shape)
                # have to convert bool tensor to float tensor and get accuracy
                batch_accuracy += equals.float().mean().item()
                batch_count += 1
        if batch_count == 0:
            return 0.0, 0.0
        test_accuracy = (batch_accuracy / batch_count) * 100
        test_loss = batch_loss / batch_count
        return test_loss, test_accuracy

    @staticmethod
    def _linear_fit(losses):
        '''Fits a straight line through the losses over their epoch index.
        Args:
            losses (list of floats): The losses, one per epoch.
        Returns:
            x, y, slope, intercept (tuple): The epoch indexes, the losses as array and the line fit.
            With less than two points a line fit is not defined, then the slope is 0.0.
        '''
        y_val = np.asarray(losses, dtype=float)
        x_val = np.arange(0, len(y_val))
        if len(y_val) < 2:
            intercept = float(y_val[0]) if len(y_val) else 0.0
            return x_val, y_val, 0.0, intercept
        # Least squares polynomial fit.
        slope, intercept = np.polyfit(x_val, y_val, 1)
        return x_val, y_val, slope, intercept

    def check_overfitting(self, train_losses, validation_losses, plot=False):
        '''Fit a line through the training and validation losses to compare their trends.
        Args:
            train_losses (list of floats): The list of training losses per epoch.
            validation_losses (list of floats): The list of validation losses per epoch.
            plot (bool): If true we plot both losses.
        Raises:
            TODO: Add exceptions.
        Returns:
            slopes (tuple): The slopes of the linear reg curve fits for both validation/training,
            a slope is 0.0 when there are less than two losses to fit.
        '''
        t_x, t_y, train_slope, train_intercept = self._linear_fit(train_losses)
        v_x, v_y, validation_slope, validation_intercept = self._linear_fit(validation_losses)
        if plot:
            plt.plot(t_x, t_y, 'o', label='training loss')
            plt.plot(v_x, v_y, 'o', label='validation loss')
            plt.plot(t_x, train_intercept + train_slope*t_x, 'r', label='train_regg')
            plt.plot(v_x, validation_intercept + validation_slope*v_x, 'r', label='val_regg')
            plt.legend()
            plt.show()
        return train_slope, validation_slope

    def save_model_checkpoint(self, full_path, training_class_to_idx):
        '''Save the model checkpoint.
        Args:
            full_path (str): The full path to save the checkpoint to
            training_class_to_idx (dic of ints): This is where we store the dictionary mapping the name of the class to the index (label)
        Raises:
            TODO: Add exceptions
        Returns:
            None
        '''
        net_data_dic = {'input_count': self.inputs,
                        'hidden_sizes': self.hidden_sizes,
                        'outputs': self.outputs,
                        'h_activation': self.hidden_activation,
                        'dropout': self.dropout,
                        'learn_rate': self.learn_rate,
                        'epochs_completed': self.epochs_completed}
        # The generated Sequential model has no classifier attribute, it is the classifier itself
        classifier = getattr(self.model, 'classifier', self.model)
        optimizer_state = self.optimizer.state_dict() if self.optimizer is not None else None
        checkpoint = {'data': net_data_dic,
                      'model': self.model,
                      'classifier': classifier,
                      'optimizer.state_dict': optimizer_state,
                      'state_dict': self.model.state_dict(),
                      'device': self.device,
                      'class_to_idx': training_class_to_idx}
        torch.save(checkpoint, full_path)