Commit 58e4126e authored by s8_filimo_m

Update yolov3/utils/utils.py, yolov3/utils/torch_utils.py, yolov3/utils/parse_config.py, yolov3/utils/layers.py, yolov3/utils/google_utils.py, yolov3/utils/__init__.py, yolov3/cfg/yolov3-spp-1cls-608.cfg, handClassifier/ConfidenceInPredictions.ipynb files
parent 18f4c425
Pipeline #1236 failed
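handClassifier/ConfidenceInPredictions.ipynb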
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"\n",
"from __future__ import print_function, division\n",
"import itertools\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"from torch.optim import lr_scheduler\n",
"from torch.autograd import Variable\n",
"import torchvision\n",
"from torchvision import datasets, models, transforms\n",
"import torchvision.transforms.functional as TF\n",
"import matplotlib.pyplot as plt\n",
"import time\n",
"import os\n",
"import copy\n",
"import re\n",
"from PIL import Image\n",
"\n",
"plt.ion() # interactive mode"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"#Augmenting training data, validation is left as-is\n",
"data_transforms = {\n",
" 'train': transforms.Compose([\n",
" transforms.Resize((264, 264)),\n",
" transforms.ColorJitter(brightness=0.1,saturation=0.1,contrast=0.1, hue=0),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n",
" ]),\n",
" 'val': transforms.Compose([\n",
" transforms.Resize((264, 264)),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n",
" ]),\n",
"}\n",
"\n",
"#Real directory for training\n",
"data_dir = '/media/data3/HandSigns/Gestures/Pictures/ClassificatorData'\n",
"image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),\n",
" data_transforms[x])\n",
" for x in ['train', 'val']}\n",
"\n",
"\n",
"#Changed the batch size to ~16, for faster processing\n",
"dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=16,\n",
" shuffle=True, num_workers=4)\n",
" for x in ['train', 'val']}\n",
"\n",
"#Batch size 1 for checking the confidence of each picture\n",
"valLoader = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=1,\n",
" shuffle=True, num_workers=4)\n",
" for x in ['val']}\n",
"\n",
"dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}\n",
"class_names = image_datasets['train'].classes\n",
"\n",
"#Utilizes the GPU if available, otherwise uses CPU, which has less processing power for image processing\n",
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Shows the augmented images\n",
"def imshow(inp, title=None):\n",
" #Rotates an array\n",
" inp = inp.numpy().transpose((1, 2, 0))\n",
" #Contributes towards normalization, which helps neural networks learn\n",
" mean = np.array([0.485, 0.456, 0.406])\n",
" std = np.array([0.229, 0.224, 0.225])\n",
" inp = std * inp + mean\n",
" #Applies the normalization to the image\n",
" inp = np.clip(inp, 0, 1)\n",
" #Shows the image\n",
" plt.imshow(inp)\n",
" if title is not None:\n",
" plt.title(title)\n",
" #Pauses for plot updating\n",
" plt.pause(0.001)\n",
"\n",
"\n",
"# Get a batch of training data\n",
"inputs, classes = next(iter(dataloaders['train']))\n",
"\n",
"# Make a grid from batch\n",
"out = torchvision.utils.make_grid(inputs)\n",
"\n",
"imshow(out, title=[class_names[x] for x in classes])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"\n",
"accuracyList = []\n",
"#Model training\n",
"def train_model(model, criterion, optimizer, scheduler, num_epochs=25):\n",
" since = time.time()\n",
"\n",
" best_model_wts = copy.deepcopy(model.state_dict())\n",
" best_acc = 0.0\n",
" worst_acc = 0.0\n",
" summThing = 0\n",
" indexList = []\n",
" \n",
" accuracyList2 = []\n",
" indexIterator = 0\n",
" accuracyIterator = 0\n",
" \n",
"\n",
" for epoch in range(num_epochs):\n",
" print('Epoch {}/{}'.format(epoch, num_epochs - 1))\n",
" print('-' * 10)\n",
"\n",
" # Each epoch has a training and validation phase\n",
" for phase in ['train', 'val']:\n",
" if phase == 'train':\n",
" model.train() # Set model to training mode\n",
" else:\n",
" model.eval() # Set model to evaluate mode\n",
"\n",
" running_loss = 0.0\n",
" running_corrects = 0\n",
"\n",
" # Iterates over the data\n",
" for inputs, labels in dataloaders[phase]:\n",
" inputs = inputs.to(device)\n",
" labels = labels.to(device)\n",
"\n",
" # zero the parameter gradients\n",
" optimizer.zero_grad()\n",
"\n",
" #Enables gradient value calculation, sets the mode to 'train'\n",
" with torch.set_grad_enabled(phase == 'train'):\n",
" #Passes the image to the model\n",
" outputs = model(inputs)\n",
" #Stores the prediction\n",
" _, preds = torch.max(outputs, 1)\n",
" #Stores the loss - how much the network has learned\n",
" loss = criterion(outputs, labels)\n",
"\n",
" #Uses backpropogation to optimize the learning process\n",
" if phase == 'train':\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" #Stores the statistics\n",
" running_loss += loss.item() * inputs.size(0)\n",
" running_corrects += torch.sum(preds == labels.data)\n",
" \n",
" #Advances the training if the model is actually training\n",
" if phase == 'train':\n",
" scheduler.step()\n",
"\n",
" #Calculates the epoch statistics\n",
" epoch_loss = running_loss / dataset_sizes[phase]\n",
" epoch_acc = running_corrects.double() / dataset_sizes[phase]\n",
" accuracyList.append(running_corrects.double())\n",
"\n",
" print('{} Loss: {:.4f} Acc: {:.4f}'.format(\n",
" phase, epoch_loss, epoch_acc))\n",
"\n",
" #Makes a copy of the model after validation, and if the model is superior than previous iterations\n",
" if phase == 'val' and epoch_acc > best_acc:\n",
" best_acc = epoch_acc\n",
" print('Best val Acc: {:4f}'.format(best_acc))\n",
" best_model_wts = copy.deepcopy(model.state_dict())\n",
"\n",
"\n",
" time_elapsed = time.time() - since\n",
" print('Training complete in {:.0f}m {:.0f}s'.format(\n",
" time_elapsed // 60, time_elapsed % 60))\n",
" print('Best val Acc: {:4f}'.format(best_acc))\n",
"\n",
" #Loads the best trained model\n",
" model.load_state_dict(best_model_wts)\n",
" return model"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#Displays predictioncs for some images\n",
"def visualize_model(model, num_images=6):\n",
" was_training = model.training\n",
" model.eval()\n",
" images_so_far = 0\n",
" fig = plt.figure()\n",
"\n",
" with torch.no_grad():\n",
" for i, (inputs, labels) in enumerate(dataloaders['val']):\n",
" inputs = inputs.to(device)\n",
" labels = labels.to(device)\n",
"\n",
" outputs = model(inputs)\n",
" _, preds = torch.max(outputs, 1)\n",
"\n",
" for j in range(inputs.size()[0]):\n",
" images_so_far += 1\n",
" ax = plt.subplot(num_images//2, 2, images_so_far)\n",
" ax.axis('off')\n",
" ax.set_title('predicted: {}'.format(class_names[preds[j]]))\n",
" imshow(inputs.cpu().data[j])\n",
"\n",
" if images_so_far == num_images:\n",
" model.train(mode=was_training)\n",
" return\n",
" model.train(mode=was_training)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"class_names = ['a', 'b', 'c', 'd', 'e']\n",
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
"model_ft = models.resnet34(pretrained=True)\n",
"num_ftrs = model_ft.fc.in_features\n",
"\n",
"model_ft.fc = nn.Linear(num_ftrs, len(class_names))\n",
"\n",
"model_ft = model_ft.to(device)\n",
"\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"# Observe that all parameters are being optimized\n",
"#Changed from lr0.001 to lr0.0005\n",
"optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0005, momentum=0.9)\n",
"\n",
"# Decay LR by a factor of 0.1 every 7 epochs\n",
"exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.1)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=250)\n",
"#Saves the state_dict\n",
"torch.save(model_ft.state_dict(), '/media/data3/HandSigns/Gestures/Pictures/ClassificatorData/stateDict.pt')\n",
"#Saves the entire model\n",
"torch.save(model_ft, '/home/Documents/Maksims/CourseWork/handGestures/cutOuts/entireModel.pt')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Loads the state_dict\n",
"#model = TheModelClass(*args, **kwargs)\n",
"model_ft.load_state_dict(torch.load('/media/data3/HandSigns/Gestures/Pictures/ClassificatorData/stateDict.pt'))\n",
"model_ft.eval()\n",
"\n",
"\n",
"visualize_model(model_ft)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def accuracy_predict(model, num_images=6):\n",
" was_training = model.training\n",
" model.eval()\n",
" images_so_far = 0\n",
" fig = plt.figure()\n",
"\n",
" thisdict =\t{\n",
" \"0\": \"a\",\n",
" \"1\": \"b\",\n",
" \"2\": \"c\",\n",
" \"3\": \"d\",\n",
" \"4\": \"e\"\n",
" }\n",
" \n",
" with torch.no_grad():\n",
" for i, (inputs, labels) in enumerate(valLoader['val']):\n",
" labelbackup = labels\n",
" inputs = inputs.to(device)\n",
" \n",
" outputs = model(inputs)\n",
" print(outputs)\n",
" \n",
" torchValue = torch.max(outputs, 1)\n",
" _, preds = torchValue\n",
" \n",
" testerin = str(torchValue[0])\n",
"\n",
" m = re.search(r'\\[([^]]*)\\]',testerin)\n",
" prediction = preds.item()\n",
" reality = labels.item()\n",
" \n",
"\n",
" for j in range(inputs.size()[0]):\n",
" images_so_far += 1\n",
" ax = plt.subplot(num_images//2, 2, images_so_far)\n",
" ax.axis('off')\n",
" a = 10\n",
" \n",
" ax.set_title('Prediction: '+ thisdict[str(prediction)]\n",
" + '\\nReality: ' \n",
" + thisdict[str(reality)] + '\\nConfidence: ' + str(m.group(0)))\n",
" imshow(inputs.cpu().data[j])\n",
"\n",
" testCorrect = labelbackup.data\n",
"\n",
"\n",
" if images_so_far == num_images:\n",
" model.train(mode=was_training)\n",
" return\n",
" model.train(mode=was_training)\n",
"\n",
"accuracy_predict(model_ft)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total amount of pictures per class:\n",
" [206, 176, 93, 91, 72]\n",
"Total amount of correct pictures per class:\n",
" [194, 174, 89, 85, 68]\n",
"0\n",
"A: 94% B: 99% C: 96% D: 93% E: 94%\n"
]
}
],
"source": [
"'''Prints the average prediction for each class, the images that are confusing'''\n",
"with torch.no_grad():\n",
" counterList = [0, 0, 0, 0, 0]\n",
" correctList = [0, 0, 0, 0, 0]\n",
" cmt = torch.zeros(5,5, dtype=torch.int64)\n",
" wrongCounter = 0\n",
" for i, (inputs, labels) in enumerate(valLoader['val']):\n",
" #Stores the labels in CPU, letting us know which label the picture really has\n",
" #labelbackup = labels\n",
"\n",
" inputs = inputs.to(device)\n",
" labels = labels.to(device)\n",
" \n",
" outputs = model_ft(inputs)\n",
" outputsCpu = outputs.cpu().numpy()[0]\n",
" \n",
" #The value perceived by the neural network\n",
" cpuIndex = np.argmax(outputsCpu)\n",
" \n",
" #Counts how many images were processed in total\n",
" counterList[cpuIndex] = counterList[cpuIndex] + 1\n",
" \n",
" #The label from the class of the image\n",
" currentLabel = labels.data.cpu().numpy()[0]\n",
"\n",
" \n",
" #For zero\n",
" if(str(currentLabel) == str(cpuIndex)):\n",
" correctList[cpuIndex] = correctList[cpuIndex] + 1\n",
" else:\n",
" cmt[currentLabel][cpuIndex] += 1\n",
" print(\"True value: \", currentLabel)\n",
" print(\"Perceived value: \", cpuIndex)\n",
" imshow(inputs.cpu().data[0])\n",
" wrongCounter += 1\n",
"\n",
"print(\"Total amount of pictures per class:\\n\", counterList)\n",
"print(\"Total amount of correct pictures per class:\\n\", correctList)\n",
"print(wrongCounter)\n",
"\n",
"#Fills in the rest of the confusion matrix\n",
"for i in range(len(correctList)):\n",
" cmt[i][i] = correctList[i]\n",
" \n",
"\n",
"\n",
"\n",
"print(\"A: \", \"{0:.0%}\".format(correctList[0] / counterList[0]),\n",
" \"B: \", \"{0:.0%}\".format(correctList[1] / counterList[1]), \n",
" \"C: \", \"{0:.0%}\".format(correctList[2] / counterList[2]),\n",
" \"D: \", \"{0:.0%}\".format(correctList[3] / counterList[3]),\n",
" \"E: \", \"{0:.0%}\".format(correctList[4] / counterList[4]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plotConfusionMatrix(cm, classes, title='Confusion matrix', cmap=plt.cm.YlOrRd):\n",
" plt.figure(figsize = (4,4))\n",
" plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
" plt.title(title)\n",
" plt.colorbar()\n",
" tick_marks = np.arange(len(classes))\n",
" plt.xticks(tick_marks, classes, rotation=45)\n",
" plt.yticks(tick_marks, classes)\n",
"\n",
" fmt = 'd'\n",
" thresh = cm.max() / 2.\n",
" for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
" plt.text(j, i, format(cm[i, j], fmt), horizontalalignment=\"center\", color=\"white\" if cm[i, j] > thresh else \"black\")\n",
"\n",
" plt.tight_layout()\n",
" plt.ylabel('True label')\n",
" plt.xlabel('Predicted label')\n",
" \n",
"names = ('a', 'b', 'c', 'd', 'e')\n",
"plotConfusionMatrix(cmt, names)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
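yolov3/cfg/yolov3-spp-1cls-608.cfg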
[net]
batch=8
subdivisions=2
width=608
height=608
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=100
max_batches = 5000
policy=steps
steps=4000,4500
scales=.1,.1
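# Learning-rate schedule: lr ramps up over the first 100 batches (burn_in), then is
# multiplied by 0.1 at batches 4000 and 4500 (policy=steps with the steps/scales above);
# training runs for max_batches=5000.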
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1