it:ai_computervision_linear
Table of Contents
AI - Computer Vision - linear/non-linear neural networks
see also:
- nvidia CUDA toolkit to install CUDA
- http://colab.research.google.com - to play with Python code online in a Jupyter notebook and combine text and code cells
-
Introduction
- computer vision is used for:
- object detection (eg. is there a car in the image if so place a box around it)
- object classification (eg. what type of object is in the image)
- image segmentation (eg. isolate an object within an image - such as for semantic masking)
- smartphones use in-camera panoptic segmentation via transformers to blur backgrounds, remove unwanted objects, enhance faces, etc
- combine camera views into a 3D vector space model and ascertain motion (eg. Tesla driving uses 8 cameras)
- workflow is similar to AI deep learning but computer vision usually uses either CNN or transformer neural networks rather than this linear/non-linear model architecture
A linear/non-linear NN computer vision model
initial code
import torch from torch import nn #neural networks import torchvision from torchvision import datasets from torchvision import transforms from torchvision.transforms import ToTensor import matplotlib.pyplot as plt
input images
# Set up the training and test datasets.
# FashionMNIST: 28x28 grayscale clothing images in 10 classes.
train_data = datasets.FashionMNIST(
    root="data",           # destination folder to download to
    train=True,            # just the training split
    download=True,
    transform=torchvision.transforms.ToTensor(),  # convert images to tensors
    target_transform=None, # bug fix: NONE is not defined in Python; None means "no label transform"
)
test_data = datasets.FashionMNIST(
    root="data",           # destination folder to download to
    train=False,           # just the test split
    download=True,
    transform=torchvision.transforms.ToTensor(),  # NCHW tensor; pixel values 0..255 scaled to 0..1
    target_transform=None, # bug fix: was NONE
)
visualise the data
# Check how much data was imported:
len(train_data), len(test_data)  # for FashionMNIST this should be 60,000 and 10,000 respectively

# See what our y label class names are (eg. dress, trousers):
class_names = train_data.classes
print(class_names)

# See what y index value each class name corresponds to:
class_to_idx = train_data.class_to_idx  # bug fix: was `train.data.class_to_idx`
print(class_to_idx)

# Pull out one sample BEFORE inspecting it (original printed `image`/`label`
# before they were defined). plt is already imported at the top of the file;
# the broken `import matpltlib as plt` line (typo, would raise ImportError)
# has been removed.
image, label = train_data[0]
print(f"Image shape: {image.shape} -> [color_channels, height, width]")
print(f"Image label: {class_names[label]}")

# Visualise one image. matplotlib's imshow() expects (H, W) or (H, W, C), so
# squeeze out the single color-channel dimension before plotting.
plt.imshow(image.squeeze(), cmap="gray")  # gray scale since we dropped the color channel
plt.title(class_names[label])
plt.axis(False)  # bug fix: `plt.axis = False` rebinds the function instead of calling it

# View random images in a grid:
fig = plt.figure(figsize=(9, 9))
rows, cols = 4, 4
for i in range(1, rows * cols + 1):  # bug fix: missing colon
    random_idx = torch.randint(0, len(train_data), size=[1]).item()
    img, label = train_data[random_idx]
    fig.add_subplot(rows, cols, i)
    plt.imshow(img.squeeze(), cmap="gray")  # bug fix: plotted `image` (same sample every cell)
    plt.title(class_names[label])
    plt.axis(False)  # bug fix: was an assignment, not a call
Transform the image dataset into batches of a Python iterable using DataLoader
- this is to reduce RAM impact and allow mini-batch gradient descent, which makes learning more efficient per epoch
from torch.utils.data import DataLoader

# Process 32 images per forward/backward pass.
BATCH_SIZE = 32

# Wrap the datasets so they can be iterated one mini-batch at a time.
# Shuffling the training set guards against any class-ordered layout on
# disk; the test set's order does not matter, so it is left unshuffled.
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
Create a baseline model
# A Flatten layer collapses each (C, H, W) image into one vector of length
# C*H*W — the 1-D feature shape that nn.Linear layers require as input.
flatten_model = nn.Flatten()
#create model class
from torch import nn
class FashionMNISTModel(nn.Module):
    """Baseline linear model for FashionMNIST.

    Flattens each (C, H, W) image into a vector, then passes it through two
    linear layers (no non-linear activations - a purely linear model).

    Args:
        input_shape: number of input features = C*H*W of one image (e.g. 28*28).
        hidden_units: width of the hidden layer (e.g. 10).
        output_shape: number of classes, i.e. len(class_names).
    """

    def __init__(self,
                 input_shape: int,
                 hidden_units: int,
                 output_shape: int):
        super().__init__()  # bug fix: was `super() __init__()` (missing dot)
        # bug fix: attribute was written `self.layer.stack` (invalid) while
        # forward() read `self.layer_stack`; also `nn.flatten` does not
        # exist -> `nn.Flatten`.
        self.layer_stack = nn.Sequential(
            nn.Flatten(),  # (N, C, H, W) -> (N, C*H*W) so Linear can consume it
            nn.Linear(in_features=input_shape,
                      out_features=hidden_units),
            nn.Linear(in_features=hidden_units,
                      out_features=output_shape),  # one logit per class
        )

    def forward(self, x):
        """Return raw class logits of shape (N, output_shape)."""
        return self.layer_stack(x)
# Instantiate the baseline model: 28x28 grayscale inputs flattened to 784
# features, a 10-unit hidden layer, and one output logit per clothing class.
model_0 = FashionMNISTModel(
    input_shape=28 * 28,
    hidden_units=10,
    output_shape=10,
)
Create a timer
# Timing helper so runs on different devices/configs can be compared.
from timeit import default_timer as timer


def print_train_time(start: float,
                     end: float,
                     device: torch.device = None):
    """Print and return the elapsed seconds between two timer() readings."""
    elapsed = end - start
    print(f"Train time on {device}: {elapsed:.3f} seconds")
    return elapsed
# Usage pattern:
start_time = timer()  # take a reading before the code under test
# ... code being timed goes here ...
end_time = timer()    # take a reading after it
print_train_time(start=start_time, end=end_time, device="cpu")
Train the model
# Loss function and optimizer.
loss_fn = nn.CrossEntropyLoss()  # loss is calculated per batch, not per epoch
# bug fix: a stray extra "(" before params= made this line a syntax error
optimizer = torch.optim.SGD(params=model_0.parameters(),  # parameters of the target model to optimize
                            lr=0.01)  # weights update once per batch rather than once per epoch


def accuracy_fn(y_true, y_pred):
    """Return accuracy as a percentage; expects class-index tensors of equal length."""
    # bug fix: original referenced an undefined `torchmetrics.accuracy() ?`;
    # a plain helper avoids the extra dependency and matches the call below.
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / len(y_pred)) * 100


from tqdm.auto import tqdm  # progress bar

train_start_time = timer()
epochs = 3
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n------")  # bug fix: printed `epochs` (the total) instead of `epoch`
    train_loss = 0
    for batch, (X, y) in enumerate(train_dataloader):
        model_0.train()
        y_pred = model_0(X)
        loss = loss_fn(y_pred, y)  # loss for this batch
        train_loss += loss.item()  # accumulate; .item() avoids retaining the autograd graph
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Progress report every 400 batches:
        if batch % 400 == 0:  # bug fix: was `%%` and a missing colon
            # bug fix: f-string had unbalanced braces around .dataset
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")
    train_loss /= len(train_dataloader)  # average loss per batch this epoch

    # Testing
    test_loss, test_acc = 0, 0
    model_0.eval()
    with torch.inference_mode():
        for X_test, y_test in test_dataloader:
            test_pred = model_0(X_test)
            test_loss += loss_fn(test_pred, y_test).item()
            # argmax converts raw logits into predicted class indices
            test_acc += accuracy_fn(y_true=y_test, y_pred=test_pred.argmax(dim=1))
        # bug fix: divided by len(test_data) (sample count) instead of the
        # number of batches, which made the reported average loss far too small
        test_loss /= len(test_dataloader)
        test_acc /= len(test_dataloader)

    print(f"\nTrain loss: {train_loss:.4f} | Test loss: {test_loss:.4f} | Test acc: {test_acc:.4f} ")

train_end_time = timer()
total_train_time = print_train_time(start=train_start_time,
                                    end=train_end_time,
                                    device=str(next(model_0.parameters()).device))
it/ai_computervision_linear.txt · Last modified: 2023/08/09 05:02 by gary1