Custom datasets in 🔥PyTorch

Computer Vision Technologies

Ігор Мірошниченко

КНУ імені Тараса Шевченка, ФІТ

Data collection

  • We'll take a subsample of Food-101 (the full dataset is sketched below)
  • 101 food classes
  • 101,000 images in total
  • 75,750 for training
  • 25,250 for testing
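
For reference, the full Food-101 ships with torchvision. A minimal sketch (we use a prepared subset below, so this download of roughly 5 GB is optional):

from torchvision import datasets

# Downloads the full Food-101 into data/ on the first call
food101 = datasets.Food101(root="data", split="train", download=True)
print(f"{len(food101.classes)} classes, {len(food101)} training images")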

Downloading the data from Google Drive

import gdown
import tempfile
import zipfile
import os

file_url = 'https://drive.google.com/file/d/1Jf8jhskDmN3DxP0yk9PET5Kr44OnwUgU/view?usp=sharing'
file_id = file_url.split('/')[-2]

with tempfile.TemporaryDirectory() as tmpdir:
    print(f'Created temporary directory: {tmpdir}')

    zip_path = os.path.join(tmpdir, 'pizza_steak_sushi.zip')

    print('Downloading file...')
    gdown.download(id=file_id, output=zip_path, quiet=False)
    print('Download complete.')

    extract_path = 'data/pizza_steak_sushi'
    os.makedirs(extract_path, exist_ok=True)

    print(f'Extracting file to {extract_path}...')
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print('Extraction complete.')

print('Temporary directory and its contents deleted.')
Created temporary directory: C:\Users\ihorm\AppData\Local\Temp\tmpwntb3ef1
Downloading file...
Download complete.
Extracting file to data/pizza_steak_sushi...
Extraction complete.
Temporary directory and its contents deleted.

Data structure

Code
from pathlib import Path

def list_directory_structure_limited(path):
    """
    Main function to call. Prints the root directory name.
    """
    if not path.is_dir():
        print(f'Error: directory {path} not found.')
        return

    print(f'Structure of directory {path} (up to 2 files per folder):')
    print(f'{path.name}/')
    _walk_directory_limited(path, prefix="")

def _walk_directory_limited(path, prefix=""):
    """
    Recursive helper for walking the tree and printing its structure.
    """
    try:
        all_items = list(path.iterdir())
    except PermissionError:
        print(f"{prefix}└── [Permission denied]")
        return

    dirs = sorted([p for p in all_items if p.is_dir()])
    files = sorted([p for p in all_items if p.is_file()])

    files_to_show = files[:2]
    hidden_files_count = len(files) - len(files_to_show)

    items_to_display = dirs + files_to_show

    for i, item_path in enumerate(items_to_display):
        is_last_item = (i == len(items_to_display) - 1) and (hidden_files_count == 0)
        pointer = '└── ' if is_last_item else '├── '

        print(f'{prefix}{pointer}{item_path.name}')

        if item_path.is_dir():
            extension = '    ' if is_last_item else '│   '
            _walk_directory_limited(item_path, prefix=prefix + extension)

    if hidden_files_count > 0:
        print(f'{prefix}└── ... and {hidden_files_count} more files')


directory_to_scan = Path('data/pizza_steak_sushi')

list_directory_structure_limited(directory_to_scan)
Structure of directory data\pizza_steak_sushi (up to 2 files per folder):
pizza_steak_sushi/
├── test
│   ├── pizza
│   │   ├── 1152100.jpg
│   │   ├── 1503858.jpg
│   │   └── ... and 23 more files
│   ├── steak
│   │   ├── 100274.jpg
│   │   ├── 1016217.jpg
│   │   └── ... and 17 more files
│   └── sushi
│       ├── 1172255.jpg
│       ├── 1230335.jpg
│       └── ... and 29 more files
└── train
    ├── pizza
    │   ├── 1008844.jpg
    │   ├── 1033251.jpg
    │   └── ... and 76 more files
    ├── steak
    │   ├── 100135.jpg
    │   ├── 1225762.jpg
    │   └── ... and 73 more files
    └── sushi
        ├── 1070104.jpg
        ├── 1129338.jpg
        └── ... and 70 more files
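
A quick sketch to double-check the counts above (assuming the train/test layout shown):

for split in ["train", "test"]:
    for cls in ["pizza", "steak", "sushi"]:
        n = len(list((directory_to_scan / split / cls).glob("*.jpg")))
        print(f"{split}/{cls}: {n} images")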

Viewing images 1/2

import random
from PIL import Image

image_path = Path('data/pizza_steak_sushi')

random.seed(73)

image_path_list = list(image_path.glob("*/*/*.jpg"))

random_image_path = random.choice(image_path_list)

image_class = random_image_path.parent.stem

img = Image.open(random_image_path)

print(f"Random image path: {random_image_path}")
print(f"Image class: {image_class}")
print(f"Image height: {img.height}") 
print(f"Image width: {img.width}")
img
Random image path: data\pizza_steak_sushi\train\pizza\68684.jpg
Image class: pizza
Image height: 384
Image width: 512

Viewing images 2/2

import numpy as np
import matplotlib.pyplot as plt

img_as_array = np.asarray(img)

plt.figure(figsize=(10, 7))
plt.imshow(img_as_array)
plt.title(f"Image class: {image_class} | Image shape: {img_as_array.shape} $\\rightarrow$ [height, width, color_channels]")
plt.axis(False);

Data preparation

  1. Images \(\rightarrow\) tensors
  2. Convert to torch.utils.data.Dataset, and then to torch.utils.data.DataLoader.
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
Task                    Ready-made datasets and functions
Vision                  torchvision.datasets
Audio                   torchaudio.datasets
Text                    torchtext.datasets
Recommendation systems  torchrec.datasets
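
As a quick illustration of these ready-made datasets, a minimal sketch loading a built-in vision dataset (FashionMNIST is only an example and is not used below):

fashion_mnist = datasets.FashionMNIST(
    root="data",                     # where to store the files
    train=True,                      # training split
    download=True,                   # fetch if not already present
    transform=transforms.ToTensor()  # PIL image -> float tensor in [0, 1]
)
print(len(fashion_mnist), fashion_mnist.classes[:3])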

Converting images to tensors

  • torchvision.transforms - a library of image transformations
  • transforms.Resize() - resizes an image
  • transforms.RandomHorizontalFlip() - randomly flips an image horizontally
  • transforms.ToTensor() - converts an image to a tensor
  • torchvision.transforms.Compose() - chains several transformations together
data_transform = transforms.Compose([
    transforms.Resize(size=(64, 64)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor()
])

Transformation function

def plot_transformed_images(image_paths, transform, n=3, seed=73):
    random.seed(seed)
    random_image_paths = random.sample(image_paths, k=n)
    for image_path in random_image_paths:
        with Image.open(image_path) as f:
            fig, ax = plt.subplots(1, 2)
            ax[0].imshow(f) 
            ax[0].set_title(f"Original \nSize: {f.size}")
            ax[0].axis("off")

            transformed_image = transform(f).permute(1, 2, 0)
            ax[1].imshow(transformed_image) 
            ax[1].set_title(f"Transformed \nSize: {transformed_image.shape}")
            ax[1].axis("off")

            fig.suptitle(f"Class: {image_path.parent.stem}", fontsize=16)

plot_transformed_images(image_path_list, 
                        transform=data_transform, 
                        n=1)
Note: permute() rearranges the image dimensions for matplotlib (PyTorch defaults to [C, H, W], while Matplotlib expects [H, W, C]).
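
A tiny check of what permute() does to the shape (it only reorders dimensions and returns a view; no data is copied):

t = torch.rand(3, 64, 64)        # [C, H, W]
print(t.permute(1, 2, 0).shape)  # torch.Size([64, 64, 3]) -> [H, W, C]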

Loading images with ImageFolder

  • torchvision.datasets.ImageFolder - loads images from folders
from torchvision import datasets

train_dir = image_path / "train"
test_dir = image_path / "test"

train_data = datasets.ImageFolder(root=train_dir,
                                  transform=data_transform,
                                  target_transform=None) 

test_data = datasets.ImageFolder(root=test_dir, 
                                 transform=data_transform)

print(f"Train data:\n{train_data}\nTest data:\n{test_data}")
Train data:
Dataset ImageFolder
    Number of datapoints: 225
    Root location: data\pizza_steak_sushi\train
    StandardTransform
Transform: Compose(
               Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=warn)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
           )
Test data:
Dataset ImageFolder
    Number of datapoints: 75
    Root location: data\pizza_steak_sushi\test
    StandardTransform
Transform: Compose(
               Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=warn)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
           )

Check 1/2

class_names = train_data.classes
class_dict = train_data.class_to_idx

print(f"Length of training data: {len(train_data)}")
print(f"Length of testing data: {len(test_data)}")
print(f"Classes: {class_names}")
print(f"Class-to-index mapping: {class_dict}")
Length of training data: 225
Length of testing data: 75
Classes: ['pizza', 'steak', 'sushi']
Class-to-index mapping: {'pizza': 0, 'steak': 1, 'sushi': 2}
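
A small sketch: inverting class_to_idx is handy later for mapping predicted indices back to class names:

idx_to_class = {v: k for k, v in class_dict.items()}
print(idx_to_class)  # {0: 'pizza', 1: 'steak', 2: 'sushi'}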

Check 2/2

img, label = train_data[0][0], train_data[0][1]
print(f"Image tensor:\n{img}")
print(f"Image shape: {img.shape}")
print(f"Image datatype: {img.dtype}")
print(f"Image label: {label}")
print(f"Label datatype: {type(label)}")
Image tensor:
tensor([[[0.1176, 0.1216, 0.1255,  ..., 0.0980, 0.1020, 0.1137],
         [0.1294, 0.1294, 0.1294,  ..., 0.0980, 0.0980, 0.1059],
         [0.1333, 0.1333, 0.1333,  ..., 0.0941, 0.0980, 0.1020],
         ...,
         [0.1686, 0.1647, 0.1686,  ..., 0.1255, 0.1098, 0.1098],
         [0.1686, 0.1647, 0.1686,  ..., 0.1098, 0.0941, 0.0902],
         [0.1647, 0.1647, 0.1686,  ..., 0.0980, 0.0863, 0.0863]],

        [[0.0588, 0.0588, 0.0588,  ..., 0.0745, 0.0706, 0.0745],
         [0.0627, 0.0627, 0.0627,  ..., 0.0745, 0.0706, 0.0745],
         [0.0706, 0.0706, 0.0706,  ..., 0.0745, 0.0745, 0.0706],
         ...,
         [0.2392, 0.2392, 0.2510,  ..., 0.1373, 0.1333, 0.1255],
         [0.2314, 0.2392, 0.2510,  ..., 0.1255, 0.1176, 0.1098],
         [0.2275, 0.2353, 0.2431,  ..., 0.1137, 0.1059, 0.1020]],

        [[0.0196, 0.0196, 0.0157,  ..., 0.0902, 0.0902, 0.0941],
         [0.0196, 0.0157, 0.0196,  ..., 0.0902, 0.0863, 0.0902],
         [0.0196, 0.0157, 0.0157,  ..., 0.0902, 0.0902, 0.0902],
         ...,
         [0.1843, 0.1882, 0.1961,  ..., 0.1490, 0.1333, 0.1294],
         [0.1804, 0.1843, 0.1922,  ..., 0.1255, 0.1137, 0.1098],
         [0.1765, 0.1804, 0.1882,  ..., 0.1059, 0.0980, 0.1059]]])
Image shape: torch.Size([3, 64, 64])
Image datatype: torch.float32
Image label: 0
Label datatype: <class 'int'>

Viewing image tensors

img_permute = img.permute(1, 2, 0)

print(f"Original shape: {img.shape} -> [color_channels, height, width]")
print(f"Image permute shape: {img_permute.shape} -> [height, width, color_channels]")

plt.figure(figsize=(10, 7))
plt.imshow(img.permute(1, 2, 0))
plt.axis("off")
plt.title(class_names[label], fontsize=14);
Original shape: torch.Size([3, 64, 64]) -> [color_channels, height, width]
Image permute shape: torch.Size([64, 64, 3]) -> [height, width, color_channels]

Passing the data to a DataLoader

  • torch.utils.data.DataLoader - creates an iterable over a dataset
  • batch_size - the number of samples per batch
  • num_workers - the number of worker subprocesses for data loading (e.g. os.cpu_count()-1)
from torch.utils.data import DataLoader
print(f"Number of CPU cores: {os.cpu_count()}")
train_dataloader = DataLoader(dataset=train_data, 
                              batch_size=1,
                              num_workers=os.cpu_count()-1,
                              shuffle=True)

test_dataloader = DataLoader(dataset=test_data, 
                             batch_size=1, 
                             num_workers=os.cpu_count()-1, 
                             shuffle=False)

train_dataloader, test_dataloader
Number of CPU cores: 20
(<torch.utils.data.dataloader.DataLoader at 0x1c8dbca1f10>,
 <torch.utils.data.dataloader.DataLoader at 0x1c8e6a4c310>)

Checking the data shape

img, label = next(iter(train_dataloader))

print(f"Image shape: {img.shape} -> [batch_size, color_channels, height, width]")
print(f"Label shape: {label.shape}")
Image shape: torch.Size([1, 3, 64, 64]) -> [batch_size, color_channels, height, width]
Label shape: torch.Size([1])

Data augmentation

  • Illustrations of augmentations.
  • transforms.TrivialAugmentWide() - applies random augmentations to images.
  • transforms.Compose() - chains several transformations together.

Let's try augmentation

  • num_magnitude_bins - the number of augmentation intensity levels
from torchvision import transforms

train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.TrivialAugmentWide(num_magnitude_bins=31),
    transforms.ToTensor()
])

test_transforms = transforms.Compose([
    transforms.Resize((224, 224)), 
    transforms.ToTensor()
])

Augmentation is usually not applied to the test data.

Checking the augmentation

image_path_list = list(image_path.glob("*/*/*.jpg"))

plot_transformed_images(
    image_paths=image_path_list,
    transform=train_transforms,
    n=3,
    seed=None
)

Model 0: TinyVGG without augmentation

  • Load the data, first turning each of our training and test folders into a Dataset with torchvision.datasets.ImageFolder()
  • Then turn those Datasets into DataLoaders with torch.utils.data.DataLoader().
  • We'll set batch_size=32 and num_workers to the number of CPUs on our machine.
simple_transform = transforms.Compose([ 
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])
from torchvision import datasets
train_data_simple = datasets.ImageFolder(root=train_dir, transform=simple_transform)
test_data_simple = datasets.ImageFolder(root=test_dir, transform=simple_transform)

import os
from torch.utils.data import DataLoader

BATCH_SIZE = 32
NUM_WORKERS = os.cpu_count()
print(f"Creating DataLoaders with batch size {BATCH_SIZE} and {NUM_WORKERS} workers.")

train_dataloader_simple = DataLoader(train_data_simple, 
                                     batch_size=BATCH_SIZE, 
                                     shuffle=True, 
                                     num_workers=NUM_WORKERS)

test_dataloader_simple = DataLoader(test_data_simple, 
                                    batch_size=BATCH_SIZE, 
                                    shuffle=False, 
                                    num_workers=NUM_WORKERS)

train_dataloader_simple, test_dataloader_simple
Creating DataLoaders with batch size 32 and 20 workers.
(<torch.utils.data.dataloader.DataLoader at 0x1c8e7ed9c90>,
 <torch.utils.data.dataloader.DataLoader at 0x1c8e7eef310>)

Model 0: the TinyVGG class

from torch import nn

# `device` is assumed to have been defined earlier in the notebook; a common default:
device = "cuda" if torch.cuda.is_available() else "cpu"

class TinyVGG(nn.Module):
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape, 
                      out_channels=hidden_units, 
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, 
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,
                         stride=2)
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=hidden_units*16*16,
                      out_features=output_shape)
        )
    
    def forward(self, x: torch.Tensor):
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        x = self.classifier(x)
        return x

torch.manual_seed(73)
model_0 = TinyVGG(input_shape=3,
                  hidden_units=10, 
                  output_shape=len(train_data.classes)).to(device)
model_0
TinyVGG(
  (conv_block_1): Sequential(
    (0): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=2560, out_features=3, bias=True)
  )
)
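
Why in_features=hidden_units*16*16: each MaxPool2d(2) halves the spatial size, so 64x64 becomes 32x32 and then 16x16. A quick sanity check with a dummy tensor:

dummy = torch.randn(1, 3, 64, 64).to(device)
features = model_0.conv_block_2(model_0.conv_block_1(dummy))
print(features.shape)  # torch.Size([1, 10, 16, 16]) -> flattened to 10*16*16 = 2560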

Model 0: forward pass on a single image

  1. Get a batch of images and labels from the DataLoader.
  2. Take a single image from the batch and unsqueeze() it so the batch size is 1 (i.e. its shape matches what the model expects).
  3. Run inference on the single image (making sure to send it to the target device).
  4. Print what is happening, convert the model's raw output logits to prediction probabilities with torch.softmax() (since we are working with multi-class data), and then convert the prediction probabilities to prediction labels with torch.argmax().
img_batch, label_batch = next(iter(train_dataloader_simple))

img_single, label_single = img_batch[0].unsqueeze(dim=0), label_batch[0]
print(f"Single image shape: {img_single.shape}\n")

model_0.eval()
with torch.inference_mode():
    pred = model_0(img_single.to(device))
    
print(f"Output logits:\n{pred}\n")
print(f"Output prediction probabilities:\n{torch.softmax(pred, dim=1)}\n")
print(f"Output prediction label:\n{torch.argmax(torch.softmax(pred, dim=1), dim=1)}\n")
print(f"Actual label:\n{label_single}")
Single image shape: torch.Size([1, 3, 64, 64])

Output logits:
tensor([[-0.0240, -0.0225,  0.0036]], device='cuda:0')

Output prediction probabilities:
tensor([[0.3301, 0.3306, 0.3393]], device='cuda:0')

Output prediction label:
tensor([2], device='cuda:0')

Actual label:
1

Model 0: torchinfo

  • torchinfo.summary() - prints information about a PyTorch model
try: 
    import torchinfo
except:
    !pip install torchinfo
    import torchinfo
    
from torchinfo import summary
summary(model_0, input_size=[1, 3, 64, 64])
==========================================================================================
Layer (type:depth-idx)                   Output Shape              Param #
==========================================================================================
TinyVGG                                  [1, 3]                    --
├─Sequential: 1-1                        [1, 10, 32, 32]           --
│    └─Conv2d: 2-1                       [1, 10, 64, 64]           280
│    └─ReLU: 2-2                         [1, 10, 64, 64]           --
│    └─Conv2d: 2-3                       [1, 10, 64, 64]           910
│    └─ReLU: 2-4                         [1, 10, 64, 64]           --
│    └─MaxPool2d: 2-5                    [1, 10, 32, 32]           --
├─Sequential: 1-2                        [1, 10, 16, 16]           --
│    └─Conv2d: 2-6                       [1, 10, 32, 32]           910
│    └─ReLU: 2-7                         [1, 10, 32, 32]           --
│    └─Conv2d: 2-8                       [1, 10, 32, 32]           910
│    └─ReLU: 2-9                         [1, 10, 32, 32]           --
│    └─MaxPool2d: 2-10                   [1, 10, 16, 16]           --
├─Sequential: 1-3                        [1, 3]                    --
│    └─Flatten: 2-11                     [1, 2560]                 --
│    └─Linear: 2-12                      [1, 3]                    7,683
==========================================================================================
Total params: 10,693
Trainable params: 10,693
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 6.75
==========================================================================================
Input size (MB): 0.05
Forward/backward pass size (MB): 0.82
Params size (MB): 0.04
Estimated Total Size (MB): 0.91
==========================================================================================

Model 0: train_step()

  1. train_step() - performs a single training step
  2. test_step() - performs a single testing step
  3. train() - runs the full training loop
def train_step(model: torch.nn.Module, 
               dataloader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer):
    # Put model in train mode
    model.train()
    
    # Setup train loss and train accuracy values
    train_loss, train_acc = 0, 0
    
    # Loop through data loader data batches
    for batch, (X, y) in enumerate(dataloader):
        # Send data to target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate and accumulate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss.item() 

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # Calculate and accumulate accuracy metrics across all batches
        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred)

    # Adjust metrics to get average loss and accuracy per batch 
    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)
    return train_loss, train_acc

Model 0: test_step()

def test_step(model: torch.nn.Module, 
              dataloader: torch.utils.data.DataLoader, 
              loss_fn: torch.nn.Module):
    # Put model in eval mode
    model.eval() 
    
    # Setup test loss and test accuracy values
    test_loss, test_acc = 0, 0
    
    # Turn on inference context manager
    with torch.inference_mode():
        # Loop through DataLoader batches
        for batch, (X, y) in enumerate(dataloader):
            # Send data to target device
            X, y = X.to(device), y.to(device)
    
            # 1. Forward pass
            test_pred_logits = model(X)

            # 2. Calculate and accumulate loss
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()
            
            # Calculate and accumulate accuracy
            test_pred_labels = test_pred_logits.argmax(dim=1)
            test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))
            
    # Adjust metrics to get average loss and accuracy per batch 
    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)
    return test_loss, test_acc

Model 0: putting it together in train()

from tqdm.auto import tqdm

# 1. Take in various parameters required for training and test steps
def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module = nn.CrossEntropyLoss(),
          epochs: int = 5):
    
    # 2. Create empty results dictionary
    results = {"train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
    }
    
    # 3. Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer)
        test_loss, test_acc = test_step(model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn)
        
        # 4. Print out what's happening
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        # 5. Update results dictionary
        # Ensure all data is moved to CPU and converted to float for storage
        results["train_loss"].append(train_loss.item() if isinstance(train_loss, torch.Tensor) else train_loss)
        results["train_acc"].append(train_acc.item() if isinstance(train_acc, torch.Tensor) else train_acc)
        results["test_loss"].append(test_loss.item() if isinstance(test_loss, torch.Tensor) else test_loss)
        results["test_acc"].append(test_acc.item() if isinstance(test_acc, torch.Tensor) else test_acc)

    # 6. Return the filled results at the end of the epochs
    return results

Model 0: let's train!

# Set random seeds
torch.manual_seed(73) 
torch.cuda.manual_seed(73)

# Set number of epochs
NUM_EPOCHS = 5

# Recreate an instance of TinyVGG
model_0 = TinyVGG(input_shape=3, # number of color channels (3 for RGB) 
                  hidden_units=10, 
                  output_shape=len(train_data.classes)).to(device)

# Setup loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_0.parameters(), lr=0.001)

# Start the timer
from timeit import default_timer as timer 
start_time = timer()

# Train model_0 
model_0_results = train(model=model_0, 
                        train_dataloader=train_dataloader_simple,
                        test_dataloader=test_dataloader_simple,
                        optimizer=optimizer,
                        loss_fn=loss_fn, 
                        epochs=NUM_EPOCHS)

# End the timer and print out how long it took
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")
Epoch: 1 | train_loss: 1.0950 | train_acc: 0.4219 | test_loss: 1.1165 | test_acc: 0.2604
Epoch: 2 | train_loss: 1.0841 | train_acc: 0.4258 | test_loss: 1.1255 | test_acc: 0.2604
Epoch: 3 | train_loss: 1.0664 | train_acc: 0.4258 | test_loss: 1.1440 | test_acc: 0.2604
Epoch: 4 | train_loss: 1.1020 | train_acc: 0.3086 | test_loss: 1.1328 | test_acc: 0.3229
Epoch: 5 | train_loss: 0.9879 | train_acc: 0.5820 | test_loss: 1.0888 | test_acc: 0.3011
Total training time: 33.485 seconds

Model 0: plotting the results

from typing import Tuple, Dict, List

def plot_loss_curves(results: Dict[str, List[float]]):
    
    # Get the loss values of the results dictionary (training and test)
    loss = results['train_loss']
    test_loss = results['test_loss']

    # Get the accuracy values of the results dictionary (training and test)
    accuracy = results['train_acc']
    test_accuracy = results['test_acc']

    # Figure out how many epochs there were
    epochs = range(len(results['train_loss']))

    # Setup a plot 
    plt.figure(figsize=(15, 7))

    # Plot loss
    plt.subplot(1, 2, 1)
    plt.plot(epochs, loss, label='train_loss')
    plt.plot(epochs, test_loss, label='test_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    # Plot accuracy
    plt.subplot(1, 2, 2)
    plt.plot(epochs, accuracy, label='train_accuracy')
    plt.plot(epochs, test_accuracy, label='test_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend();
plot_loss_curves(model_0_results)

What to do about overfitting?

What to do about underfitting?

  • Increase model complexity (see the sketch below)
  • Train for longer
  • Reduce regularization
  • Try other architectures
  • Transfer learning
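
A minimal sketch of the first option, reusing the TinyVGG class above (32 hidden units is just an illustrative value):

model_bigger = TinyVGG(input_shape=3,
                       hidden_units=32,  # hypothetical: 10 -> 32 channels per conv layer
                       output_shape=len(train_data.classes)).to(device)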

Model 1: TinyVGG with augmentation

train_transform_trivial_augment = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.TrivialAugmentWide(num_magnitude_bins=31),
    transforms.ToTensor() 
])

test_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

Model 1: DataLoader with augmentation

train_data_augmented = datasets.ImageFolder(train_dir, transform=train_transform_trivial_augment)
test_data_simple = datasets.ImageFolder(test_dir, transform=test_transform)

train_data_augmented, test_data_simple
(Dataset ImageFolder
     Number of datapoints: 225
     Root location: data\pizza_steak_sushi\train
     StandardTransform
 Transform: Compose(
                Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=warn)
                TrivialAugmentWide(num_magnitude_bins=31, interpolation=InterpolationMode.NEAREST, fill=None)
                ToTensor()
            ),
 Dataset ImageFolder
     Number of datapoints: 75
     Root location: data\pizza_steak_sushi\test
     StandardTransform
 Transform: Compose(
                Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=warn)
                ToTensor()
            ))
# Turn Datasets into DataLoaders
import os
BATCH_SIZE = 32
NUM_WORKERS = os.cpu_count()

torch.manual_seed(42)
train_dataloader_augmented = DataLoader(train_data_augmented, 
                                        batch_size=BATCH_SIZE, 
                                        shuffle=True,
                                        num_workers=NUM_WORKERS)

test_dataloader_simple = DataLoader(test_data_simple, 
                                    batch_size=BATCH_SIZE, 
                                    shuffle=False, 
                                    num_workers=NUM_WORKERS)

train_dataloader_augmented, test_dataloader
(<torch.utils.data.dataloader.DataLoader at 0x1c88ef4b550>,
 <torch.utils.data.dataloader.DataLoader at 0x1c8e6a4c310>)

Model 1: overview

torch.manual_seed(73)
model_1 = TinyVGG(
    input_shape=3,
    hidden_units=10,
    output_shape=len(train_data_augmented.classes)).to(device)
model_1
TinyVGG(
  (conv_block_1): Sequential(
    (0): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2): Sequential(
    (0): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=2560, out_features=3, bias=True)
  )
)

Model 1: let's train!

# Set random seeds
torch.manual_seed(73) 
torch.cuda.manual_seed(73)

# Set number of epochs
NUM_EPOCHS = 5

# Setup loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_1.parameters(), lr=0.001)

# Start the timer
from timeit import default_timer as timer 
start_time = timer()

# Train model_1
model_1_results = train(model=model_1, 
                        train_dataloader=train_dataloader_augmented,
                        test_dataloader=test_dataloader_simple,
                        optimizer=optimizer,
                        loss_fn=loss_fn, 
                        epochs=NUM_EPOCHS)

# End the timer and print out how long it took
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")
Epoch: 1 | train_loss: 1.1112 | train_acc: 0.2734 | test_loss: 1.1036 | test_acc: 0.2604
Epoch: 2 | train_loss: 1.0971 | train_acc: 0.3086 | test_loss: 1.1181 | test_acc: 0.1979
Epoch: 3 | train_loss: 1.0869 | train_acc: 0.4297 | test_loss: 1.0841 | test_acc: 0.2708
Epoch: 4 | train_loss: 1.0905 | train_acc: 0.3164 | test_loss: 1.0843 | test_acc: 0.2604
Epoch: 5 | train_loss: 1.0765 | train_acc: 0.4531 | test_loss: 1.0589 | test_acc: 0.4527
Total training time: 33.521 seconds

Model 1: results

plot_loss_curves(model_1_results)

Comparing the models 1/2

import pandas as pd
model_0_df = pd.DataFrame(model_0_results)
model_1_df = pd.DataFrame(model_1_results)
model_0_df
   train_loss  train_acc  test_loss  test_acc
0    1.094972   0.421875   1.116524  0.260417
1    1.084137   0.425781   1.125486  0.260417
2    1.066426   0.425781   1.143981  0.260417
3    1.102041   0.308594   1.132759  0.322917
4    0.987885   0.582031   1.088801  0.301136

Comparing the models 2/2

Code
# Setup a plot 
plt.figure(figsize=(15, 10))

# Get number of epochs
epochs = range(len(model_0_df))

# Plot train loss
plt.subplot(2, 2, 1)
plt.plot(epochs, model_0_df["train_loss"], label="Model 0")
plt.plot(epochs, model_1_df["train_loss"], label="Model 1")
plt.title("Train Loss")
plt.xlabel("Epochs")
plt.legend()

# Plot test loss
plt.subplot(2, 2, 2)
plt.plot(epochs, model_0_df["test_loss"], label="Model 0")
plt.plot(epochs, model_1_df["test_loss"], label="Model 1")
plt.title("Test Loss")
plt.xlabel("Epochs")
plt.legend()

# Plot train accuracy
plt.subplot(2, 2, 3)
plt.plot(epochs, model_0_df["train_acc"], label="Model 0")
plt.plot(epochs, model_1_df["train_acc"], label="Model 1")
plt.title("Train Accuracy")
plt.xlabel("Epochs")
plt.legend()

# Plot test accuracy
plt.subplot(2, 2, 4)
plt.plot(epochs, model_0_df["test_acc"], label="Model 0")
plt.plot(epochs, model_1_df["test_acc"], label="Model 1")
plt.title("Test Accuracy")
plt.xlabel("Epochs")
plt.legend();

Let's download a custom photo

# Download custom image
import requests

data_path = Path("data/")
custom_image_path = data_path / "yar-pizza.jpeg"

# Download the image if it doesn't already exist
if not custom_image_path.is_file():
    with open(custom_image_path, "wb") as f:
        # When downloading from GitHub, need to use the "raw" file link
        request = requests.get("https://raw.githubusercontent.com/Aranaur/aranaur.rbind.io/refs/heads/main/lectures/cv/slides/2025/img/yar-pizza.jpg")
        print(f"Downloading {custom_image_path}...")
        f.write(request.content)
else:
    print(f"{custom_image_path} already exists, skipping download.")
data\yar-pizza.jpeg already exists, skipping download.

Passing the custom photo to the model

import torchvision

# Read in custom image
custom_image_uint8 = torchvision.io.read_image(str(custom_image_path))

# Print out image data
print(f"Custom image tensor:\n{custom_image_uint8}\n")
print(f"Custom image shape: {custom_image_uint8.shape}\n")
print(f"Custom image dtype: {custom_image_uint8.dtype}")
Custom image tensor:
tensor([[[125, 134, 154,  ...,  87,  94,  71],
         [122, 133, 153,  ...,  86,  94,  71],
         [119, 130, 148,  ...,  86,  94,  70],
         ...,
         [ 52,  51,  49,  ..., 132, 127,  97],
         [ 53,  52,  50,  ..., 132, 127,  97],
         [ 55,  53,  52,  ..., 132, 127,  97]],

        [[158, 170, 190,  ...,  86,  93,  70],
         [160, 170, 190,  ...,  85,  93,  70],
         [161, 172, 192,  ...,  85,  93,  69],
         ...,
         [ 32,  31,  29,  ..., 147, 142, 112],
         [ 33,  32,  30,  ..., 147, 142, 112],
         [ 35,  33,  32,  ..., 147, 142, 112]],

        [[173, 184, 206,  ...,  65,  72,  49],
         [173, 186, 206,  ...,  64,  72,  49],
         [175, 186, 205,  ...,  64,  72,  48],
         ...,
         [ 25,  24,  22,  ..., 168, 163, 133],
         [ 26,  25,  23,  ..., 168, 163, 133],
         [ 28,  26,  25,  ..., 168, 163, 133]]], dtype=torch.uint8)

Custom image shape: torch.Size([3, 1280, 964])

Custom image dtype: torch.uint8

Converting 1/2

model_1.eval()
with torch.inference_mode():
    model_1(custom_image_uint8.to(device))
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[38], line 3
      1 model_1.eval()
      2 with torch.inference_mode():
----> 3     model_1(custom_image_uint8.to(device))

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[19], line 34, in TinyVGG.forward(self, x)
     33 def forward(self, x: torch.Tensor):
---> 34     x = self.conv_block_1(x)
     35     x = self.conv_block_2(x)
     36     x = self.classifier(x)

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\container.py:217, in Sequential.forward(self, input)
    215 def forward(self, input):
    216     for module in self:
--> 217         input = module(input)
    218     return input

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\conv.py:463, in Conv2d.forward(self, input)
    462 def forward(self, input: Tensor) -> Tensor:
--> 463     return self._conv_forward(input, self.weight, self.bias)

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\conv.py:459, in Conv2d._conv_forward(self, input, weight, bias)
    455 if self.padding_mode != 'zeros':
    456     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    457                     weight, bias, self.stride,
    458                     _pair(0), self.dilation, self.groups)
--> 459 return F.conv2d(input, weight, bias, self.stride,
    460                 self.padding, self.dilation, self.groups)

RuntimeError: Input type (unsigned char) and bias type (float) should be the same

Converting 2/2

custom_image = torchvision.io.read_image(str(custom_image_path)).type(torch.float32)

custom_image = custom_image / 255. 

print(f"Custom image tensor:\n{custom_image}\n")
print(f"Custom image shape: {custom_image.shape}\n")
print(f"Custom image dtype: {custom_image.dtype}")
Custom image tensor:
tensor([[[0.4902, 0.5255, 0.6039,  ..., 0.3412, 0.3686, 0.2784],
         [0.4784, 0.5216, 0.6000,  ..., 0.3373, 0.3686, 0.2784],
         [0.4667, 0.5098, 0.5804,  ..., 0.3373, 0.3686, 0.2745],
         ...,
         [0.2039, 0.2000, 0.1922,  ..., 0.5176, 0.4980, 0.3804],
         [0.2078, 0.2039, 0.1961,  ..., 0.5176, 0.4980, 0.3804],
         [0.2157, 0.2078, 0.2039,  ..., 0.5176, 0.4980, 0.3804]],

        [[0.6196, 0.6667, 0.7451,  ..., 0.3373, 0.3647, 0.2745],
         [0.6275, 0.6667, 0.7451,  ..., 0.3333, 0.3647, 0.2745],
         [0.6314, 0.6745, 0.7529,  ..., 0.3333, 0.3647, 0.2706],
         ...,
         [0.1255, 0.1216, 0.1137,  ..., 0.5765, 0.5569, 0.4392],
         [0.1294, 0.1255, 0.1176,  ..., 0.5765, 0.5569, 0.4392],
         [0.1373, 0.1294, 0.1255,  ..., 0.5765, 0.5569, 0.4392]],

        [[0.6784, 0.7216, 0.8078,  ..., 0.2549, 0.2824, 0.1922],
         [0.6784, 0.7294, 0.8078,  ..., 0.2510, 0.2824, 0.1922],
         [0.6863, 0.7294, 0.8039,  ..., 0.2510, 0.2824, 0.1882],
         ...,
         [0.0980, 0.0941, 0.0863,  ..., 0.6588, 0.6392, 0.5216],
         [0.1020, 0.0980, 0.0902,  ..., 0.6588, 0.6392, 0.5216],
         [0.1098, 0.1020, 0.0980,  ..., 0.6588, 0.6392, 0.5216]]])

Custom image shape: torch.Size([3, 1280, 964])

Custom image dtype: torch.float32

Viewing the image

plt.imshow(custom_image.permute(1, 2, 0))
plt.title(f"Image shape: {custom_image.shape}")
plt.axis(False);

Resizing

custom_image_transform = transforms.Compose([
    transforms.Resize((64, 64)),
])

custom_image_transformed = custom_image_transform(custom_image)

print(f"Original shape: {custom_image.shape}")
print(f"New shape: {custom_image_transformed.shape}")
Original shape: torch.Size([3, 1280, 964])
New shape: torch.Size([3, 64, 64])

Let's try passing it to the model

model_1.eval()
with torch.inference_mode():
    custom_image_pred = model_1(custom_image_transformed)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[42], line 3
      1 model_1.eval()
      2 with torch.inference_mode():
----> 3     custom_image_pred = model_1(custom_image_transformed)

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[19], line 34, in TinyVGG.forward(self, x)
     33 def forward(self, x: torch.Tensor):
---> 34     x = self.conv_block_1(x)
     35     x = self.conv_block_2(x)
     36     x = self.classifier(x)

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\container.py:217, in Sequential.forward(self, input)
    215 def forward(self, input):
    216     for module in self:
--> 217         input = module(input)
    218     return input

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\conv.py:463, in Conv2d.forward(self, input)
    462 def forward(self, input: Tensor) -> Tensor:
--> 463     return self._conv_forward(input, self.weight, self.bias)

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\conv.py:459, in Conv2d._conv_forward(self, input, weight, bias)
    455 if self.padding_mode != 'zeros':
    456     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    457                     weight, bias, self.stride,
    458                     _pair(0), self.dilation, self.groups)
--> 459 return F.conv2d(input, weight, bias, self.stride,
    460                 self.padding, self.dilation, self.groups)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument weight in method wrapper_CUDA___slow_conv2d_forward)

Fixing it: sending to the target device

model_1.eval()
with torch.inference_mode():
    custom_image_pred = model_1(custom_image_transformed.to(device))
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[43], line 3
      1 model_1.eval()
      2 with torch.inference_mode():
----> 3     custom_image_pred = model_1(custom_image_transformed.to(device))

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[19], line 36, in TinyVGG.forward(self, x)
     34 x = self.conv_block_1(x)
     35 x = self.conv_block_2(x)
---> 36 x = self.classifier(x)
     37 return x

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\container.py:217, in Sequential.forward(self, input)
    215 def forward(self, input):
    216     for module in self:
--> 217         input = module(input)
    218     return input

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File C:\Python\Python311\Lib\site-packages\torch\nn\modules\linear.py:114, in Linear.forward(self, input)
    113 def forward(self, input: Tensor) -> Tensor:
--> 114     return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (10x256 and 2560x3)

Fixing it: adding a batch dimension

model_1.eval()
with torch.inference_mode():
    custom_image_transformed_with_batch_size = custom_image_transformed.unsqueeze(dim=0)
    
    print(f"Custom image transformed shape: {custom_image_transformed.shape}")
    print(f"Unsqueezed custom image shape: {custom_image_transformed_with_batch_size.shape}")
    
    custom_image_pred = model_1(custom_image_transformed.unsqueeze(dim=0).to(device))
Custom image transformed shape: torch.Size([3, 64, 64])
Unsqueezed custom image shape: torch.Size([1, 3, 64, 64])

Note

  • Wrong data types - our model expects torch.float32, while our original image was uint8.
  • Wrong device - our model was on the target device (in our case, the GPU), while our data had not yet been moved there.
  • Wrong shapes - our model expected an input of shape [N, C, H, W] or [batch_size, color_channels, height, width], while our custom image tensor had shape [color_channels, height, width].
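
All three fixes collected in one short sketch (same names as above):

img = torchvision.io.read_image(str(custom_image_path)).type(torch.float32) / 255.  # dtype fix
img = custom_image_transform(img)   # resize to the 64x64 the model was trained on
img = img.unsqueeze(dim=0)          # shape fix: add a batch dimension
with torch.inference_mode():
    pred = model_1(img.to(device))  # device fix: send the input to the model's device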

Prediction

# Print out prediction logits
print(f"Prediction logits: {custom_image_pred}")

# Convert logits -> prediction probabilities (using torch.softmax() for multi-class classification)
custom_image_pred_probs = torch.softmax(custom_image_pred, dim=1)
print(f"Prediction probabilities: {custom_image_pred_probs}")

# Convert prediction probabilities -> prediction labels
custom_image_pred_label = torch.argmax(custom_image_pred_probs, dim=1)
print(f"Prediction label: {custom_image_pred_label}")
Prediction logits: tensor([[ 0.0263, -0.1027, -0.0962]], device='cuda:0')
Prediction probabilities: tensor([[0.3618, 0.3180, 0.3201]], device='cuda:0')
Prediction label: tensor([0], device='cuda:0')

Printing the class label

custom_image_pred_class = class_names[custom_image_pred_label.cpu()]
custom_image_pred_class
'pizza'

Putting it all together

def pred_and_plot_image(model: torch.nn.Module, 
                        image_path: str, 
                        class_names: List[str] = None, 
                        transform=None,
                        device: torch.device = device):
    
    # 1. Load in image and convert the tensor values to float32
    target_image = torchvision.io.read_image(str(image_path)).type(torch.float32)
    
    # 2. Divide the image pixel values by 255 to get them between [0, 1]
    target_image = target_image / 255. 
    
    # 3. Transform if necessary
    if transform:
        target_image = transform(target_image)
    
    # 4. Make sure the model is on the target device
    model.to(device)
    
    # 5. Turn on model evaluation mode and inference mode
    model.eval()
    with torch.inference_mode():
        # Add an extra dimension to the image
        target_image = target_image.unsqueeze(dim=0)
    
        # Make a prediction on image with an extra dimension and send it to the target device
        target_image_pred = model(target_image.to(device))
        
    # 6. Convert logits -> prediction probabilities (using torch.softmax() for multi-class classification)
    target_image_pred_probs = torch.softmax(target_image_pred, dim=1)

    # 7. Convert prediction probabilities -> prediction labels
    target_image_pred_label = torch.argmax(target_image_pred_probs, dim=1)
    
    # 8. Plot the image alongside the prediction and prediction probability
    plt.imshow(target_image.squeeze().permute(1, 2, 0)) # make sure it's the right size for matplotlib
    if class_names:
        title = f"Pred: {class_names[target_image_pred_label.cpu()]} | Prob: {target_image_pred_probs.max().cpu():.3f}"
    else: 
        title = f"Pred: {target_image_pred_label} | Prob: {target_image_pred_probs.max().cpu():.3f}"
    plt.title(title)
    plt.axis(False);

Using the function

pred_and_plot_image(model=model_1,
                    image_path=custom_image_path,
                    class_names=class_names,
                    transform=custom_image_transform,
                    device=device)

Thank you for your attention!



Course materials

ihor.miroshnychenko@knu.ua

Data Mirosh

@ihormiroshnychenko

@aranaur

aranaur.rbind.io