I am creating a CNN model to recognize dogs and cats. I trained it, and when I evaluate its accuracy by hand it reaches 80-85% on unseen data.
But when I try to use torchmetrics.Accuracy to calculate the accuracy, for some reason I get wrong numbers. Let me explain.
The code of the model (I use Python, PyTorch, and Lightning to optimize the model and the code):
import lightning as L
import torch
import torchmetrics
import torchvision
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torchvision.transforms import ToTensor
from CustomDataset import CustomDataset
class Model(L.LightningModule):
    def __init__(self, batch_size, learning_rate, num_classes):
        super(Model, self).__init__()
        self.save_hyperparameters()
        ## MODEL LAYERS, CRITERION, etc. GO HERE
        self.accuracy = torchmetrics.Accuracy(num_classes=2, average='macro', task='multiclass')
        self.test_transform = transforms.Compose([
            transforms.Resize((200, 200)),  # Resize images to 200x200
            transforms.ToTensor(),  # Convert images to PyTorch tensors
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet mean/std
        ])
        self.transform = transforms.Compose([
            transforms.RandomResizedCrop(200),  # Randomly crop and resize images to 200x200
            transforms.RandomHorizontalFlip(p=0.5),  # Randomly flip images horizontally
            transforms.RandomRotation(15),  # Randomly rotate images by up to 15 degrees
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.ToTensor(),  # Convert images to PyTorch tensors
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet mean/std
        ])
    def forward(self, image):
        image = F.relu(self.conv1(image))
        image = self.pool(image)
        image = F.relu(self.conv2(image))
        image = self.pool(image)
        image = F.relu(self.conv3(image))
        image = self.pool(image)  # Output is now (128, 25, 25)
        image = torch.flatten(image, 1)  # Flatten the output
        image = F.relu(self.fc1(image))
        image = self.fc2(image)
        return image
    def training_step(self, batch, batch_idx):
        images, labels = batch
        predictions = self(images)  # Forward pass
        loss = self.criterion(predictions, labels)  # Compute the loss
        predicted_classes = torch.argmax(F.softmax(predictions, dim=1), dim=1)
        predictions_softmax = F.softmax(predictions, dim=1)
        acc = self.accuracy(predictions_softmax, labels)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True)
        return loss  # Return the loss for backpropagation
    def validation_step(self, batch, batch_idx):
        images, labels = batch
        predictions = self(images)
        loss = self.criterion(predictions, labels)
        predicted_classes = torch.argmax(F.softmax(predictions, dim=1), dim=1)
        predictions_softmax = F.softmax(predictions, dim=1)
        acc = self.accuracy(predictions_softmax, labels)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss
    def test_step(self, batch, batch_idx):
        images, labels = batch
        predictions = self(images)  # Forward pass
        loss = self.criterion(predictions, labels)  # Compute the loss
        predicted_classes = torch.argmax(F.softmax(predictions, dim=1), dim=1)
        predictions_softmax = F.softmax(predictions, dim=1)
        acc = self.accuracy(predictions_softmax, labels)
        real_step_acc = (labels == predicted_classes).sum() / self.batch_size
        self.log('test_loss', loss, prog_bar=True)
        self.log('real_test_acc', real_step_acc, prog_bar=True)
        self.log('test_acc', acc, prog_bar=True)
        return loss
    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate, momentum=0.9)
        return optimizer
    def train_dataloader(self):
        # Set up and return the training DataLoader
        filepath_train = "dataset/test/"
        train_dataset = datasets.ImageFolder(root=filepath_train, transform=self.transform)
        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=False, num_workers=16)
        return train_loader
    def test_dataloader(self):
        # Set up and return the test DataLoader
        filepath_test = "dataset/test/"
        test_dataset = datasets.ImageFolder(root=filepath_test, transform=self.transform)
        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=True, num_workers=16)
        return test_loader
    def val_dataloader(self):
        # Set up and return the validation DataLoader
        filepath_val = "dataset/val/"
        val_dataset = datasets.ImageFolder(root=filepath_val, transform=self.test_transform)
        val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=16)
        return val_loader
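For completeness, I run training and testing roughly like this (a simplified sketch; the exact hyperparameter values and Trainer arguments are not shown here):
model = Model(batch_size=64, learning_rate=0.001, num_classes=2)
trainer = L.Trainer(max_epochs=10)
trainer.fit(model)   # uses train_dataloader() and val_dataloader() defined on the module
trainer.test(model)  # uses test_dataloader() defined on the module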
Output is like this:
train_acc_epoch 0.7635096907615662
real_test_acc 0.7901701927185059
test_acc 0.39825108647346497
I compute the "real" test accuracy like this:
predictions_softmax = F.softmax(predictions, dim=1)
acc = self.accuracy(predictions_softmax, labels)
real_step_acc = (labels == predicted_classes).sum() / self.batch_size
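In other words, outside of Lightning the same manual check would look roughly like this (a simplified sketch; here I divide by labels.size(0) instead of a fixed batch size, so a short final batch is still counted correctly):
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for images, labels in test_loader:  # test_loader: the same ImageFolder-based DataLoader as above
        predictions = model(images)
        predicted_classes = torch.argmax(predictions, dim=1)  # argmax of logits equals argmax of softmax
        correct += (predicted_classes == labels).sum().item()
        total += labels.size(0)
print('manual accuracy:', correct / total)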
So the problem is:
When I run testing, the test accuracy reported by torchmetrics inside the test_step method is about 40%, but the "real" test accuracy that I compute myself is 80-85%.
What I tried:
When I enable shuffling on the test data (I know it is bad practice, but it was part of the debugging), torchmetrics.Accuracy becomes correct and reports 80-85% accuracy.
So why does shuffling change anything? Please help me; I think it might even be some kind of bug.
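In case it helps with reproducing the behaviour, here is a stripped-down sketch of the metric usage pattern from my test_step, outside Lightning and with random fake logits instead of my dataset (the labels are deliberately sorted by class, the same way ImageFolder serves them when shuffle=False), so the per-batch metric values can be compared with the epoch-level compute():
import torch
import torchmetrics
from torch.nn import functional as F

metric = torchmetrics.Accuracy(num_classes=2, average='macro', task='multiclass')

# Fake "unshuffled" test set: all class-0 samples first, then all class-1 samples
labels = torch.cat([torch.zeros(500, dtype=torch.long), torch.ones(500, dtype=torch.long)])
logits = torch.randn(1000, 2)  # random stand-in for the model's outputs

per_batch_values = []
for start in range(0, 1000, 100):
    probs = F.softmax(logits[start:start + 100], dim=1)
    batch_labels = labels[start:start + 100]
    per_batch_values.append(metric(probs, batch_labels))  # per-batch value, like what self.log(...) records each step

print('mean of per-batch values:', torch.stack(per_batch_values).mean().item())
print('metric.compute() over all batches:', metric.compute().item())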