I am new to working with PyTorch and wanted to build a simple autoencoder for 255×255 RGB images to play around with it. However, the output shape isn't the same as the input shape.
Here's the model:
import torch.nn as nn

class AutoEncoder(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        # encoder: two conv blocks, each halving the spatial size with max pooling
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        # decoder: transposed convolutions that are supposed to bring the size back up
        self.decoder = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=128, out_channels=32, kernel_size=3, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(in_channels=32, out_channels=3, kernel_size=3, output_padding=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
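I produced the summary roughly like this (the input size (3, 255, 255) is taken from my images; I've left out any device arguments):

    from torchsummary import summary

    model = AutoEncoder()
    # per-layer output shapes for a single 3×255×255 input
    summary(model, (3, 255, 255))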
And here are the shapes given by the torchsummary package
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 32, 255, 255] 896
ReLU-2 [-1, 32, 255, 255] 0
MaxPool2d-3 [-1, 32, 127, 127] 0
Conv2d-4 [-1, 128, 127, 127] 36,992
ReLU-5 [-1, 128, 127, 127] 0
MaxPool2d-6 [-1, 128, 63, 63] 0
Conv2d-7 [-1, 128, 63, 63] 147,584
ReLU-8 [-1, 128, 63, 63] 0
ConvTranspose2d-9 [-1, 32, 66, 66] 36,896
ReLU-10 [-1, 32, 66, 66] 0
ConvTranspose2d-11 [-1, 3, 69, 69] 867
Sigmoid-12 [-1, 3, 69, 69] 0
I have seen in another post that the output_padding option on the transposed convolutions in the decoder should help with the output shape, but it hasn't worked for me.
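If I read the ConvTranspose2d docs right, the output height should be (H_in - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + output_padding + 1, so I tried to reproduce the numbers from my summary with a small helper (just a sanity check on my side; the defaults below are assumptions matching my layers):

    def conv_transpose_out(h_in, kernel_size=3, stride=1, padding=0, dilation=1, output_padding=0):
        # output-size formula from the ConvTranspose2d documentation
        return (h_in - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + output_padding + 1

    print(conv_transpose_out(63, output_padding=1))  # 66, matches ConvTranspose2d-9 above
    print(conv_transpose_out(66, output_padding=1))  # 69, matches ConvTranspose2d-11

So with the default stride of 1 each transposed conv only adds a few pixels, which is nowhere near undoing the two pooling steps, but I'm not sure what the right way to fix this is.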
I don't know what the problem might be. Coming from TensorFlow I would have used an upsampling layer (UpSampling2D), but from what I've seen this isn't the way to do it in PyTorch.
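For reference, this is roughly the kind of decoder I had in mind, using nn.Upsample in place of the transposed convolutions (just a sketch with an assumed scale_factor=2 and the default 'nearest' mode; I don't know if it's the idiomatic PyTorch approach):

    # hypothetical alternative decoder using nn.Upsample
    decoder = nn.Sequential(
        nn.Conv2d(128, 128, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Upsample(scale_factor=2),   # 63 -> 126
        nn.Conv2d(128, 32, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Upsample(scale_factor=2),   # 126 -> 252, still not 255
        nn.Conv2d(32, 3, kernel_size=3, padding=1),
        nn.Sigmoid()
    )

Even that would only get me back to 252×252 because of the odd 255 input size, so maybe I'm misunderstanding something more basic.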
Could anyone explain to me why my shapes are broken with my current model? Thanks