I have just become acquainted with GAN models. I decided to build a DCGAN model based on this link; the difference is that they use Keras and I use PyTorch.
The problem is that my D loss and G loss do not seem reasonable: the G loss is very high (around 8) and the D loss is very small (around 0.01). I have tried various approaches, but they did not help much. As for the results, they look very good at the beginning but gradually get stuck, and after many epochs the model collapses. This is my loss graph:
enter image description here
Note: my dataset consists of 64×64 colour images.
Here are the relevant parts of my code:
My Discriminator
class Discriminator(nn.Module):
    """DCGAN-style discriminator for 64 x 64 images.

    Four stride-2 convolutions halve the spatial size each time
    (64 -> 32 -> 16 -> 8 -> 4). The feature map is then flattened and fed to
    a two-layer classifier that ends in a sigmoid, so the module returns one
    score in [0, 1] per sample.

    Args:
        in_channels: Number of channels of the input images (3 for RGB).
        negative_slope: Slope used by every LeakyReLU for negative inputs.
    """

    def __init__(self, in_channels: int = 3, negative_slope: float = 0.01):
        super().__init__()

        def down_block(c_in, c_out):
            # Conv (no bias, BatchNorm supplies the shift) -> BN -> LeakyReLU.
            # kernel 4 / stride 2 / padding 1 exactly halves height and width.
            return [
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=4,
                          stride=2, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(negative_slope, inplace=True),
            ]

        self.main = nn.Sequential(
            # input: in_channels x 64 x 64
            *down_block(in_channels, 64),
            # output: 64 x 32 x 32
            *down_block(64, 128),
            # output: 128 x 16 x 16
            *down_block(128, 256),
            # output: 256 x 8 x 8
            *down_block(256, 512),
            # output: 512 x 4 x 4
            nn.Flatten(),
            # output: 512*4*4 features per sample
            nn.Linear(in_features=512 * 4 * 4, out_features=256),
            nn.LeakyReLU(negative_slope, inplace=True),
            nn.Linear(in_features=256, out_features=1),
            nn.Sigmoid(),
            # output: (batch, 1)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the (batch, 1) sigmoid scores for a batch of images ``x``."""
        return self.main(x)
My Generator:
class Generator(nn.Module):
    """DCGAN-style generator producing 64 x 64 images from a latent vector.

    A linear layer projects the latent vector to a 512 x 8 x 8 feature map.
    Three stride-2 transposed convolutions then double the spatial size each
    time (8 -> 16 -> 32 -> 64). A final size-preserving convolution followed
    by a sigmoid maps the features to image channels with values in [0, 1].

    Args:
        latent_dim: Length of the input noise vector.
        out_channels: Number of channels of the generated image (3 for RGB).
    """

    def __init__(self, latent_dim: int = 250, out_channels: int = 3):
        super().__init__()
        self.latent_dim = latent_dim

        def up_block(c_in, c_out):
            # ConvTranspose (no bias, BatchNorm supplies the shift) -> BN -> ReLU.
            # kernel 4 / stride 2 / padding 1 exactly doubles height and width.
            return [
                nn.ConvTranspose2d(in_channels=c_in, out_channels=c_out,
                                   kernel_size=4, stride=2, padding=1,
                                   bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        self.main = nn.Sequential(
            # input: (batch, latent_dim)
            nn.Linear(in_features=latent_dim, out_features=8 * 8 * 512),
            nn.ReLU(),
            nn.Unflatten(1, (512, 8, 8)),
            # output: 512 x 8 x 8
            *up_block(512, 256),
            # output: 256 x 16 x 16
            *up_block(256, 128),
            # output: 128 x 32 x 32
            *up_block(128, 64),
            # output: 64 x 64 x 64
            nn.Conv2d(in_channels=64, out_channels=out_channels, kernel_size=4,
                      padding='same', bias=False),
            nn.Sigmoid(),
            # output: out_channels x 64 x 64, values in [0, 1]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a (batch, latent_dim) noise tensor ``x`` to a batch of images."""
        return self.main(x)
Criterion and training loop:
# Adversarial training script: alternates one discriminator update and one
# generator update per mini-batch and records the per-epoch average losses.

# Binary cross-entropy applied to the discriminator's sigmoid outputs.
criterion = nn.BCELoss()
# NOTE(review): the DCGAN paper recommends Adam with beta1 = 0.5 for stability;
# the original hyper-parameters (default betas) are intentionally kept here.
Discriminator_optimizer = torch.optim.Adam(discriminator.parameters(), lr=0.0001)
Generator_optimizer = torch.optim.Adam(generator.parameters(), lr=0.0002)

g_history = []
d_history = []

# Size of the noise vector; must match the in_features of the generator's
# first Linear layer.
latent_dim = 250

# Number of mini-batches per epoch. It does not change between epochs, so it
# is computed once instead of on every pass.
batch_count = data.shape[0] // batch_size
if batch_count == 0:
    # Without this guard the inner loop would never run and the per-epoch
    # averages below would fail with a ZeroDivisionError.
    raise ValueError(
        f"batch_size ({batch_size}) is larger than the dataset "
        f"({data.shape[0]} samples); no batch can be formed"
    )

# Target labels are constant, so build them once, directly on the device.
real_label = torch.ones(batch_size, 1, device=device)
fake_label = torch.zeros(batch_size, 1, device=device)

# range(num_epochs + 1) runs epochs 0..num_epochs inclusive, as in the
# original script.
for epoch in range(num_epochs + 1):
    print("Epoch", epoch)
    total_D_loss = 0
    total_G_loss = 0

    for _ in tqdm(range(batch_count)):
        # Batches are drawn uniformly *with replacement*, so an "epoch" here
        # is batch_count random batches rather than one full pass over data.
        batch_idx = np.random.randint(low=0, high=data.shape[0], size=batch_size)
        data_train = torch.as_tensor(data[batch_idx], dtype=torch.float32, device=device)

        # ---- train D: real batch should score 1, fake batch should score 0
        discriminator.zero_grad()
        r_output = discriminator(data_train)
        D_loss_real = criterion(r_output, real_label)

        noise = torch.randn(batch_size, latent_dim, device=device)
        # detach(): the discriminator update must not back-propagate into
        # the generator; this also avoids a wasted backward pass through G.
        f_output1 = generator(noise).detach()
        f_output2 = discriminator(f_output1)
        D_loss_fake = criterion(f_output2, fake_label)

        D_loss = (D_loss_real + D_loss_fake) / 2
        D_loss.backward()
        Discriminator_optimizer.step()
        total_D_loss += D_loss.item()

        # ---- train G: fresh fakes should be scored as real by D
        generator.zero_grad()
        noise = torch.randn(batch_size, latent_dim, device=device)
        G_output1 = generator(noise)
        G_output2 = discriminator(G_output1)
        G_loss = criterion(G_output2, real_label)
        total_G_loss += G_loss.item()
        # This backward also leaves gradients on the discriminator's
        # parameters; they are cleared by discriminator.zero_grad() at the
        # start of the next iteration and never applied.
        G_loss.backward()
        Generator_optimizer.step()

    avg_D_loss = total_D_loss / batch_count
    avg_G_loss = total_G_loss / batch_count
    g_history.append(avg_G_loss)
    d_history.append(avg_D_loss)
    print(f"D_loss: {avg_D_loss:.4f}, G_loss: {avg_G_loss:.4f}")
I appreciate all of your guidance. Thank you very much!!
I have tried changing the architecture of G (because G is the part I have to improve) and rewriting the training loop.
I am looking for the mistake I made in developing this model.
Hưng Nguyễn is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.