
Lately I have been studying DCGAN and LSGAN with custom weight initialization.


import torch.nn as nn

def weights_init(m):
  # DCGAN-style init: N(0, 0.02) for conv/deconv weights, N(1, 0.02) for BatchNorm weights, zero bias
  if isinstance(m, nn.Conv2d):
    nn.init.normal_(m.weight.data, 0.0, 0.02)
    print('Conv')

  elif isinstance(m, nn.ConvTranspose2d):
    nn.init.normal_(m.weight.data, 0.0, 0.02)
    print('Trans')

  elif isinstance(m, nn.BatchNorm2d):
    nn.init.normal_(m.weight.data, 1.0, 0.02)
    nn.init.constant_(m.bias, 0)
    print('Batch')


This is my initialization function.

And this is my Generator.

class Generator(nn.Module):
  def __init__(self, nz, ngf, channels):
    super(Generator, self).__init__()

    self.layer1 = nn.Sequential(
        nn.ConvTranspose2d(in_channels=nz, out_channels=ngf * 4, kernel_size=4, stride=1,
                           padding=0, bias=False),
        nn.BatchNorm2d(ngf * 4),
        nn.ReLU(True)
    )
    self.layer2 = nn.Sequential(
        nn.ConvTranspose2d(in_channels=ngf * 4, out_channels=ngf * 2, kernel_size=4, stride=2,
                           padding=1, bias=False),
        nn.BatchNorm2d(ngf * 2),
        nn.ReLU(True)
    )
    self.layer3 = nn.Sequential(
        nn.ConvTranspose2d(in_channels=ngf * 2, out_channels=ngf * 1, kernel_size=4, stride=2,
                           padding=1, bias=False),
        nn.BatchNorm2d(ngf * 1),
        nn.ReLU(True)
    )
    self.layer4 = nn.Sequential(
        nn.ConvTranspose2d(in_channels=ngf * 1, out_channels=channels, kernel_size=4, stride=2,
                           padding=1, bias=False),
        nn.Tanh()
    )

generator = Generator(100, 64, 1).to(device)
generator.apply(weights_init).state_dict()
OrderedDict([('layer1.0.weight',
              tensor([[[[ 2.8698e-03,  5.4211e-03, -1.2506e-02, -9.0855e-03],
                        [-1.3270e-02,  6.4097e-03, -3.0736e-03,  7.3850e-03],
                        [-8.1306e-03, -1.4132e-05, -1.0484e-02, -1.5072e-02],
                        [-1.0174e-02, -4.2638e-03, -8.8196e-03,  4.4663e-03]],
              
                       [[ 1.8402e-03, -1.3164e-02, -1.4002e-02, -1.2906e-02],
                        [ 1.2970e-02,  1.0097e-02,  1.1278e-02,  1.3000e-02],
                        [-1.0825e-02,  1.3762e-03,  5.6415e-03,  9.2425e-03],
                        [-9.3556e-03,  4.9029e-03, -3.5206e-03,  1.0317e-02]],

The result looks like the initialization was not applied.

How can I apply this initialization?

I tried

def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
        print('layer success')
    elif classname.find("BatchNorm") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)
        print('batch success')

this function as well, but it was not applied either.

I noticed that the weight initialization seems to be applied to some layers but not others, apparently at random. I tried to solve this problem but couldn't.

Please help me.

  • You initialized the weights with a normal distribution with zero mean and 0.02 std; the tensor `layer1.0.weight` has a maximum value of 0.013 and a minimum of -0.0151. I don't see any problem here. – CuCaRot Nov 03 '22 at 01:41
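
For reference, a quick sanity check (a sketch, not from the original thread) of what a draw from N(0, 0.02) typically looks like: almost all values fall inside roughly ±0.06 (three standard deviations), so entries around ±0.015 are exactly what a correctly initialized tensor should contain.

import torch

# Hypothetical check: draw a small tensor the same way weights_init does
# and inspect the typical magnitude of its entries.
sample = torch.empty(4, 4)
torch.nn.init.normal_(sample, mean=0.0, std=0.02)
print(sample)                       # values roughly in the ±0.06 range
print(sample.abs().max().item())    # usually a few hundredths at most
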

1 Answer


After a simple check, I found no evidence that the weights and biases were not initialized as intended by your initialization function.

import torch
from copy import deepcopy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
generator = Generator(100, 64, 1).to(device)

# Snapshot the parameters before initialization, then apply it.
before = deepcopy(generator.state_dict())
generator.apply(weights_init)
after = generator.state_dict()

for k, v in before.items():
    if not k.endswith(("weight", "bias")):
        continue
    if k.endswith("weight"):
        # Std after init should be close to 0.02, and the tensor should differ from before.
        print(torch.std(after[k]).item())
        print(k, torch.allclose(v, after[k]))
    if k.endswith("bias"):
        # BatchNorm biases are zero both by default and after constant_(bias, 0), so both prints are True.
        print(torch.all(before[k]==0.).item(), torch.all(after[k]==0.).item())
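
If you want a more direct confirmation, you can also compare the empirical statistics of each parameter against the target distribution. A sketch (not part of the original answer; it assumes the generator, weights_init, and import torch.nn as nn from above):

# Conv/ConvTranspose weights should look like N(0, 0.02); BatchNorm weights like N(1, 0.02).
for name, module in generator.named_modules():
    if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d)):
        w = module.weight.data
        print(f"{name}: mean={w.mean().item():.4f}, std={w.std().item():.4f} (expect ~0.00, ~0.02)")
    elif isinstance(module, nn.BatchNorm2d):
        w = module.weight.data
        print(f"{name}: mean={w.mean().item():.4f}, std={w.std().item():.4f} (expect ~1.00, ~0.02)")
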
TQCH