I'm trying to use Stable Diffusion's DDIM sampler, passing my encoded images as the conditioning:
```python
covered_images = diffusion_model_model.get_first_stage_encoding(
    diffusion_model_model.encode_first_stage(covered_images)
)
print(covered_images.shape)
destination_images = diffusion_model_model.encode_first_stage(destination_images)
```
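For context, this is roughly how the model and sampler are set up (a minimal sketch following the standard CompVis stable-diffusion repo; the config path, checkpoint path, and `BATCH_SIZE` are placeholders for my actual values):

```python
import torch
from omegaconf import OmegaConf

from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler

BATCH_SIZE = 1  # placeholder

# Load the LDM config and checkpoint the same way the repo's sample scripts do.
config = OmegaConf.load("configs/stable-diffusion/v1-inference.yaml")
diffusion_model_model = instantiate_from_config(config.model)
state = torch.load("models/ldm/stable-diffusion-v1/model.ckpt", map_location="cpu")
diffusion_model_model.load_state_dict(state["state_dict"], strict=False)
diffusion_model_model = diffusion_model_model.cuda().eval()

sampler = DDIMSampler(diffusion_model_model)
```

The failing call (line 71 of my train.py in the traceback below) is then:

```python
latenet_covered_images = sampler.sample(
    S=50, batch_size=BATCH_SIZE, shape=(4, 64, 64), conditioning=covered_images
)
```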
The printed shape of covered_images, i.e. in latent space, is:

```
torch.Size([1, 4, 64, 64])
```

which matches the VAE's 8x spatial downsampling (with 4 latent channels) of the original picture, whose shape is:

```
torch.Size([1, 3, 512, 512])
```
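As a sanity check, decoding that latent round-trips back to the original resolution, so I think the encoding step itself is fine (a sketch, using the model loaded above):

```python
with torch.no_grad():
    decoded = diffusion_model_model.decode_first_stage(covered_images)
print(decoded.shape)  # torch.Size([1, 3, 512, 512]) -- the VAE upsamples the 64x64 latent by 8x
```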
The error I get is:

```
Traceback (most recent call last):
  File "/home/user/stable-diffusion/VAE/train.py", line 71, in <module>
    latenet_covered_images=sampler.sample(S=50,batch_size=BATCH_SIZE,shape=(4,64,64),conditioning=covered_images)
  File "/home/user/.local/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/home/user/stable-diffusion/ldm/models/diffusion/ddim.py", line 96, in sample
    samples, intermediates = self.ddim_sampling(conditioning, size,
  File "/home/user/.local/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/home/user/stable-diffusion/ldm/models/diffusion/ddim.py", line 149, in ddim_sampling
    outs = self.p_sample_ddim(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps,
  File "/home/user/.local/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/home/user/stable-diffusion/ldm/models/diffusion/ddim.py", line 172, in p_sample_ddim
    e_t = self.model.apply_model(x, t, c)
  File "/home/user/stable-diffusion/ldm/models/diffusion/ddpm.py", line 987, in apply_model
    x_recon = self.model(x_noisy, t, **cond)
  File "/home/user/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/user/stable-diffusion/ldm/models/diffusion/ddpm.py", line 1410, in forward
    out = self.diffusion_model(x, t, context=cc)
  File "/home/user/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/user/stable-diffusion/ldm/modules/diffusionmodules/openaimodel.py", line 732, in forward
    h = module(h, emb, context)
  File "/home/user/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/user/stable-diffusion/ldm/modules/diffusionmodules/openaimodel.py", line 85, in forward
    x = layer(x, context)
  File "/home/user/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/user/stable-diffusion/ldm/modules/attention.py", line 258, in forward
    x = block(x, context=context)
  File "/home/user/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/user/stable-diffusion/ldm/modules/attention.py", line 209, in forward
    return checkpoint(self._forward, (x, context), self.parameters(), self.checkpoint)
  File "/home/user/stable-diffusion/ldm/modules/diffusionmodules/util.py", line 114, in checkpoint
    return CheckpointFunction.apply(func, len(inputs), *args)
  File "/home/user/stable-diffusion/ldm/modules/diffusionmodules/util.py", line 127, in forward
    output_tensors = ctx.run_function(*ctx.input_tensors)
  File "/home/user/stable-diffusion/ldm/modules/attention.py", line 213, in _forward
    x = self.attn2(self.norm2(x), context=context) + x
  File "/home/user/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/user/stable-diffusion/ldm/modules/attention.py", line 175, in forward
    k = self.to_k(context)
  File "/home/user/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/user/.local/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 103, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x64 and 768x320)
```
I tried playing with the shape argument of the sampler, but it didn't change anything.
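If I'm reading the error right, it happens in the UNet's cross-attention key projection, which for SD v1 is (I believe) `nn.Linear(context_dim, inner_dim, bias=False)` with context_dim=768 and inner_dim=320, i.e. it expects conditioning shaped like a CLIP text embedding, [batch, seq_len, 768]. My image latent has a last dimension of 64, and nn.Linear applies to the last dimension while flattening the leading ones (1 * 4 * 64 = 256 rows), which reproduces the exact numbers in the error. A standalone sketch of the mismatch (`to_k` here is my stand-in for the real layer in ldm/modules/attention.py):

```python
import torch
import torch.nn as nn

# Stand-in for the first cross-attention key projection in the SD v1 UNet
# (my assumption: Linear(context_dim=768, inner_dim=320, bias=False)).
to_k = nn.Linear(768, 320, bias=False)

latent = torch.randn(1, 4, 64, 64)   # my encoded image, passed as conditioning
# to_k(latent)  # Linear acts on the last dim (64) and flattens the rest:
#               # RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x64 and 768x320)

text_emb = torch.randn(1, 77, 768)   # shape of a CLIP text embedding, which to_k accepts
print(to_k(text_emb).shape)          # torch.Size([1, 77, 320])
```

So it looks like conditioning must have a last dimension of 768 (e.g. what model.get_learned_conditioning produces for text prompts), and an image latent can't be fed through this path directly. Is there a supported way to condition the sampler on an encoded image, or do I need a different conditioning setup?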