I'm trying to run ControlNet for Stable Diffusion, on my Mac M1. Here's the code I'm running
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
import torch
import PIL.Image as Image
checkpoint = "lllyasviel/control_v11p_sd15_openpose"
controlnet = ControlNetModel.from_pretrained(checkpoint, torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5",
torch_dtype=torch.float16,
controlnet=controlnet,
safety_checker=None).to("mps")
pipe.enable_attention_slicing()
prompt = "An astronaut, RAW photo, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"
generator = torch.Generator(device="cpu").manual_seed(-1)
image = pipe(
prompt,
image=Image.open("control.png"),
num_inference_steps=10,
generator=generator
).images[0]
image.save('output.png')
Where control.png is an image of the pose that was processed with this code
from controlnet_aux import OpenposeDetector
processor = OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
control_image = processor(image, hand_and_face=True)
control_image.save("control.png")
However whenever I run the code I get and error message that looks like this
loc("varianceEps"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/a0876c02-1788-11ed-b9c4-96898e02b808/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":219:0)): error: input types 'tensor<1x77x1xf16>' and 'tensor<1xf32>' are not broadcast compatible
LLVM ERROR: Failed to infer result type(s).
zsh: abort /Users/ulto4/miniforge3/envs/myenv/bin/python
I don't know how to approach this problem, and any help would be appreciated.