I am trying to extract images from a PDF using PyMuPDF and this recipe. It works for some images that have hard-edged transparency, but for images with a matte transparency I get artifacts along the edges.
When I extract the image without alpha information and the alpha mask as separate PNGs and combine them using GIMP, the result is as expected, so the information seems to be there.
Can anyone help? Am I missing a step?
import fitz
# Extract every image referenced by the pages of a PDF and save each one as
# "img-<page number>-<xref>.png", merging in the image's soft mask (if any)
# as the alpha channel.
doc = fitz.open(r"Some.pdf")
for page_number in range(len(doc)):
    # Each entry describes one image used by the page. Entry 0 is used as
    # the image's xref and entry 1 as the xref of its soft mask, with 0
    # treated as "no mask".
    for img in doc.getPageImageList(page_number):
        xref = img[0]
        smask = img[1]
        png_name = "img-%s-%s.png" % (page_number, xref)

        base = fitz.Pixmap(doc, xref)
        if smask != 0:
            mask = fitz.Pixmap(doc, smask)
            # Work on a copy of the base image and overwrite its alpha
            # values with the samples of the mask image.
            pix = fitz.Pixmap(base)
            # NOTE(review): partially transparent ("matte") masks are
            # reported to produce artifacts along the edges with this
            # approach, while fully opaque/fully transparent masks look
            # fine. Possibly the colour samples need to be premultiplied
            # with the new alpha values before the PNG is written --
            # confirm against the Pixmap alpha documentation of the
            # installed PyMuPDF version before changing this.
            pix.setAlpha(mask.samples)
            mask = None  # release the mask pixmap as soon as it is merged
        else:
            pix = base

        if pix.n - pix.alpha < 4:  # this is GRAY or RGB
            pix.writePNG(png_name)
        else:  # CMYK: convert to RGB first
            rgb = fitz.Pixmap(fitz.csRGB, pix)
            rgb.writePNG(png_name)
            rgb = None

        # Drop the remaining references so the pixmaps are released before
        # the next image is loaded.
        pix = None
        base = None