I found a solution using pdfplumber.
Here is some rough sample code:
from typing import Optional
import pdfplumber
from pdfplumber.page import Page, Table
def cmyk_to_rgb(cmyk: tuple[float, float, float, float]) -> tuple[float, float, float]:
    """Convert a CMYK color to RGB components on a 0-255 scale.

    Each channel is computed as ``255 * (1 - min(1, ink + black))``, the
    simple DeviceCMYK -> DeviceRGB conversion. The ``min`` clamp matters:
    without it, any channel where ``ink + black`` exceeds 1 would come out
    negative.

    Args:
        cmyk: Sequence whose first four items are the cyan, magenta, yellow
            and black components. Values are assumed to lie in 0.0-1.0.

    Returns:
        An ``(r, g, b)`` tuple of numbers in the 0-255 range (not rounded).
    """
    black = cmyk[3]
    # Clamp the combined ink so heavily inked colors saturate at 0 rather
    # than producing negative channel values.
    r, g, b = (255 * (1.0 - min(1.0, ink + black)) for ink in cmyk[:3])
    return r, g, b
def to_bbox(rect: dict) -> tuple[float, float, float, float]:
    """Extract a ``(x0, top, x1, bottom)`` bounding box from a rect dict.

    Args:
        rect: Mapping that provides the ``"x0"``, ``"top"``, ``"x1"`` and
            ``"bottom"`` keys; any other keys are ignored.

    Returns:
        The four values as a tuple, in the order listed above.

    Raises:
        KeyError: If one of the four keys is missing.
    """
    left = rect["x0"]
    upper = rect["top"]
    right = rect["x1"]
    lower = rect["bottom"]
    return (left, upper, right, lower)
def is_included(cell_box: tuple[float, float, float, float], rect_box: tuple[float, float, float, float]):
    """Tell whether ``cell_box`` lies entirely within ``rect_box``.

    Both boxes are ``(left, top, right, bottom)`` tuples. Edges that
    coincide count as included.

    Args:
        cell_box: The box that should fit inside.
        rect_box: The box that should enclose ``cell_box``.

    Returns:
        ``True`` when no edge of ``cell_box`` extends beyond ``rect_box``.
    """
    cell_x0, cell_top, cell_x1, cell_bottom = cell_box
    rect_x0, rect_top, rect_x1, rect_bottom = rect_box
    # The cell's top-left corner must not lie before the rect's top-left ...
    starts_inside = rect_x0 <= cell_x0 and rect_top <= cell_top
    # ... and its bottom-right corner must not lie past the rect's.
    ends_inside = cell_x1 <= rect_x1 and cell_bottom <= rect_bottom
    return starts_inside and ends_inside
def find_rect_for_cell(cell: tuple[float, float, float, float], rects: list[dict]) -> Optional[dict]:
    """Return the first rect whose bounding box fully contains ``cell``.

    Args:
        cell: Cell bounding box as ``(left, top, right, bottom)``.
        rects: Candidate rect dicts, checked in order.

    Returns:
        The first rect in ``rects`` that encloses ``cell``, or ``None`` when
        no rect does (including when ``rects`` is empty).
    """
    for candidate in rects:
        if is_included(cell, to_bbox(candidate)):
            return candidate
    return None
def get_cell_color(cell: tuple[float, float, float, float], page: Page) -> tuple[float, float, float]:
    """Return the fill color of a table cell as RGB on a 0-255 scale.

    The cell's color is taken from the first rect in ``page.rects`` that
    fully contains the cell. White is returned when ``cell`` is empty/``None``,
    when no rect encloses it, or when the enclosing rect has no fill color.

    Args:
        cell: Cell bounding box as ``(left, top, right, bottom)``; may be
            falsy (e.g. ``None``) for a missing cell.
        page: The page whose ``rects`` are searched.

    Returns:
        An ``(r, g, b)`` tuple with components in the 0-255 range.
    """
    rect = find_rect_for_cell(cell, page.rects) if cell else None
    # A rect may be unfilled or lack the key; treat both like "no rect".
    color = rect.get("non_stroking_color") if rect else None
    if color is None:
        return (255, 255, 255)

    # NOTE(review): the fill color's shape is assumed to follow its color
    # space (1 component = gray, 3 = RGB, 4 = CMYK), and a bare number is
    # assumed to be a gray level - confirm against the pdfplumber version in use.
    if isinstance(color, (int, float)):
        color = (color,)
    if len(color) == 1:
        level = 255 * color[0]
        return (level, level, level)
    if len(color) == 3:
        return (255 * color[0], 255 * color[1], 255 * color[2])
    # Anything else is handed to the CMYK conversion, as before.
    return cmyk_to_rgb(color)
# Open the PDF through a context manager so it is closed once the color has
# been read, instead of staying open for the rest of the process.
with pdfplumber.open("/path/to/target.pdf") as pdf:
    page = pdf.pages[0]
    tables: list[Table] = page.find_tables()
    # get RGB color of first(= top-left) cell of first table
    print(get_cell_color(tables[0].rows[0].cells[0], page))  # => (r, g, b)