I am trying to get a Python script to redact a Word document based on a list of words to redact. I found an article with the code, but I can't seem to get it to work.
Link: https://arccoder.medium.com/redact-word-documents-using-python-7a676fd84d5e
I don't think it's too hard to make it work, but due to my limited knowledge I can't figure out how or where to put my input/output paths, etc.
Can you help me figure out where to fill in the needed inputs and outputs?
def _apply_redaction(run, txt_color, background_color):
    """Give one run the redaction highlight and the matching text color."""
    # Highlight the background color
    run.font.highlight_color = background_color
    # Match the text color to the background color
    run.font.color.rgb = txt_color


def redact_document(input_path: str, output_path: str, pattern: list, color: str = None) -> None:
    """Redact every match of ``pattern`` in a Word document and save a copy.

    Example::

        redact_document("input.docx", "redacted.docx", ["secret", "password"])

    Args:
        input_path: Path of the document to open.
        output_path: Path the redacted document is saved to.
        pattern: Strings to redact. They are joined with ``|`` and used as one
            regular expression, so regex metacharacters in an entry are
            interpreted as such (use ``re.escape`` for literal words that
            contain them).
        color: Forwarded unchanged to ``redact_colors``, which supplies the
            text color and the highlight color used for redaction.

    Raises:
        ValueError: If the highlight flags and match boundaries returned by
            ``process_matches`` are inconsistent with each other.

    NOTE: Matching is done on each run's text separately, so a word that is
    split across two runs is not found. Redaction only changes the font
    highlight and color; the matched text itself is left in the document.
    """
    # Get the text color and text-background color for redaction
    txt_color, background_color = redact_colors(color)
    # Open the input document
    doc = Document(input_path)
    # The pattern is identical for every run, so compile it once up front
    redact_regex = re.compile('|'.join(pattern))
    # Loop through paragraphs
    for para in doc.paragraphs:
        # Loop through the runs in reverse order: splitting a run adds runs to
        # the paragraph, and walking backwards keeps the remaining indices valid
        for run_index in range(len(para.runs) - 1, -1, -1):
            run = para.runs[run_index]
            # Find the start and end indices of the patterns in the run-text
            match_pairs = [(match.start(), match.end()) for match in redact_regex.finditer(run.text)]
            # Get the locations in the format required for `split_run_by` function
            highlights, matches = process_matches(match_pairs, run.text)
            # Go to redact only if patterns are found in the text
            if len(highlights) == 0 or len(matches) == 0:
                continue
            if len(highlights) != len(matches) - 1:
                # The original built this exception without raising it, so the
                # inconsistency was silently ignored
                raise ValueError('Calculation error within matches and highlights')
            if len(matches) == 2:  # When a pattern is the only text in the run
                _apply_redaction(run, txt_color, background_color)
                continue
            # Split the runs using the matches
            new_runs = split_run_by(para, run, matches[1:-1])
            # Highlight each new run that matches a pattern (a distinct loop
            # name avoids rebinding the outer `run`)
            for highlight, new_run in zip(highlights, new_runs):
                if highlight:
                    _apply_redaction(new_run, txt_color, background_color)
    # Save the redacted document to the output path
    doc.save(output_path)