I am trying to use Google DLP to Mask PII information using below code.
from typing import List
import google.cloud.dlp_v2
def deidentify_with_mask(
project: str,
input_str: str,
info_types: List[str],
masking_character: str = None,
number_to_mask: int = 0,
charactersToIgnore : list[str]=None
) -> None:
# Instantiate a client
dlp = google.cloud.dlp_v2.DlpServiceClient()
# Convert the project id into a full resource id.
parent = f"projects/{project}"
# Construct inspect configuration dictionary
inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}
# Construct deidentify configuration dictionary
deidentify_config = {
"info_type_transformations": {
"transformations": [
{
"primitive_transformation": {
"character_mask_config": {
"masking_character": masking_character,
"number_to_mask": number_to_mask,
}
}
}
]
}
}
# Construct item
item = {"value": input_str}
# Call the API
response = dlp.deidentify_content(
request={
"parent": parent,
"deidentify_config": deidentify_config,
"inspect_config": inspect_config,
"item": item,
}
)
# Print out the results.
print(response.item.value)
# Replace with your project ID
project_id = "sandydev"
# Specify the input string, info types, masking character, and number to mask
input_string = "My name is Alicia Abernathy, and my email address is aabernathy@example.com."
info_types = ["EMAIL_ADDRESS"]
masking_character = "*"
number_to_mask = 25
# Call the deidentify_with_mask function
deidentify_with_mask(
project=project_id,
input_str=input_string,
info_types=info_types,
masking_character=masking_character,
number_to_mask=number_to_mask,
charactersToIgnore=["@"]
)
the required output is :
My name is Alicia Abernathy, and my email address is ##########@#######.###.
but is coming as :
My name is Alicia Abernathy, and my email address is **********************
I want to ignore characters like '@' but is masking that also. Need some input to resolve this.