I've ported my Java app from the V2Beta version of the Google Cloud DLP API to V2, and the results coming back seem to be less "accurate" than with the V2Beta version.
Names, addresses, zip codes, ages, etc. don't get de-identified at all. The results I'm seeing with the V2 API are very different from what I was getting with the V2Beta API. Maybe I'm doing something wrong? Given the input "Hello Mr. John S. Smith! This is Mr. Jones writing back with my SSN: 911-87-9111", the only thing that gets de-identified is the SSN digits. I would have expected the names to be de-identified as well.
I'm using Spring to inject things like the credentials, and there are some Lombok annotations to simplify my life, but the bulk of the code should be pretty straightforward:
import com.google.api.gax.core.CredentialsProvider;
import com.google.cloud.ProjectName;
import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.cloud.dlp.v2.DlpServiceSettings;
import com.google.privacy.dlp.v2.CharacterMaskConfig;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.InfoType;
import com.google.privacy.dlp.v2.InfoTypeTransformations;
import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation;
import com.google.privacy.dlp.v2.InspectConfig;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
import lombok.AccessLevel;
import lombok.Setter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.springframework.util.CollectionUtils.isEmpty;
@Service("DeIdentifyTest")
@FieldDefaults(level = AccessLevel.PRIVATE)
@Setter
@Slf4j
public class DeIdentifyTest {
final DlpServiceSettings settings;
final String projectId;
@SneakyThrows
public DeIdentifyTest(CredentialsProvider credentialsProvider, String projectId) {
this.settings = DlpServiceSettings.newBuilder().setCredentialsProvider(credentialsProvider).build();
this.projectId = projectId;
}
public CompletableFuture<Collection<String>> redact(final Collection<String> input,
final String mask) {
return CompletableFuture.supplyAsync(() -> redactContent(input, mask));
}
@SneakyThrows
private Collection<String> redactContent(Collection<String> input, String mask) {
log.debug("Input: {}", input);
if (isEmpty(input)) {
return input;
}
CharacterMaskConfig characterMaskConfig =
CharacterMaskConfig.newBuilder().setMaskingCharacter(mask).build();
PrimitiveTransformation primitiveTransformation =
PrimitiveTransformation.newBuilder().setCharacterMaskConfig(characterMaskConfig).build();
InfoTypeTransformation infoTypeTransformationObject =
InfoTypeTransformation.newBuilder().setPrimitiveTransformation(primitiveTransformation).build();
InfoTypeTransformations infoTypeTransformationArray =
InfoTypeTransformations.newBuilder().addTransformations(infoTypeTransformationObject).build();
DeidentifyConfig deidentifyConfig =
DeidentifyConfig.newBuilder().setInfoTypeTransformations(infoTypeTransformationArray).build();
try (DlpServiceClient dlpClient = DlpServiceClient.create(settings)) {
// Create the deidentification request object
DeidentifyContentRequest request =
DeidentifyContentRequest.newBuilder()
.setParent(ProjectName.of(projectId).toString())
.setDeidentifyConfig(deidentifyConfig)
.setItem(createContentItemWithTable(input))
.build();
// Execute the deidentification request
DeidentifyContentResponse response = dlpClient.deidentifyContent(request);
Table table = response.getItem().getTable();
return Stream.of(table.getRowsList())
.flatMap(rows -> rows.stream())
.flatMap(row -> row.getValuesList().stream())
.map(val -> val.getStringValue())
.collect(Collectors.toCollection(LinkedList::new));
}
}
private ContentItem createContentItemWithTable(Collection<String> input) {
Table.Builder tableBuilder = Table.newBuilder().addHeaders(FieldId.newBuilder().setName("unused").build());
Value.Builder valueBuilder = Value.newBuilder();
Optional<Table.Builder> tableOpt = input.stream()
.filter(item -> isNotBlank(item))
.map(item -> valueBuilder.setStringValue(item).build())
.map(value -> Row.newBuilder().addValues(value).build())
.map(row -> tableBuilder.addRows(row))
.reduce((t1, t2) -> t1);
return ContentItem.newBuilder().setTable(tableOpt.get().build()).build();
}
}