Skip to content

Commit dd5de07

Browse files
mwdaub authored and chingor13 committed
samples: Add DLP code samples for custom info types (#1121)
* Add custom info types to code samples
  Include samples of using custom dictionaries and custom regexes.
* Add missing imports
* Fix build errors
* Add tests for custom info types
* Fix bad regexes.
* Fix bad regexes, part 2.
* Update README.md
  Add custom info type flags and fix existing examples so they work with the V2 API.
* Fix import order
* Fix line length violations
* Fix line length formatting violations
* Fix broken character mask test
  DLP now requires the client to specify info types to search for when using DeID with wildcard info types.
* Add SSN info type to tests
* Add info types to DeID with FPE test
1 parent 680f2a4 commit dd5de07

File tree

4 files changed

+153
-6
lines changed

4 files changed

+153
-6
lines changed

dlp/snippets/snippets/src/main/java/com/example/dlp/DeIdentification.java

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import java.time.format.DateTimeParseException;
5858
import java.util.ArrayList;
5959
import java.util.Arrays;
60+
import java.util.Collections;
6061
import java.util.List;
6162
import java.util.stream.Collectors;
6263
import org.apache.commons.cli.CommandLine;
@@ -81,7 +82,11 @@ public class DeIdentification {
8182
* @param projectId ID of Google Cloud project to run the API under.
8283
*/
8384
private static void deIdentifyWithMask(
84-
String string, Character maskingCharacter, int numberToMask, String projectId) {
85+
String string,
86+
List<InfoType> infoTypes,
87+
Character maskingCharacter,
88+
int numberToMask,
89+
String projectId) {
8590

8691
// instantiate a client
8792
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
@@ -108,6 +113,11 @@ private static void deIdentifyWithMask(
108113
.addTransformations(infoTypeTransformationObject)
109114
.build();
110115

116+
InspectConfig inspectConfig =
117+
InspectConfig.newBuilder()
118+
.addAllInfoTypes(infoTypes)
119+
.build();
120+
111121
DeidentifyConfig deidentifyConfig =
112122
DeidentifyConfig.newBuilder()
113123
.setInfoTypeTransformations(infoTypeTransformationArray)
@@ -117,6 +127,7 @@ private static void deIdentifyWithMask(
117127
DeidentifyContentRequest request =
118128
DeidentifyContentRequest.newBuilder()
119129
.setParent(ProjectName.of(projectId).toString())
130+
.setInspectConfig(inspectConfig)
120131
.setDeidentifyConfig(deidentifyConfig)
121132
.setItem(contentItem)
122133
.build();
@@ -147,6 +158,7 @@ private static void deIdentifyWithMask(
147158
*/
148159
private static void deIdentifyWithFpe(
149160
String string,
161+
List<InfoType> infoTypes,
150162
FfxCommonNativeAlphabet alphabet,
151163
String keyName,
152164
String wrappedKey,
@@ -188,6 +200,11 @@ private static void deIdentifyWithFpe(
188200
.addTransformations(infoTypeTransformationObject)
189201
.build();
190202

203+
InspectConfig inspectConfig =
204+
InspectConfig.newBuilder()
205+
.addAllInfoTypes(infoTypes)
206+
.build();
207+
191208
// Create the deidentification request object
192209
DeidentifyConfig deidentifyConfig =
193210
DeidentifyConfig.newBuilder()
@@ -197,6 +214,7 @@ private static void deIdentifyWithFpe(
197214
DeidentifyContentRequest request =
198215
DeidentifyContentRequest.newBuilder()
199216
.setParent(ProjectName.of(projectId).toString())
217+
.setInspectConfig(inspectConfig)
200218
.setDeidentifyConfig(deidentifyConfig)
201219
.setItem(contentItem)
202220
.build();
@@ -513,6 +531,10 @@ public static void main(String[] args) throws Exception {
513531
Options commandLineOptions = new Options();
514532
commandLineOptions.addOptionGroup(optionsGroup);
515533

534+
Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build();
535+
infoTypesOption.setArgs(Option.UNLIMITED_VALUES);
536+
commandLineOptions.addOption(infoTypesOption);
537+
516538
Option maskingCharacterOption =
517539
Option.builder("maskingCharacter").hasArg(true).required(false).build();
518540
commandLineOptions.addOption(maskingCharacterOption);
@@ -575,12 +597,21 @@ public static void main(String[] args) throws Exception {
575597
String projectId =
576598
cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId());
577599

600+
List<InfoType> infoTypesList = Collections.emptyList();
601+
if (cmd.hasOption(infoTypesOption.getOpt())) {
602+
infoTypesList = new ArrayList<>();
603+
String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt());
604+
for (String infoType : infoTypes) {
605+
infoTypesList.add(InfoType.newBuilder().setName(infoType).build());
606+
}
607+
}
608+
578609
if (cmd.hasOption("m")) {
579610
// deidentification with character masking
580611
int numberToMask = Integer.parseInt(cmd.getOptionValue(numberToMaskOption.getOpt(), "0"));
581612
char maskingCharacter = cmd.getOptionValue(maskingCharacterOption.getOpt(), "*").charAt(0);
582613
String val = cmd.getOptionValue(deidentifyMaskingOption.getOpt());
583-
deIdentifyWithMask(val, maskingCharacter, numberToMask, projectId);
614+
deIdentifyWithMask(val, infoTypesList, maskingCharacter, numberToMask, projectId);
584615
} else if (cmd.hasOption("f")) {
585616
// deidentification with FPE
586617
String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt());
@@ -591,7 +622,8 @@ public static void main(String[] args) throws Exception {
591622
FfxCommonNativeAlphabet.valueOf(
592623
cmd.getOptionValue(
593624
alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name()));
594-
deIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId, surrogateType);
625+
deIdentifyWithFpe(
626+
val, infoTypesList, alphabet, keyName, wrappedKey, projectId, surrogateType);
595627
} else if (cmd.hasOption("d")) {
596628
//deidentify with date shift
597629
String inputCsv = cmd.getOptionValue(inputCsvPathOption.getOpt());

dlp/snippets/snippets/src/main/java/com/example/dlp/Inspect.java

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
import com.google.privacy.dlp.v2.CloudStorageOptions;
2828
import com.google.privacy.dlp.v2.ContentItem;
2929
import com.google.privacy.dlp.v2.CreateDlpJobRequest;
30+
import com.google.privacy.dlp.v2.CustomInfoType;
31+
import com.google.privacy.dlp.v2.CustomInfoType.Dictionary;
32+
import com.google.privacy.dlp.v2.CustomInfoType.Dictionary.WordList;
33+
import com.google.privacy.dlp.v2.CustomInfoType.Regex;
3034
import com.google.privacy.dlp.v2.DatastoreOptions;
3135
import com.google.privacy.dlp.v2.DlpJob;
3236
import com.google.privacy.dlp.v2.Finding;
@@ -52,6 +56,7 @@
5256
import java.nio.file.Files;
5357
import java.nio.file.Paths;
5458
import java.util.ArrayList;
59+
import java.util.Arrays;
5560
import java.util.Collections;
5661
import java.util.List;
5762
import java.util.concurrent.TimeUnit;
@@ -82,6 +87,7 @@ private static void inspectString(
8287
Likelihood minLikelihood,
8388
int maxFindings,
8489
List<InfoType> infoTypes,
90+
List<CustomInfoType> customInfoTypes,
8591
boolean includeQuote,
8692
String projectId) {
8793
// instantiate a client
@@ -91,6 +97,7 @@ private static void inspectString(
9197
InspectConfig inspectConfig =
9298
InspectConfig.newBuilder()
9399
.addAllInfoTypes(infoTypes)
100+
.addAllCustomInfoTypes(customInfoTypes)
94101
.setMinLikelihood(minLikelihood)
95102
.setLimits(findingLimits)
96103
.setIncludeQuote(includeQuote)
@@ -146,6 +153,7 @@ private static void inspectFile(
146153
Likelihood minLikelihood,
147154
int maxFindings,
148155
List<InfoType> infoTypes,
156+
List<CustomInfoType> customInfoTypes,
149157
boolean includeQuote,
150158
String projectId) {
151159
// Instantiates a client
@@ -189,6 +197,7 @@ private static void inspectFile(
189197
InspectConfig inspectConfig =
190198
InspectConfig.newBuilder()
191199
.addAllInfoTypes(infoTypes)
200+
.addAllCustomInfoTypes(customInfoTypes)
192201
.setMinLikelihood(minLikelihood)
193202
.setLimits(findingLimits)
194203
.setIncludeQuote(includeQuote)
@@ -242,6 +251,7 @@ private static void inspectGcsFile(
242251
String fileName,
243252
Likelihood minLikelihood,
244253
List<InfoType> infoTypes,
254+
List<CustomInfoType> customInfoTypes,
245255
int maxFindings,
246256
String topicId,
247257
String subscriptionId,
@@ -266,6 +276,7 @@ private static void inspectGcsFile(
266276
InspectConfig inspectConfig =
267277
InspectConfig.newBuilder()
268278
.addAllInfoTypes(infoTypes)
279+
.addAllCustomInfoTypes(customInfoTypes)
269280
.setMinLikelihood(minLikelihood)
270281
.setLimits(findingLimits)
271282
.build();
@@ -363,6 +374,7 @@ private static void inspectDatastore(
363374
String kind,
364375
Likelihood minLikelihood,
365376
List<InfoType> infoTypes,
377+
List<CustomInfoType> customInfoTypes,
366378
int maxFindings,
367379
String topicId,
368380
String subscriptionId) {
@@ -388,6 +400,7 @@ private static void inspectDatastore(
388400
InspectConfig inspectConfig =
389401
InspectConfig.newBuilder()
390402
.addAllInfoTypes(infoTypes)
403+
.addAllCustomInfoTypes(customInfoTypes)
391404
.setMinLikelihood(minLikelihood)
392405
.setLimits(findingLimits)
393406
.build();
@@ -486,6 +499,7 @@ private static void inspectBigquery(
486499
String tableId,
487500
Likelihood minLikelihood,
488501
List<InfoType> infoTypes,
502+
List<CustomInfoType> customInfoTypes,
489503
int maxFindings,
490504
String topicId,
491505
String subscriptionId) {
@@ -511,6 +525,7 @@ private static void inspectBigquery(
511525
InspectConfig inspectConfig =
512526
InspectConfig.newBuilder()
513527
.addAllInfoTypes(infoTypes)
528+
.addAllCustomInfoTypes(customInfoTypes)
514529
.setMinLikelihood(minLikelihood)
515530
.setLimits(findingLimits)
516531
.build();
@@ -629,6 +644,16 @@ public static void main(String[] args) throws Exception {
629644
infoTypesOption.setArgs(Option.UNLIMITED_VALUES);
630645
commandLineOptions.addOption(infoTypesOption);
631646

647+
Option customDictionariesOption =
648+
Option.builder("customDictionaries").hasArg(true).required(false).build();
649+
customDictionariesOption.setArgs(Option.UNLIMITED_VALUES);
650+
commandLineOptions.addOption(customDictionariesOption);
651+
652+
Option customRegexesOption =
653+
Option.builder("customRegexes").hasArg(true).required(false).build();
654+
customRegexesOption.setArgs(Option.UNLIMITED_VALUES);
655+
commandLineOptions.addOption(customRegexesOption);
656+
632657
Option includeQuoteOption = Option.builder("includeQuote").hasArg(true).required(false).build();
633658
commandLineOptions.addOption(includeQuoteOption);
634659

@@ -695,13 +720,62 @@ public static void main(String[] args) throws Exception {
695720
infoTypesList.add(InfoType.newBuilder().setName(infoType).build());
696721
}
697722
}
723+
724+
List<CustomInfoType> customInfoTypesList = new ArrayList<>();
725+
if (cmd.hasOption(customDictionariesOption.getOpt())) {
726+
String[] dictionaryStrings = cmd.getOptionValues(customDictionariesOption.getOpt());
727+
for (int i = 0; i < dictionaryStrings.length; i++) {
728+
String[] dictionaryWords = dictionaryStrings[i].split(",");
729+
CustomInfoType customInfoType =
730+
CustomInfoType
731+
.newBuilder()
732+
.setInfoType(
733+
InfoType.newBuilder().setName(String.format("CUSTOM_DICTIONARY_%s", i)))
734+
.setDictionary(
735+
Dictionary
736+
.newBuilder()
737+
.setWordList(
738+
WordList
739+
.newBuilder()
740+
.addAllWords(Arrays.<String>asList(dictionaryWords))))
741+
.build();
742+
customInfoTypesList.add(customInfoType);
743+
}
744+
}
745+
if (cmd.hasOption(customRegexesOption.getOpt())) {
746+
String[] patterns = cmd.getOptionValues(customRegexesOption.getOpt());
747+
for (int i = 0; i < patterns.length; i++) {
748+
CustomInfoType customInfoType =
749+
CustomInfoType
750+
.newBuilder()
751+
.setInfoType(InfoType.newBuilder().setName(String.format("CUSTOM_REGEX_%s", i)))
752+
.setRegex(Regex.newBuilder().setPattern(patterns[i]))
753+
.build();
754+
customInfoTypesList.add(customInfoType);
755+
}
756+
}
757+
698758
// string inspection
699759
if (cmd.hasOption("s")) {
700760
String val = cmd.getOptionValue(stringOption.getOpt());
701-
inspectString(val, minLikelihood, maxFindings, infoTypesList, includeQuote, projectId);
761+
inspectString(
762+
val,
763+
minLikelihood,
764+
maxFindings,
765+
infoTypesList,
766+
customInfoTypesList,
767+
includeQuote,
768+
projectId);
702769
} else if (cmd.hasOption("f")) {
703770
String filePath = cmd.getOptionValue(fileOption.getOpt());
704-
inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, includeQuote, projectId);
771+
inspectFile(
772+
filePath,
773+
minLikelihood,
774+
maxFindings,
775+
infoTypesList,
776+
customInfoTypesList,
777+
includeQuote,
778+
projectId);
705779
// gcs file inspection
706780
} else if (cmd.hasOption("gcs")) {
707781
String bucketName = cmd.getOptionValue(bucketNameOption.getOpt());
@@ -711,6 +785,7 @@ public static void main(String[] args) throws Exception {
711785
fileName,
712786
minLikelihood,
713787
infoTypesList,
788+
customInfoTypesList,
714789
maxFindings,
715790
topicId,
716791
subscriptionId,
@@ -726,6 +801,7 @@ public static void main(String[] args) throws Exception {
726801
kind,
727802
minLikelihood,
728803
infoTypesList,
804+
customInfoTypesList,
729805
maxFindings,
730806
topicId,
731807
subscriptionId);
@@ -739,6 +815,7 @@ public static void main(String[] args) throws Exception {
739815
tableId,
740816
minLikelihood,
741817
infoTypesList,
818+
customInfoTypesList,
742819
maxFindings,
743820
topicId,
744821
subscriptionId);

dlp/snippets/snippets/src/test/java/com/example/dlp/DeIdentificationIT.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ public void testDeidStringMasksCharacters() throws Exception {
6363
DeIdentification.main(
6464
new String[] {
6565
"-m", text,
66+
"-infoTypes", "US_SOCIAL_SECURITY_NUMBER",
6667
"-maskingCharacter", "x",
6768
"-numberToMask", "5"
6869
});
@@ -79,6 +80,8 @@ public void testDeidReidFpe() throws Exception {
7980
new String[] {
8081
"-f",
8182
"\"" + text + "\"",
83+
"-infoTypes",
84+
"US_SOCIAL_SECURITY_NUMBER",
8285
"-wrappedKey",
8386
wrappedKey,
8487
"-keyName",

dlp/snippets/snippets/src/test/java/com/example/dlp/InspectIT.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,17 +64,52 @@ public void testStringInspectionReturnsInfoTypes() throws Exception {
6464
assertThat(output, containsString("EMAIL_ADDRESS"));
6565
}
6666

67+
@Test
68+
public void testStringInspectionReturnsCustomInfoTypes() throws Exception {
69+
String text =
70+
"\"My phone number is (234) 456-7890 and my email address is [email protected]\"";
71+
Inspect.main(
72+
new String[] {
73+
"-s",
74+
text,
75+
"-customDictionaries",
76+
77+
"-customRegexes",
78+
"\\(\\d{3}\\) \\d{3}-\\d{4}"
79+
});
80+
String output = bout.toString();
81+
82+
assertThat(output, containsString("CUSTOM_DICTIONARY_0"));
83+
assertThat(output, containsString("CUSTOM_REGEX_0"));
84+
}
85+
6786
@Test
6887
public void testTextFileInspectionReturnsInfoTypes() throws Exception {
6988
Inspect.main(
7089
new String[] {
71-
"-f", "src/test/resources/test.txt", "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS"
90+
"-f", "src/test/resources/test.txt", "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS"
7291
});
7392
String output = bout.toString();
7493
assertThat(output, containsString("PHONE_NUMBER"));
7594
assertThat(output, containsString("EMAIL_ADDRESS"));
7695
}
7796

97+
@Test
98+
public void testTextFileInspectionReturnsCustomInfoTypes() throws Exception {
99+
Inspect.main(
100+
new String[] {
101+
"-f",
102+
"src/test/resources/test.txt",
103+
"-customDictionaries",
104+
105+
"-customRegexes",
106+
"\\(\\d{3}\\) \\d{3}-\\d{4}"
107+
});
108+
String output = bout.toString();
109+
assertThat(output, containsString("CUSTOM_DICTIONARY_0"));
110+
assertThat(output, containsString("CUSTOM_REGEX_0"));
111+
}
112+
78113
@Test
79114
public void testImageFileInspectionReturnsInfoTypes() throws Exception {
80115
Inspect.main(

0 commit comments

Comments
 (0)