Skip to content

Commit cef37d5

Browse files
author
Ace Nassri
committed
Add ReID FPE sample
1 parent 9352db0 commit cef37d5

File tree

2 files changed

+154
-24
lines changed

2 files changed

+154
-24
lines changed

dlp/src/main/java/com/example/dlp/DeIdentification.java

Lines changed: 135 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,29 @@
1919
import com.google.cloud.ServiceOptions;
2020
import com.google.cloud.dlp.v2.DlpServiceClient;
2121
import com.google.common.io.BaseEncoding;
22-
import com.google.privacy.dlp.v2.ByteContentItem;
2322
import com.google.privacy.dlp.v2.CharacterMaskConfig;
2423
import com.google.privacy.dlp.v2.ContentItem;
2524
import com.google.privacy.dlp.v2.CryptoKey;
2625
import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig;
2726
import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet;
27+
import com.google.privacy.dlp.v2.CustomInfoType;
28+
import com.google.privacy.dlp.v2.CustomInfoType.SurrogateType;
2829
import com.google.privacy.dlp.v2.DateShiftConfig;
2930
import com.google.privacy.dlp.v2.DeidentifyConfig;
3031
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
3132
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
3233
import com.google.privacy.dlp.v2.FieldId;
3334
import com.google.privacy.dlp.v2.FieldTransformation;
35+
import com.google.privacy.dlp.v2.InfoType;
3436
import com.google.privacy.dlp.v2.InfoTypeTransformations;
3537
import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation;
38+
import com.google.privacy.dlp.v2.InspectConfig;
3639
import com.google.privacy.dlp.v2.KmsWrappedCryptoKey;
3740
import com.google.privacy.dlp.v2.PrimitiveTransformation;
3841
import com.google.privacy.dlp.v2.ProjectName;
3942
import com.google.privacy.dlp.v2.RecordTransformations;
43+
import com.google.privacy.dlp.v2.ReidentifyContentRequest;
44+
import com.google.privacy.dlp.v2.ReidentifyContentResponse;
4045
import com.google.privacy.dlp.v2.Table;
4146
import com.google.privacy.dlp.v2.Value;
4247
import com.google.protobuf.ByteString;
@@ -46,7 +51,6 @@
4651
import java.io.File;
4752
import java.io.FileReader;
4853
import java.io.FileWriter;
49-
import java.nio.charset.StandardCharsets;
5054
import java.nio.file.Path;
5155
import java.nio.file.Paths;
5256
import java.time.LocalDate;
@@ -82,14 +86,11 @@ private static void deIdentifyWithMask(
8286
// instantiate a client
8387
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
8488

85-
ByteContentItem byteContentItem =
86-
ByteContentItem.newBuilder()
87-
.setType(ByteContentItem.BytesType.TEXT_UTF8)
88-
.setData(ByteString.copyFrom(string, StandardCharsets.UTF_8))
89+
ContentItem contentItem =
90+
ContentItem.newBuilder()
91+
.setValue(string)
8992
.build();
9093

91-
ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build();
92-
9394
CharacterMaskConfig characterMaskConfig =
9495
CharacterMaskConfig.newBuilder()
9596
.setMaskingCharacter(maskingCharacter.toString())
@@ -130,7 +131,7 @@ private static void deIdentifyWithMask(
130131

131132
// Print the character-masked input value
132133
// e.g. "My SSN is 123456789" --> "My SSN is *********"
133-
String result = response.getItem().getByteItem().getData().toStringUtf8();
134+
String result = response.getItem().getValue();
134135
System.out.println(result);
135136
} catch (Exception e) {
136137
System.out.println("Error in deidentifyWithMask: " + e.getMessage());
@@ -154,16 +155,11 @@ private static void deIdentifyWithFpe(
154155
FfxCommonNativeAlphabet alphabet,
155156
String keyName,
156157
String wrappedKey,
157-
String projectId) {
158+
String projectId,
159+
String surrogateType) {
158160
// instantiate a client
159161
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
160-
161-
ByteContentItem byteContentItem =
162-
ByteContentItem.newBuilder()
163-
.setData(ByteString.copyFrom(string, StandardCharsets.UTF_8))
164-
.build();
165-
166-
ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build();
162+
ContentItem contentItem = ContentItem.newBuilder().setValue(string).build();
167163

168164
// Create the format-preserving encryption (FPE) configuration
169165
KmsWrappedCryptoKey kmsWrappedCryptoKey =
@@ -178,6 +174,7 @@ private static void deIdentifyWithFpe(
178174
CryptoReplaceFfxFpeConfig.newBuilder()
179175
.setCryptoKey(cryptoKey)
180176
.setCommonAlphabet(alphabet)
177+
.setSurrogateInfoType(InfoType.newBuilder().setName(surrogateType).build())
181178
.build();
182179

183180
// Create the deidentification transformation configuration
@@ -214,14 +211,113 @@ private static void deIdentifyWithFpe(
214211

215212
// Print the deidentified input value
216213
// e.g. "My SSN is 123456789" --> "My SSN is 7261298621"
217-
String result = response.getItem().getByteItem().getData().toStringUtf8();
214+
String result = response.getItem().getValue();
218215
System.out.println(result);
219216
} catch (Exception e) {
220217
System.out.println("Error in deidentifyWithFpe: " + e.getMessage());
221218
}
222219
}
223220
// [END dlp_deidentify_fpe]
224221

222+
// [START dlp_reidentify_fpe]
223+
/**
224+
* Reidentify a string by decrypting values that were deidentified with format-preserving encryption.
225+
*
226+
* @param string The string to reidentify.
227+
* @param alphabet The set of characters used when encrypting the input. For more information,
228+
* see cloud.google.com/dlp/docs/reference/rest/v2/content/deidentify
229+
* @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key.
230+
* @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key.
231+
* @param projectId ID of Google Cloud project to run the API under.
232+
* @param surrogateType The name of the surrogate custom info type to use
233+
* during the reidentification process.
234+
*/
235+
private static void reIdentifyWithFpe(
236+
String string,
237+
FfxCommonNativeAlphabet alphabet,
238+
String keyName,
239+
String wrappedKey,
240+
String projectId,
241+
String surrogateType) {
242+
// instantiate a client
243+
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
244+
ContentItem contentItem = ContentItem.newBuilder().setValue(string).build();
245+
246+
247+
InfoType surrogateTypeObject = InfoType.newBuilder()
248+
.setName(surrogateType)
249+
.build();
250+
251+
// Create the format-preserving encryption (FPE) configuration
252+
KmsWrappedCryptoKey kmsWrappedCryptoKey =
253+
KmsWrappedCryptoKey.newBuilder()
254+
.setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey)))
255+
.setCryptoKeyName(keyName)
256+
.build();
257+
258+
CryptoKey cryptoKey = CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build();
259+
260+
CryptoReplaceFfxFpeConfig cryptoReplaceFfxFpeConfig =
261+
CryptoReplaceFfxFpeConfig.newBuilder()
262+
.setCryptoKey(cryptoKey)
263+
.setCommonAlphabet(alphabet)
264+
.setSurrogateInfoType(surrogateTypeObject)
265+
.build();
266+
267+
// Create the reidentification transformation configuration
268+
PrimitiveTransformation primitiveTransformation =
269+
PrimitiveTransformation.newBuilder()
270+
.setCryptoReplaceFfxFpeConfig(cryptoReplaceFfxFpeConfig)
271+
.build();
272+
273+
InfoTypeTransformation infoTypeTransformationObject =
274+
InfoTypeTransformation.newBuilder()
275+
.setPrimitiveTransformation(primitiveTransformation)
276+
.addInfoTypes(surrogateTypeObject)
277+
.build();
278+
279+
InfoTypeTransformations infoTypeTransformationArray =
280+
InfoTypeTransformations.newBuilder()
281+
.addTransformations(infoTypeTransformationObject)
282+
.build();
283+
284+
// Create the inspection config
285+
CustomInfoType customInfoType = CustomInfoType.newBuilder()
286+
.setInfoType(surrogateTypeObject)
287+
.setSurrogateType(SurrogateType.newBuilder().build())
288+
.build();
289+
290+
InspectConfig inspectConfig =
291+
InspectConfig.newBuilder()
292+
.addCustomInfoTypes(customInfoType).build();
293+
294+
// Create the reidentification request object
295+
DeidentifyConfig reidentifyConfig =
296+
DeidentifyConfig.newBuilder()
297+
.setInfoTypeTransformations(infoTypeTransformationArray)
298+
.build();
299+
300+
ReidentifyContentRequest request =
301+
ReidentifyContentRequest.newBuilder()
302+
.setParent(ProjectName.of(projectId).toString())
303+
.setReidentifyConfig(reidentifyConfig)
304+
.setInspectConfig(inspectConfig)
305+
.setItem(contentItem)
306+
.build();
307+
308+
// Execute the reidentification request
309+
ReidentifyContentResponse response = dlpServiceClient.reidentifyContent(request);
310+
311+
// Print the reidentified input value
312+
// e.g. "My SSN is 7261298621" --> "My SSN is 123456789"
313+
String result = response.getItem().getValue();
314+
System.out.println(result);
315+
} catch (Exception e) {
316+
System.out.println("Error in reidentifyWithFpe: " + e.getMessage());
317+
}
318+
}
319+
// [END dlp_reidentify_fpe]
320+
225321
// [START dlp_deidentify_date_shift]
226322
/**
227323
*
@@ -413,6 +509,10 @@ public static void main(String[] args) throws Exception {
413509
new Option("f", "fpe", true, "Deidentify with format-preserving encryption.");
414510
optionsGroup.addOption(deidentifyFpeOption);
415511

512+
Option reidentifyFpeOption =
513+
new Option("r", "reid", true, "Reidentify with format-preserving encryption.");
514+
optionsGroup.addOption(reidentifyFpeOption);
515+
416516
Option deidentifyDateShiftOption =
417517
new Option("d", "date", false, "Deidentify dates in a CSV file.");
418518
optionsGroup.addOption(deidentifyDateShiftOption);
@@ -424,6 +524,10 @@ public static void main(String[] args) throws Exception {
424524
Option.builder("maskingCharacter").hasArg(true).required(false).build();
425525
commandLineOptions.addOption(maskingCharacterOption);
426526

527+
Option surrogateTypeOption =
528+
Option.builder("surrogateType").hasArg(true).required(false).build();
529+
commandLineOptions.addOption(surrogateTypeOption);
530+
427531
Option numberToMaskOption = Option.builder("numberToMask").hasArg(true).required(false).build();
428532
commandLineOptions.addOption(numberToMaskOption);
429533

@@ -489,11 +593,12 @@ public static void main(String[] args) throws Exception {
489593
String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt());
490594
String keyName = cmd.getOptionValue(keyNameOption.getOpt());
491595
String val = cmd.getOptionValue(deidentifyFpeOption.getOpt());
596+
String surrogateType = cmd.getOptionValue(surrogateTypeOption.getOpt());
492597
FfxCommonNativeAlphabet alphabet =
493598
FfxCommonNativeAlphabet.valueOf(
494599
cmd.getOptionValue(
495600
alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name()));
496-
deIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId);
601+
deIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId, surrogateType);
497602
} else if (cmd.hasOption("d")) {
498603
//deidentify with date shift
499604
String inputCsv = cmd.getOptionValue(inputCsvPathOption.getOpt());
@@ -518,6 +623,17 @@ public static void main(String[] args) throws Exception {
518623
wrappedKey,
519624
keyName,
520625
projectId);
626+
} else if (cmd.hasOption("r")) {
627+
// reidentification with FPE
628+
String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt());
629+
String keyName = cmd.getOptionValue(keyNameOption.getOpt());
630+
String val = cmd.getOptionValue(reidentifyFpeOption.getOpt());
631+
String surrogateType = cmd.getOptionValue(surrogateTypeOption.getOpt());
632+
FfxCommonNativeAlphabet alphabet =
633+
FfxCommonNativeAlphabet.valueOf(
634+
cmd.getOptionValue(
635+
alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name()));
636+
reIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId, surrogateType);
521637
}
522638
}
523639
}

dlp/src/test/java/com/example/dlp/DeIdentificationIT.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,19 +76,33 @@ public void testDeidStringMasksCharacters() throws Exception {
7676
public void testDeidReidFpe() throws Exception {
7777

7878
// Test DeID
79-
String text = "\"My SSN is 372819127\"";
79+
String text = "My SSN is 372819127";
8080
DeIdentification.main(
8181
new String[] {
82-
"-f", text,
82+
"-f", "\"" + text + "\"",
8383
"-wrappedKey", wrappedKey,
8484
"-keyName", keyName,
8585
"-commonAlphabet", "NUMERIC",
86+
"-surrogateType", "SSN_TOKEN"
8687
});
87-
String output = bout.toString();
88+
String deidOutput = bout.toString();
8889
assertFalse(
8990
"Response contains original SSN.",
90-
output.contains("372819127"));
91-
assertTrue(output.matches("My SSN is \\d+\n"));
91+
deidOutput.contains("372819127"));
92+
assertTrue(deidOutput.matches("My SSN is SSN_TOKEN\\(9\\):\\d+\n"));
93+
94+
// Test ReID
95+
bout.flush();
96+
DeIdentification.main(
97+
new String[] {
98+
"-r", deidOutput.toString().trim(),
99+
"-wrappedKey", wrappedKey,
100+
"-keyName", keyName,
101+
"-commonAlphabet", "NUMERIC",
102+
"-surrogateType", "SSN_TOKEN"
103+
});
104+
String reidOutput = bout.toString();
105+
assertThat(reidOutput, containsString(text));
92106
}
93107

94108
@Test

0 commit comments

Comments
 (0)