Skip to content

Commit 4417dd1

Browse files
shubha-rajanchingor13
authored andcommitted
samples: Cleanup DLP Risk Analysis snippets (#2069)
* refactored Risk analysis samples * updated inspect tests and readme * moved parameters to inline, cleaned up files, reformatted for consistency * replaced iterators with intstream in RiskAnalysisKMap * moved message receiver lambda to its own method * ran code formatter * refactored long messageReciever lambda * rewrote lambda for pubsub message receiver * replaced int stream with for loop
1 parent f293245 commit 4417dd1

File tree

8 files changed

+977
-961
lines changed

8 files changed

+977
-961
lines changed

dlp/snippets/snippets/src/main/java/com/example/dlp/RiskAnalysis.java

Lines changed: 0 additions & 859 deletions
This file was deleted.
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package dlp.snippets;
18+
// [START dlp_categorical_stats]
19+
20+
import com.google.api.core.SettableApiFuture;
21+
import com.google.cloud.dlp.v2.DlpServiceClient;
22+
import com.google.cloud.pubsub.v1.AckReplyConsumer;
23+
import com.google.cloud.pubsub.v1.MessageReceiver;
24+
import com.google.cloud.pubsub.v1.Subscriber;
25+
import com.google.privacy.dlp.v2.Action;
26+
import com.google.privacy.dlp.v2.Action.PublishToPubSub;
27+
import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult;
28+
import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket;
29+
import com.google.privacy.dlp.v2.BigQueryTable;
30+
import com.google.privacy.dlp.v2.CreateDlpJobRequest;
31+
import com.google.privacy.dlp.v2.DlpJob;
32+
import com.google.privacy.dlp.v2.FieldId;
33+
import com.google.privacy.dlp.v2.GetDlpJobRequest;
34+
import com.google.privacy.dlp.v2.PrivacyMetric;
35+
import com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig;
36+
import com.google.privacy.dlp.v2.ProjectName;
37+
import com.google.privacy.dlp.v2.RiskAnalysisJobConfig;
38+
import com.google.privacy.dlp.v2.ValueFrequency;
39+
import com.google.pubsub.v1.ProjectSubscriptionName;
40+
import com.google.pubsub.v1.ProjectTopicName;
41+
import com.google.pubsub.v1.PubsubMessage;
42+
import java.util.List;
43+
import java.util.concurrent.TimeUnit;
44+
import java.util.concurrent.TimeoutException;
45+
46+
class RiskAnalysisCategoricalStats {
47+
48+
public static void categoricalStatsAnalysis() throws Exception {
49+
// TODO(developer): Replace these variables before running the sample.
50+
String projectId = "your-project-id";
51+
String datasetId = "your-bigquery-dataset-id";
52+
String tableId = "your-bigquery-table-id";
53+
String topicId = "pub-sub-topic";
54+
String subscriptionId = "pub-sub-subscription";
55+
categoricalStatsAnalysis(projectId, datasetId, tableId, topicId, subscriptionId);
56+
}
57+
58+
public static void categoricalStatsAnalysis(
59+
String projectId, String datasetId, String tableId, String topicId, String subscriptionId)
60+
throws Exception {
61+
// Initialize client that will be used to send requests. This client only needs to be created
62+
// once, and can be reused for multiple requests. After completing all of your requests, call
63+
// the "close" method on the client to safely clean up any remaining background resources.
64+
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
65+
// Specify the BigQuery table to analyze
66+
BigQueryTable bigQueryTable =
67+
BigQueryTable.newBuilder()
68+
.setProjectId(projectId)
69+
.setDatasetId(datasetId)
70+
.setTableId(tableId)
71+
.build();
72+
73+
// The name of the column to analyze, which doesn't need to contain numerical data
74+
String columnName = "Mystery";
75+
76+
// Configure the privacy metric for the job
77+
FieldId fieldId = FieldId.newBuilder().setName(columnName).build();
78+
CategoricalStatsConfig categoricalStatsConfig =
79+
CategoricalStatsConfig.newBuilder().setField(fieldId).build();
80+
PrivacyMetric privacyMetric =
81+
PrivacyMetric.newBuilder().setCategoricalStatsConfig(categoricalStatsConfig).build();
82+
83+
// Create action to publish job status notifications over Google Cloud Pub/Sub
84+
ProjectTopicName topicName = ProjectTopicName.of(projectId, topicId);
85+
PublishToPubSub publishToPubSub =
86+
PublishToPubSub.newBuilder().setTopic(topicName.toString()).build();
87+
Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
88+
89+
// Configure the risk analysis job to perform
90+
RiskAnalysisJobConfig riskAnalysisJobConfig =
91+
RiskAnalysisJobConfig.newBuilder()
92+
.setSourceTable(bigQueryTable)
93+
.setPrivacyMetric(privacyMetric)
94+
.addActions(action)
95+
.build();
96+
97+
// Build the job creation request to be sent by the client
98+
CreateDlpJobRequest createDlpJobRequest =
99+
CreateDlpJobRequest.newBuilder()
100+
.setParent(ProjectName.of(projectId).toString())
101+
.setRiskJob(riskAnalysisJobConfig)
102+
.build();
103+
104+
// Send the request to the API using the client
105+
DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
106+
107+
// Set up a Pub/Sub subscriber to listen on the job completion status
108+
final SettableApiFuture<Boolean> done = SettableApiFuture.create();
109+
110+
ProjectSubscriptionName subscriptionName =
111+
ProjectSubscriptionName.of(projectId, subscriptionId);
112+
113+
MessageReceiver messageHandler =
114+
(PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> {
115+
handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer);
116+
};
117+
Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build();
118+
subscriber.startAsync();
119+
120+
// Wait for job completion semi-synchronously
121+
// For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
122+
try {
123+
done.get(1, TimeUnit.MINUTES);
124+
Thread.sleep(500); // Wait for the job to become available
125+
} catch (TimeoutException e) {
126+
System.out.println("Unable to verify job completion.");
127+
}
128+
129+
// Build a request to get the completed job
130+
GetDlpJobRequest getDlpJobRequest =
131+
GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build();
132+
133+
// Retrieve completed job status
134+
DlpJob completedJob = dlpServiceClient.getDlpJob(getDlpJobRequest);
135+
System.out.println("Job status: " + completedJob.getState());
136+
137+
// Get the result and parse through and process the information
138+
CategoricalStatsResult result = completedJob.getRiskDetails().getCategoricalStatsResult();
139+
List<CategoricalStatsHistogramBucket> histogramBucketList =
140+
result.getValueFrequencyHistogramBucketsList();
141+
142+
for (CategoricalStatsHistogramBucket bucket : histogramBucketList) {
143+
long mostCommonFrequency = bucket.getValueFrequencyUpperBound();
144+
System.out.printf("Most common value occurs %d time(s).\n", mostCommonFrequency);
145+
146+
long leastCommonFrequency = bucket.getValueFrequencyLowerBound();
147+
System.out.printf("Least common value occurs %d time(s).\n", leastCommonFrequency);
148+
149+
for (ValueFrequency valueFrequency : bucket.getBucketValuesList()) {
150+
System.out.printf(
151+
"Value %s occurs %d time(s).\n",
152+
valueFrequency.getValue().toString(), valueFrequency.getCount());
153+
}
154+
}
155+
}
156+
}
157+
158+
// handleMessage injects the job and settableFuture into the message reciever interface
159+
private static void handleMessage(
160+
DlpJob job,
161+
SettableApiFuture<Boolean> done,
162+
PubsubMessage pubsubMessage,
163+
AckReplyConsumer ackReplyConsumer) {
164+
String messageAttribute = pubsubMessage.getAttributesMap().get("DlpJobName");
165+
if (job.getName().equals(messageAttribute)) {
166+
done.set(true);
167+
ackReplyConsumer.ack();
168+
} else {
169+
ackReplyConsumer.nack();
170+
}
171+
}
172+
}
173+
174+
// [END dlp_categorical_stats]
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package dlp.snippets;
18+
19+
// [START dlp_k_anonymity]
20+
21+
import com.google.api.core.SettableApiFuture;
22+
import com.google.cloud.dlp.v2.DlpServiceClient;
23+
import com.google.cloud.pubsub.v1.AckReplyConsumer;
24+
import com.google.cloud.pubsub.v1.MessageReceiver;
25+
import com.google.cloud.pubsub.v1.Subscriber;
26+
import com.google.privacy.dlp.v2.Action;
27+
import com.google.privacy.dlp.v2.Action.PublishToPubSub;
28+
import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult;
29+
import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass;
30+
import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket;
31+
import com.google.privacy.dlp.v2.BigQueryTable;
32+
import com.google.privacy.dlp.v2.CreateDlpJobRequest;
33+
import com.google.privacy.dlp.v2.DlpJob;
34+
import com.google.privacy.dlp.v2.FieldId;
35+
import com.google.privacy.dlp.v2.GetDlpJobRequest;
36+
import com.google.privacy.dlp.v2.PrivacyMetric;
37+
import com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig;
38+
import com.google.privacy.dlp.v2.ProjectName;
39+
import com.google.privacy.dlp.v2.RiskAnalysisJobConfig;
40+
import com.google.privacy.dlp.v2.Value;
41+
import com.google.pubsub.v1.ProjectSubscriptionName;
42+
import com.google.pubsub.v1.ProjectTopicName;
43+
import com.google.pubsub.v1.PubsubMessage;
44+
import java.util.Arrays;
45+
import java.util.List;
46+
import java.util.concurrent.TimeUnit;
47+
import java.util.concurrent.TimeoutException;
48+
import java.util.stream.Collectors;
49+
50+
class RiskAnalysisKAnonymity {
51+
52+
public static void calculateKAnonymity() throws Exception {
53+
// TODO(developer): Replace these variables before running the sample.
54+
String projectId = "your-project-id";
55+
String datasetId = "your-bigquery-dataset-id";
56+
String tableId = "your-bigquery-table-id";
57+
String topicId = "pub-sub-topic";
58+
String subscriptionId = "pub-sub-subscription";
59+
calculateKAnonymity(projectId, datasetId, tableId, topicId, subscriptionId);
60+
}
61+
62+
public static void calculateKAnonymity(
63+
String projectId, String datasetId, String tableId, String topicId, String subscriptionId)
64+
throws Exception {
65+
// Initialize client that will be used to send requests. This client only needs to be created
66+
// once, and can be reused for multiple requests. After completing all of your requests, call
67+
// the "close" method on the client to safely clean up any remaining background resources.
68+
try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
69+
70+
// Specify the BigQuery table to analyze
71+
BigQueryTable bigQueryTable =
72+
BigQueryTable.newBuilder()
73+
.setProjectId(projectId)
74+
.setDatasetId(datasetId)
75+
.setTableId(tableId)
76+
.build();
77+
78+
// These values represent the column names of quasi-identifiers to analyze
79+
List<String> quasiIds = Arrays.asList("Age", "Mystery");
80+
81+
// Configure the privacy metric for the job
82+
List<FieldId> quasiIdFields =
83+
quasiIds.stream()
84+
.map(columnName -> FieldId.newBuilder().setName(columnName).build())
85+
.collect(Collectors.toList());
86+
KAnonymityConfig kanonymityConfig =
87+
KAnonymityConfig.newBuilder().addAllQuasiIds(quasiIdFields).build();
88+
PrivacyMetric privacyMetric =
89+
PrivacyMetric.newBuilder().setKAnonymityConfig(kanonymityConfig).build();
90+
91+
// Create action to publish job status notifications over Google Cloud Pub/Sub
92+
ProjectTopicName topicName = ProjectTopicName.of(projectId, topicId);
93+
PublishToPubSub publishToPubSub =
94+
PublishToPubSub.newBuilder().setTopic(topicName.toString()).build();
95+
Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
96+
97+
// Configure the risk analysis job to perform
98+
RiskAnalysisJobConfig riskAnalysisJobConfig =
99+
RiskAnalysisJobConfig.newBuilder()
100+
.setSourceTable(bigQueryTable)
101+
.setPrivacyMetric(privacyMetric)
102+
.addActions(action)
103+
.build();
104+
105+
// Build the request to be sent by the client
106+
CreateDlpJobRequest createDlpJobRequest =
107+
CreateDlpJobRequest.newBuilder()
108+
.setParent(ProjectName.of(projectId).toString())
109+
.setRiskJob(riskAnalysisJobConfig)
110+
.build();
111+
112+
// Send the request to the API using the client
113+
DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
114+
115+
// Set up a Pub/Sub subscriber to listen on the job completion status
116+
final SettableApiFuture<Boolean> done = SettableApiFuture.create();
117+
118+
ProjectSubscriptionName subscriptionName =
119+
ProjectSubscriptionName.of(projectId, subscriptionId);
120+
121+
MessageReceiver messageHandler =
122+
(PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> {
123+
handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer);
124+
};
125+
Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build();
126+
subscriber.startAsync();
127+
128+
// Wait for job completion semi-synchronously
129+
// For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
130+
try {
131+
done.get(1, TimeUnit.MINUTES);
132+
Thread.sleep(500); // Wait for the job to become available
133+
} catch (TimeoutException e) {
134+
System.out.println("Unable to verify job completion.");
135+
}
136+
137+
// Build a request to get the completed job
138+
GetDlpJobRequest getDlpJobRequest =
139+
GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build();
140+
141+
// Retrieve completed job status
142+
DlpJob completedJob = dlpServiceClient.getDlpJob(getDlpJobRequest);
143+
System.out.println("Job status: " + completedJob.getState());
144+
145+
// Get the result and parse through and process the information
146+
KAnonymityResult kanonymityResult = completedJob.getRiskDetails().getKAnonymityResult();
147+
List<KAnonymityHistogramBucket> histogramBucketList =
148+
kanonymityResult.getEquivalenceClassHistogramBucketsList();
149+
for (KAnonymityHistogramBucket result : histogramBucketList) {
150+
System.out.printf(
151+
"Bucket size range: [%d, %d]\n",
152+
result.getEquivalenceClassSizeLowerBound(), result.getEquivalenceClassSizeUpperBound());
153+
154+
for (KAnonymityEquivalenceClass bucket : result.getBucketValuesList()) {
155+
List<String> quasiIdValues =
156+
bucket.getQuasiIdsValuesList().stream()
157+
.map(Value::toString)
158+
.collect(Collectors.toList());
159+
160+
System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues));
161+
System.out.println("\tClass size: " + bucket.getEquivalenceClassSize());
162+
}
163+
}
164+
}
165+
}
166+
167+
// handleMessage injects the job and settableFuture into the message reciever interface
168+
private static void handleMessage(
169+
DlpJob job,
170+
SettableApiFuture<Boolean> done,
171+
PubsubMessage pubsubMessage,
172+
AckReplyConsumer ackReplyConsumer) {
173+
String messageAttribute = pubsubMessage.getAttributesMap().get("DlpJobName");
174+
if (job.getName().equals(messageAttribute)) {
175+
done.set(true);
176+
ackReplyConsumer.ack();
177+
} else {
178+
ackReplyConsumer.nack();
179+
}
180+
}
181+
}
182+
// [END dlp_k_anonymity]

0 commit comments

Comments
 (0)