|
| 1 | +/* |
| 2 | + * Copyright 2020 Google LLC |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package dlp.snippets; |
| 18 | + |
| 19 | +// [START dlp_k_anonymity] |
| 20 | + |
| 21 | +import com.google.api.core.SettableApiFuture; |
| 22 | +import com.google.cloud.dlp.v2.DlpServiceClient; |
| 23 | +import com.google.cloud.pubsub.v1.AckReplyConsumer; |
| 24 | +import com.google.cloud.pubsub.v1.MessageReceiver; |
| 25 | +import com.google.cloud.pubsub.v1.Subscriber; |
| 26 | +import com.google.privacy.dlp.v2.Action; |
| 27 | +import com.google.privacy.dlp.v2.Action.PublishToPubSub; |
| 28 | +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult; |
| 29 | +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass; |
| 30 | +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket; |
| 31 | +import com.google.privacy.dlp.v2.BigQueryTable; |
| 32 | +import com.google.privacy.dlp.v2.CreateDlpJobRequest; |
| 33 | +import com.google.privacy.dlp.v2.DlpJob; |
| 34 | +import com.google.privacy.dlp.v2.FieldId; |
| 35 | +import com.google.privacy.dlp.v2.GetDlpJobRequest; |
| 36 | +import com.google.privacy.dlp.v2.PrivacyMetric; |
| 37 | +import com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig; |
| 38 | +import com.google.privacy.dlp.v2.ProjectName; |
| 39 | +import com.google.privacy.dlp.v2.RiskAnalysisJobConfig; |
| 40 | +import com.google.privacy.dlp.v2.Value; |
| 41 | +import com.google.pubsub.v1.ProjectSubscriptionName; |
| 42 | +import com.google.pubsub.v1.ProjectTopicName; |
| 43 | +import com.google.pubsub.v1.PubsubMessage; |
| 44 | +import java.util.Arrays; |
| 45 | +import java.util.List; |
| 46 | +import java.util.concurrent.TimeUnit; |
| 47 | +import java.util.concurrent.TimeoutException; |
| 48 | +import java.util.stream.Collectors; |
| 49 | + |
| 50 | +class RiskAnalysisKAnonymity { |
| 51 | + |
| 52 | + public static void calculateKAnonymity() throws Exception { |
| 53 | + // TODO(developer): Replace these variables before running the sample. |
| 54 | + String projectId = "your-project-id"; |
| 55 | + String datasetId = "your-bigquery-dataset-id"; |
| 56 | + String tableId = "your-bigquery-table-id"; |
| 57 | + String topicId = "pub-sub-topic"; |
| 58 | + String subscriptionId = "pub-sub-subscription"; |
| 59 | + calculateKAnonymity(projectId, datasetId, tableId, topicId, subscriptionId); |
| 60 | + } |
| 61 | + |
| 62 | + public static void calculateKAnonymity( |
| 63 | + String projectId, String datasetId, String tableId, String topicId, String subscriptionId) |
| 64 | + throws Exception { |
| 65 | + // Initialize client that will be used to send requests. This client only needs to be created |
| 66 | + // once, and can be reused for multiple requests. After completing all of your requests, call |
| 67 | + // the "close" method on the client to safely clean up any remaining background resources. |
| 68 | + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { |
| 69 | + |
| 70 | + // Specify the BigQuery table to analyze |
| 71 | + BigQueryTable bigQueryTable = |
| 72 | + BigQueryTable.newBuilder() |
| 73 | + .setProjectId(projectId) |
| 74 | + .setDatasetId(datasetId) |
| 75 | + .setTableId(tableId) |
| 76 | + .build(); |
| 77 | + |
| 78 | + // These values represent the column names of quasi-identifiers to analyze |
| 79 | + List<String> quasiIds = Arrays.asList("Age", "Mystery"); |
| 80 | + |
| 81 | + // Configure the privacy metric for the job |
| 82 | + List<FieldId> quasiIdFields = |
| 83 | + quasiIds.stream() |
| 84 | + .map(columnName -> FieldId.newBuilder().setName(columnName).build()) |
| 85 | + .collect(Collectors.toList()); |
| 86 | + KAnonymityConfig kanonymityConfig = |
| 87 | + KAnonymityConfig.newBuilder().addAllQuasiIds(quasiIdFields).build(); |
| 88 | + PrivacyMetric privacyMetric = |
| 89 | + PrivacyMetric.newBuilder().setKAnonymityConfig(kanonymityConfig).build(); |
| 90 | + |
| 91 | + // Create action to publish job status notifications over Google Cloud Pub/Sub |
| 92 | + ProjectTopicName topicName = ProjectTopicName.of(projectId, topicId); |
| 93 | + PublishToPubSub publishToPubSub = |
| 94 | + PublishToPubSub.newBuilder().setTopic(topicName.toString()).build(); |
| 95 | + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); |
| 96 | + |
| 97 | + // Configure the risk analysis job to perform |
| 98 | + RiskAnalysisJobConfig riskAnalysisJobConfig = |
| 99 | + RiskAnalysisJobConfig.newBuilder() |
| 100 | + .setSourceTable(bigQueryTable) |
| 101 | + .setPrivacyMetric(privacyMetric) |
| 102 | + .addActions(action) |
| 103 | + .build(); |
| 104 | + |
| 105 | + // Build the request to be sent by the client |
| 106 | + CreateDlpJobRequest createDlpJobRequest = |
| 107 | + CreateDlpJobRequest.newBuilder() |
| 108 | + .setParent(ProjectName.of(projectId).toString()) |
| 109 | + .setRiskJob(riskAnalysisJobConfig) |
| 110 | + .build(); |
| 111 | + |
| 112 | + // Send the request to the API using the client |
| 113 | + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); |
| 114 | + |
| 115 | + // Set up a Pub/Sub subscriber to listen on the job completion status |
| 116 | + final SettableApiFuture<Boolean> done = SettableApiFuture.create(); |
| 117 | + |
| 118 | + ProjectSubscriptionName subscriptionName = |
| 119 | + ProjectSubscriptionName.of(projectId, subscriptionId); |
| 120 | + |
| 121 | + MessageReceiver messageHandler = |
| 122 | + (PubsubMessage pubsubMessage, AckReplyConsumer ackReplyConsumer) -> { |
| 123 | + handleMessage(dlpJob, done, pubsubMessage, ackReplyConsumer); |
| 124 | + }; |
| 125 | + Subscriber subscriber = Subscriber.newBuilder(subscriptionName, messageHandler).build(); |
| 126 | + subscriber.startAsync(); |
| 127 | + |
| 128 | + // Wait for job completion semi-synchronously |
| 129 | + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions |
| 130 | + try { |
| 131 | + done.get(1, TimeUnit.MINUTES); |
| 132 | + Thread.sleep(500); // Wait for the job to become available |
| 133 | + } catch (TimeoutException e) { |
| 134 | + System.out.println("Unable to verify job completion."); |
| 135 | + } |
| 136 | + |
| 137 | + // Build a request to get the completed job |
| 138 | + GetDlpJobRequest getDlpJobRequest = |
| 139 | + GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build(); |
| 140 | + |
| 141 | + // Retrieve completed job status |
| 142 | + DlpJob completedJob = dlpServiceClient.getDlpJob(getDlpJobRequest); |
| 143 | + System.out.println("Job status: " + completedJob.getState()); |
| 144 | + |
| 145 | + // Get the result and parse through and process the information |
| 146 | + KAnonymityResult kanonymityResult = completedJob.getRiskDetails().getKAnonymityResult(); |
| 147 | + List<KAnonymityHistogramBucket> histogramBucketList = |
| 148 | + kanonymityResult.getEquivalenceClassHistogramBucketsList(); |
| 149 | + for (KAnonymityHistogramBucket result : histogramBucketList) { |
| 150 | + System.out.printf( |
| 151 | + "Bucket size range: [%d, %d]\n", |
| 152 | + result.getEquivalenceClassSizeLowerBound(), result.getEquivalenceClassSizeUpperBound()); |
| 153 | + |
| 154 | + for (KAnonymityEquivalenceClass bucket : result.getBucketValuesList()) { |
| 155 | + List<String> quasiIdValues = |
| 156 | + bucket.getQuasiIdsValuesList().stream() |
| 157 | + .map(Value::toString) |
| 158 | + .collect(Collectors.toList()); |
| 159 | + |
| 160 | + System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); |
| 161 | + System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); |
| 162 | + } |
| 163 | + } |
| 164 | + } |
| 165 | + } |
| 166 | + |
| 167 | + // handleMessage injects the job and settableFuture into the message reciever interface |
| 168 | + private static void handleMessage( |
| 169 | + DlpJob job, |
| 170 | + SettableApiFuture<Boolean> done, |
| 171 | + PubsubMessage pubsubMessage, |
| 172 | + AckReplyConsumer ackReplyConsumer) { |
| 173 | + String messageAttribute = pubsubMessage.getAttributesMap().get("DlpJobName"); |
| 174 | + if (job.getName().equals(messageAttribute)) { |
| 175 | + done.set(true); |
| 176 | + ackReplyConsumer.ack(); |
| 177 | + } else { |
| 178 | + ackReplyConsumer.nack(); |
| 179 | + } |
| 180 | + } |
| 181 | +} |
| 182 | +// [END dlp_k_anonymity] |
0 commit comments