Skip to content

Commit fbb9777

Browse files
authored
Add custom infoType snippets to DLP samples (#3991)
1 parent c9d0c9b commit fbb9777

File tree

2 files changed

+113
-0
lines changed

2 files changed

+113
-0
lines changed

dlp/custom_infotype.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Copyright 2020 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""Custom infoType snippets.
15+
16+
This file contains sample code that uses the Data Loss Prevention API to create
17+
custom infoType detectors to refine scan results.
18+
"""
19+
20+
21+
# [START dlp_omit_name_if_also_email]
22+
def omit_name_if_also_email(
    project,
    content_string,
):
    """Matches PERSON_NAME and EMAIL_ADDRESS, but not both.

    Uses the Data Loss Prevention API to omit matches on PERSON_NAME if the
    EMAIL_ADDRESS detector also matches.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.

    Returns:
        A list containing the infoType name of every finding in the API
        response, in the order the findings appear in that response.
    """

    # Import the client library.
    import google.cloud.dlp

    # Instantiate a client. The client class is taken from the module that is
    # actually imported above rather than from `google.cloud.dlp_v2`, which
    # would only be reachable as a side effect of that import.
    dlp = google.cloud.dlp.DlpServiceClient()

    # Construct a list of infoTypes for DLP to locate in `content_string`. See
    # https://cloud.google.com/dlp/docs/concepts-infotypes for more information
    # about supported infoTypes.
    info_types_to_locate = [{"name": "PERSON_NAME"}, {"name": "EMAIL_ADDRESS"}]

    # Construct the configuration dictionary that will only match on
    # PERSON_NAME if the EMAIL_ADDRESS doesn't also match. This configuration
    # helps reduce the total number of findings when there is a large overlap
    # between different infoTypes.
    inspect_config = {
        "info_types": info_types_to_locate,
        "rule_set": [
            {
                # The rules below apply to PERSON_NAME findings only.
                "info_types": [{"name": "PERSON_NAME"}],
                "rules": [
                    {
                        "exclusion_rule": {
                            "exclude_info_types": {
                                "info_types": [{"name": "EMAIL_ADDRESS"}]
                            },
                            "matching_type": "MATCHING_TYPE_PARTIAL_MATCH",
                        }
                    }
                ],
            }
        ],
    }

    # Construct the `item`.
    item = {"value": content_string}

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Call the API.
    response = dlp.inspect_content(parent, inspect_config, item)

    # Report only the infoType name of each finding.
    return [f.info_type.name for f in response.result.findings]
83+
84+
85+
# [END dlp_omit_name_if_also_email]

dlp/custom_infotype_test.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2020 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import custom_infotype
18+
19+
# Project id used as the parent resource for the API calls under test; it is
# None when the GCLOUD_PROJECT environment variable is not set.
GCLOUD_PROJECT = os.environ.get("GCLOUD_PROJECT")
20+
21+
22+
def test_omit_name_if_also_email(capsys):
    """Checks that a string matching both detectors yields only EMAIL_ADDRESS."""
    # The input must be a real-looking email address: its local part can be
    # picked up as a PERSON_NAME, which the exclusion rule should then drop.
    info_types = custom_infotype.omit_name_if_also_email(
        GCLOUD_PROJECT, "alice@example.com")

    # Ensure we found only EMAIL_ADDRESS, and not PERSON_NAME.
    assert info_types == ["EMAIL_ADDRESS"]

0 commit comments

Comments
 (0)