Skip to content

Commit 81d2287

Browse files
committed
Add custom infoType snippets to DLP samples
1 parent add3a08 commit 81d2287

File tree

2 files changed

+111
-0
lines changed

2 files changed

+111
-0
lines changed

dlp/custom_infotype.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Copyright 2020 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""Custom infoType snippets.
15+
16+
This file contains sample code that uses the Data Loss Prevention API to create
17+
custom infoType detectors to refine scan results.
18+
"""
19+
20+
21+
# [START dlp_omit_name_if_also_email]
22+
def omit_name_if_also_email(
    project,
    content_string,
):
    """Matches PERSON_NAME and EMAIL_ADDRESS, but not both.

    Uses the Data Loss Prevention API to omit matches on PERSON_NAME if the
    EMAIL_ADDRESS detector also matches.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.

    Returns:
        A list containing the infoType name of every finding in the API
        response.
    """

    # Import the client library. The versioned module is imported explicitly
    # because it is the one referenced below.
    import google.cloud.dlp_v2

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct a list of infoTypes for DLP to locate in `content_string`.
    info_types_to_locate = [{"name": "PERSON_NAME"}, {"name": "EMAIL_ADDRESS"}]

    # Construct the configuration dictionary that will only match on PERSON_NAME
    # if the EMAIL_ADDRESS doesn't also match. This configuration helps reduce
    # the total number of findings when there is a large overlap between
    # different infoTypes.
    inspect_config = {
        "info_types": info_types_to_locate,
        "rule_set": [
            {
                # The rules below apply only to PERSON_NAME findings.
                "info_types": [{"name": "PERSON_NAME"}],
                "rules": [
                    {
                        "exclusion_rule": {
                            "exclude_info_types": {
                                "info_types": [{"name": "EMAIL_ADDRESS"}]
                            },
                            "matching_type": "MATCHING_TYPE_PARTIAL_MATCH",
                        }
                    }
                ],
            }
        ],
    }

    # Construct the `item`.
    item = {"value": content_string}

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Call the API.
    response = dlp.inspect_content(parent, inspect_config, item)

    return [f.info_type.name for f in response.result.findings]
81+
82+
83+
# [END dlp_omit_name_if_also_email]

dlp/custom_infotype_test.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2020 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import custom_infotype
18+
19+
# Project id passed to the sample under test; None when the environment
# variable is not set.
GCLOUD_PROJECT = os.environ.get("GCLOUD_PROJECT")
20+
21+
22+
def test_omit_name_if_also_email(capsys):
    """Checks that an email address containing a name yields one finding."""
    # `capsys` is not used here; it is kept so the signature is unchanged.
    info_types = custom_infotype.omit_name_if_also_email(
        GCLOUD_PROJECT, "alice@example.com")

    # Ensure we found only EMAIL_ADDRESS, and not PERSON_NAME.
    assert info_types == ["EMAIL_ADDRESS"]

0 commit comments

Comments
 (0)