Skip to content

Commit 4939eec

Browse files
holtskinnerdandhleekweinmeistergcf-merge-on-green[bot]
authored
docs(samples): Document AI Warehouse - Created Quickstart Sample Code (#8969)
* docs(samples): Document AI Warehouse - Created Quickstart and Search Documents sample code * docs(samples): Added contentwarehouse to blunderbuss and Codeowners * docs(samples): Resolved Typing error - Put File type filter in `DocumentQuery` * docs(samples): Adjusted Histogram Printing with f-strings * docs(samples): Added project number conversion to Document AI Warehouse Tests * docs(samples): Upgrade contentwarehouse to `0.3.0` * docs(samples): Updated Document Warehouse Quickstart & Search based on Code Review comments * Update contentwarehouse/snippets/noxfile_config.py Co-authored-by: Dan Lee <[email protected]> * Update noxfile_config.py Enabled Type Hints * docs(samples): Moved get_project_number into test_utilities.py * docs(samples): Added Types to Tests * docs(samples): Changed Any for capsys to pytest.CaptureFixture * Ran isort with Google Profile Co-authored-by: Dan Lee <[email protected]> Co-authored-by: Karl Weinmeister <[email protected]> Co-authored-by: gcf-merge-on-green[bot] <60162190+gcf-merge-on-green[bot]@users.noreply.github.com>
1 parent 0a656b3 commit 4939eec

14 files changed

+332
-0
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
/compute/**/* @m-strzelczyk @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/python-samples-reviewers
3737
/container/**/* @GoogleCloudPlatform/dee-platform-ops @GoogleCloudPlatform/python-samples-reviewers
3838
/containeranalysis/**/* @GoogleCloudPlatform/aap-dpes @GoogleCloudPlatform/python-samples-reviewers
39+
/contentwarehouse/**/* @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers
3940
/data-science-onramp/ @leahecole @bradmiro @GoogleCloudPlatform/python-samples-reviewers
4041
/datacatalog/**/* @GoogleCloudPlatform/python-samples-reviewers
4142
/dataflow/**/* @davidcavazos @GoogleCloudPlatform/python-samples-reviewers

.github/blunderbuss.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ assign_issues_by:
8484
to:
8585
- GoogleCloudPlatform/api-iot
8686
- labels:
87+
- 'api: contentwarehouse'
8788
- 'api: enterpriseknowledgegraph'
8889
- 'api: documentai'
8990
- 'api: language'
@@ -186,6 +187,7 @@ assign_prs_by:
186187
to:
187188
- GoogleCloudPlatform/infra-db-dpes
188189
- labels:
190+
- 'api: contentwarehouse'
189191
- 'api: enterpriseknowledgegraph'
190192
- 'api: documentai'
191193
- 'api: retail'

contentwarehouse/AUTHORING_GUIDE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md

contentwarehouse/CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/CONTRIBUTING.md

contentwarehouse/__init__.py

Whitespace-only changes.

contentwarehouse/snippets/__init__.py

Whitespace-only changes.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Default TEST_CONFIG_OVERRIDE for python repos.
16+
17+
# You can copy this file into your directory, then it will be imported from
18+
# the noxfile.py.
19+
20+
# The source of truth:
21+
# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py
22+
23+
TEST_CONFIG_OVERRIDE = {
    # Python versions for which the tests in this directory are skipped.
    "ignored_versions": ["2.7", "3.6", "3.8", "3.9", "3.10", "3.11"],
    # Type hints are enforced here; only old samples opt out of this check,
    # and every new sample is expected to carry annotations.
    "enforce_type_hints": True,
    # Name of the environment variable that supplies the project id.
    # Set this to "BUILD_SPECIFIC_GCLOUD_PROJECT" to opt in to a
    # build-specific Cloud project, or to any other variable name to point
    # the tests at a project of your own.
    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
    # Pin pip by giving the version as a string (for example "20.2.4");
    # None leaves the pip version unpinned.
    "pip_version_override": None,
    # Extra environment variables injected into the test run. They take
    # precedence over the predefined values. Never store secrets here.
    "envs": {},
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
17+
# [START contentwarehouse_quickstart]
18+
19+
from google.cloud import contentwarehouse
20+
21+
# TODO(developer): Uncomment these variables before running the sample.
22+
# project_number = 'YOUR_PROJECT_NUMBER'
23+
# location = 'YOUR_PROJECT_LOCATION' # Format is 'us' or 'eu'
24+
25+
26+
def quickstart(project_number: str, location: str) -> None:
    """Create a document schema and one document that uses it, then print the result.

    The schema has a single searchable text property named ``stock_symbol``;
    the document sets that property to ``GOOG``.

    Args:
        project_number: Used as the ``project`` component of the parent
            location path.
        location: Used as the ``location`` component of the parent location
            path (the sample's notes give 'us' or 'eu' as the format).
    """
    schema_client = contentwarehouse.DocumentSchemaServiceClient()

    # Parent location resource name:
    # projects/{project_number}/locations/{location}
    schema_parent = schema_client.common_location_path(
        project=project_number, location=location
    )

    # A text-typed, searchable schema property.
    text_property = contentwarehouse.PropertyDefinition(
        # The name must be unique within a document schema (case insensitive).
        name="stock_symbol",
        display_name="Searchable text",
        is_searchable=True,
        text_type_options=contentwarehouse.TextTypeOptions(),
    )

    schema_request = contentwarehouse.CreateDocumentSchemaRequest(
        parent=schema_parent,
        document_schema=contentwarehouse.DocumentSchema(
            display_name="My Test Schema",
            property_definitions=[text_property],
        ),
    )
    created_schema = schema_client.create_document_schema(request=schema_request)

    docs_client = contentwarehouse.DocumentServiceClient()

    # Same parent location, this time resolved through the document client.
    docs_parent = docs_client.common_location_path(
        project=project_number, location=location
    )

    # Give the document a value for the property defined by the new schema.
    stock_symbol_value = contentwarehouse.Property(
        name=created_schema.property_definitions[0].name,
        text_values=contentwarehouse.TextArray(values=["GOOG"]),
    )

    new_document = contentwarehouse.Document(
        display_name="My Test Document",
        document_schema_name=created_schema.name,
        plain_text="This is a sample of a document's text.",
        properties=[stock_symbol_value],
    )

    # Create the document under the schema created above.
    response = docs_client.create_document(
        request=contentwarehouse.CreateDocumentRequest(
            parent=docs_parent, document=new_document
        )
    )

    # Report what the service returned.
    print(f"Rule Engine Output: {response.rule_engine_output}")
    print(f"Document Created: {response.document}")
93+
94+
95+
# [END contentwarehouse_quickstart]
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
import os
17+
18+
from contentwarehouse.snippets import quickstart_sample
19+
from contentwarehouse.snippets import test_utilities
20+
import pytest
21+
22+
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
location = "us"  # Format is 'us' or 'eu'


def test_quickstart(capsys: pytest.CaptureFixture) -> None:
    """Run the quickstart sample and check that both result lines are printed."""
    quickstart_sample.quickstart(
        # The sample takes a project number, so convert the project id first.
        project_number=test_utilities.get_project_number(project_id),
        location=location,
    )
    stdout = capsys.readouterr().out

    for expected_label in ("Rule Engine Output", "Document Created"):
        assert expected_label in stdout
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pytest==7.2.0
2+
mock==5.0.0
3+
google-cloud-resource-manager==1.8.0
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
google-cloud-contentwarehouse==0.3.0
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
17+
# [START contentwarehouse_search_documents]
18+
19+
from google.cloud import contentwarehouse
20+
21+
# TODO(developer): Uncomment these variables before running the sample.
22+
# project_number = 'YOUR_PROJECT_NUMBER'
23+
# location = 'YOUR_PROJECT_LOCATION' # Format is 'us' or 'eu'
24+
# document_query_text = 'YOUR_DOCUMENT_QUERY'
25+
26+
27+
def search_documents_sample(
    project_number: str,
    location: str,
    document_query_text: str,
) -> None:
    """Search documents by text and print the matches and a schema histogram.

    Args:
        project_number: Used as the ``project`` component of the parent
            location path.
        location: Used as the ``location`` component of the parent location
            path (the sample's notes give 'us' or 'eu' as the format).
        document_query_text: Text passed as the document query.
    """
    document_client = contentwarehouse.DocumentServiceClient()

    search_request = contentwarehouse.SearchDocumentsRequest(
        # Parent location resource name:
        # projects/{project_number}/locations/{location}
        parent=document_client.common_location_path(
            project=project_number, location=location
        ),
        # Text query, restricted by file type (options: DOCUMENT, FOLDER).
        document_query=contentwarehouse.DocumentQuery(
            query=document_query_text,
            file_type_filter=contentwarehouse.FileTypeFilter(
                file_type=contentwarehouse.FileTypeFilter.FileType.DOCUMENT
            ),
        ),
        # Ask for a count histogram over "DocumentSchemaId".
        histogram_queries=[
            contentwarehouse.HistogramQuery(
                histogram_query='count("DocumentSchemaId")'
            )
        ],
    )

    search_results = document_client.search_documents(request=search_request)

    # One entry per match: display name and schema name, resource name,
    # create time, then the text snippet (keywords are highlighted with
    # <b> & </b>).
    for match in search_results.matching_documents:
        doc = match.document
        print(
            f"{doc.display_name} - {doc.document_schema_name}\n"
            f"{doc.name}\n"
            f"{doc.create_time}\n"
            f"{match.search_text_snippet}\n"
        )

    # One fixed-width table per histogram query result.
    for histogram_result in search_results.histogram_query_results:
        print(
            f"Histogram Query: {histogram_result.histogram_query}\n"
            f"| {'Schema':<70} | {'Count':<15} |"
        )
        for schema, count in histogram_result.histogram.items():
            print(f"| {schema:<70} | {count:<15} |")
89+
90+
91+
# [END contentwarehouse_search_documents]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
import os
17+
18+
from contentwarehouse.snippets import search_documents_sample
19+
from contentwarehouse.snippets import test_utilities
20+
import pytest
21+
22+
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
location = "us"  # Format is 'us' or 'eu'
document_query_text = "document"


def test_search_documents(capsys: pytest.CaptureFixture) -> None:
    """Run the search sample and check that the output mentions "document"."""
    search_documents_sample.search_documents_sample(
        # The sample takes a project number, so convert the project id first.
        project_number=test_utilities.get_project_number(project_id),
        location=location,
        document_query_text=document_query_text,
    )
    captured = capsys.readouterr()

    assert "document" in captured.out
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
from google.cloud import resourcemanager
17+
18+
19+
def get_project_number(project_id: str) -> str:
    """Fetch a project by id and return the identifier from its resource name.

    Args:
        project_id: Project identifier used to build the lookup path.

    Returns:
        The ``project`` component parsed from the fetched project's ``name``.
        NOTE(review): callers treat this as the project number, which assumes
        the service reports ``name`` as ``projects/{number}`` - confirm.
    """
    projects_client = resourcemanager.ProjectsClient()
    fetched = projects_client.get_project(
        name=projects_client.project_path(project=project_id)
    )
    path_components = projects_client.parse_project_path(fetched.name)
    return path_components["project"]

0 commit comments

Comments
 (0)