Skip to content

docs(samples): Document AI - Added Test & Evaluation Samples for v1beta3 2.3.0 #8876

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jan 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
9b07bfb
docs(samples): Added Test & Evaluation Samples for v1beta3 2.3.0
holtskinner Jan 6, 2023
0496710
Resolved I100 Import order issues
holtskinner Jan 9, 2023
c924c09
Resolved I100 import issue get_evaluation_sample_test
holtskinner Jan 9, 2023
9da8140
Added Python 3.11 to Ignored versions in noxfile config
holtskinner Jan 9, 2023
4f9be0f
Update noxfile_config.py
holtskinner Jan 10, 2023
b7c74ef
Update noxfile_config.py
holtskinner Jan 13, 2023
e06b950
Merge branch 'main' into documentai-training-samples
holtskinner Jan 13, 2023
890168f
docs(samples): Added Test & Evaluation Samples for v1beta3 2.3.0
holtskinner Jan 6, 2023
f9682db
Resolved I100 Import order issues
holtskinner Jan 9, 2023
86af5a0
Resolved I100 import issue get_evaluation_sample_test
holtskinner Jan 9, 2023
050d92c
Added Python 3.11 to Ignored versions in noxfile config
holtskinner Jan 9, 2023
d398c3e
Update noxfile_config.py
holtskinner Jan 10, 2023
99e547b
Update noxfile_config.py
holtskinner Jan 13, 2023
83c0336
Merge branch 'documentai-training-samples' of https://github.com/Goog…
holtskinner Jan 17, 2023
5c4a609
Merge branch 'main' into documentai-training-samples
holtskinner Jan 17, 2023
d77ff26
Merge branch 'main' into documentai-training-samples
holtskinner Jan 17, 2023
8ac49b2
Merge branch 'main' into documentai-training-samples
holtskinner Jan 17, 2023
be8f092
Update train_processor_version_sample.py
holtskinner Jan 18, 2023
eac4292
Merge branch 'main' into documentai-training-samples
kweinmeister Jan 19, 2023
97ebb66
Merge branch 'main' into documentai-training-samples
kweinmeister Jan 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions documentai/snippets/evaluate_processor_version_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# [START documentai_evaluate_processor_version]

from google.api_core.client_options import ClientOptions
from google.cloud import documentai_v1beta3 as documentai

# TODO(developer): Uncomment these variables before running the sample.
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_id = 'YOUR_PROCESSOR_ID'
# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID'
# gcs_input_uri = 'YOUR_GCS_INPUT_URI' # Format: gs://bucket/directory/


def evaluate_processor_version_sample(
    project_id: str,
    location: str,
    processor_id: str,
    processor_version_id: str,
    gcs_input_uri: str,
):
    """Evaluate a processor version against documents in Cloud Storage.

    Sends an EvaluateProcessorVersion request, waits for the returned
    operation to finish and prints the evaluation from its response.

    Args:
        project_id: Project ID used in the processor version resource name.
        location: Processor location; also used to build the API endpoint.
        processor_id: ID of the processor.
        processor_version_id: ID of the processor version to evaluate.
        gcs_input_uri: Cloud Storage prefix of the evaluation documents,
            e.g. ``gs://bucket/directory/``.
    """
    # The endpoint is location-specific; you must set api_endpoint if you use
    # a location other than 'us'.
    endpoint = f"{location}-documentai.googleapis.com"
    client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(api_endpoint=endpoint)
    )

    # Full resource name of the processor version, e.g.
    # `projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}`
    processor_version_name = client.processor_version_path(
        project_id, location, processor_id, processor_version_id
    )

    # Point the evaluation at the documents under the Cloud Storage prefix.
    gcs_prefix = documentai.GcsPrefix(gcs_uri_prefix=gcs_input_uri)
    input_config = documentai.BatchDocumentsInputConfig(gcs_prefix=gcs_prefix)

    # NOTE: An explicit list of GCS documents can be used instead of a prefix:
    #
    # gcs_input_uri = "gs://bucket/directory/file.pdf"
    # input_mime_type = "application/pdf"
    #
    # gcs_document = documentai.GcsDocument(
    #     gcs_uri=gcs_input_uri, mime_type=input_mime_type
    # )
    # gcs_documents = [gcs_document]
    # input_config = documentai.BatchDocumentsInputConfig(
    #     gcs_documents=documentai.GcsDocuments(documents=gcs_documents)
    # )
    #

    request = documentai.EvaluateProcessorVersionRequest(
        processor_version=processor_version_name,
        evaluation_documents=input_config,
    )

    # Make the EvaluateProcessorVersion request; this returns an operation.
    operation = client.evaluate_processor_version(request=request)

    # Print operation details
    # Format: projects/PROJECT_NUMBER/locations/LOCATION/operations/OPERATION_ID
    print(f"Waiting for operation {operation.operation.name} to complete...")

    # Wait for the operation to complete.
    # This could take some time for larger files.
    response = documentai.EvaluateProcessorVersionResponse(operation.result())

    # Once the operation is complete, print the evaluation from its response.
    print(f"Evaluation Complete: {response.evaluation}")


# [END documentai_evaluate_processor_version]
56 changes: 56 additions & 0 deletions documentai/snippets/evaluate_processor_version_sample_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

from documentai.snippets import evaluate_processor_version_sample

import mock

# Arguments passed to the sample by the test in this module.
location = "us"
# Read from the environment; raises KeyError if GOOGLE_CLOUD_PROJECT is unset.
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
# NOTE(review): these look like dummy values, presumably fine because the
# evaluate_processor_version call is mocked in the test - confirm.
processor_id = "aaaaaaaaa"
processor_version_id = "xxxxxxxxxx"
gcs_input_uri = "gs://bucket/directory/"


# Mocking request as evaluation can take a long time
@mock.patch(
    "google.cloud.documentai_v1beta3.DocumentProcessorServiceClient.evaluate_processor_version"
)
@mock.patch("google.cloud.documentai_v1beta3.EvaluateProcessorVersionResponse")
@mock.patch("google.api_core.operation.Operation")
def test_evaluate_processor_version(
    operation_mock,
    evaluate_processor_version_response_mock,
    evaluate_processor_version_mock,
    capsys,
):
    """Run the sample against patched client calls and check its output.

    Verifies that the sample calls ``evaluate_processor_version`` exactly once
    and that the captured stdout contains the word "operation".
    """
    # Wire the fakes together: the patched client method hands back the fake
    # operation, whose result() yields the fake response.
    evaluate_processor_version_mock.return_value = operation_mock
    operation_mock.result.return_value = evaluate_processor_version_response_mock

    sample_kwargs = {
        "project_id": project_id,
        "location": location,
        "processor_id": processor_id,
        "processor_version_id": processor_version_id,
        "gcs_input_uri": gcs_input_uri,
    }
    evaluate_processor_version_sample.evaluate_processor_version_sample(
        **sample_kwargs
    )

    evaluate_processor_version_mock.assert_called_once()

    captured = capsys.readouterr()
    assert "operation" in captured.out
62 changes: 62 additions & 0 deletions documentai/snippets/get_evaluation_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# [START documentai_get_evaluation]

from google.api_core.client_options import ClientOptions
from google.cloud import documentai_v1beta3 as documentai

# TODO(developer): Uncomment these variables before running the sample.
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample
# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID'
# evaluation_id = 'YOUR_EVALUATION_ID'


def get_evaluation_sample(
    project_id: str,
    location: str,
    processor_id: str,
    processor_version_id: str,
    evaluation_id: str,
):
    """Fetch a single evaluation and print its creation time and document counts.

    Args:
        project_id: Project ID used in the evaluation resource name.
        location: Processor location; also used to build the API endpoint.
        processor_id: ID of the processor.
        processor_version_id: ID of the processor version.
        evaluation_id: ID of the evaluation to retrieve.
    """
    # The endpoint is location-specific; you must set api_endpoint if you use
    # a location other than 'us'.
    endpoint = f"{location}-documentai.googleapis.com"
    client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(api_endpoint=endpoint)
    )

    # Full resource name of the evaluation, e.g.
    # `projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}/evaluations/{evaluation_id}`
    resource_name = client.evaluation_path(
        project_id, location, processor_id, processor_version_id, evaluation_id
    )

    # Make the GetEvaluation request
    evaluation = client.get_evaluation(name=resource_name)

    create_time, document_counters = (
        evaluation.create_time,
        evaluation.document_counters,
    )

    # Print the evaluation information.
    # See https://cloud.google.com/document-ai/docs/reference/rest/v1beta3/projects.locations.processors.processorVersions.evaluations
    # for the other evaluation fields that are available.
    print(f"Create Time: {create_time}")
    print(f"Input Documents: {document_counters.input_documents_count}")
    print(f"\tInvalid Documents: {document_counters.invalid_documents_count}")
    print(f"\tFailed Documents: {document_counters.failed_documents_count}")
    print(f"\tEvaluated Documents: {document_counters.evaluated_documents_count}")


# [END documentai_get_evaluation]
50 changes: 50 additions & 0 deletions documentai/snippets/get_evaluation_sample_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

from documentai.snippets import get_evaluation_sample

import mock

# Arguments passed to the sample by the test in this module.
location = "us"
# Read from the environment; raises KeyError if GOOGLE_CLOUD_PROJECT is unset.
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
# NOTE(review): the get_evaluation call is mocked in the test, so these IDs
# presumably do not need to resolve to live resources - confirm.
processor_id = "a35310a144a6e4f8"
processor_version_id = "2af620b2fd4d1fcf"
evaluation_id = "55cdab6206095055"


# Mocking request
@mock.patch(
    "google.cloud.documentai_v1beta3.DocumentProcessorServiceClient.get_evaluation"
)
@mock.patch("google.cloud.documentai_v1beta3.Evaluation")
def test_get_evaluation(evaluation_mock, get_evaluation_mock, capsys):
    """Run the sample with ``get_evaluation`` patched and check its output.

    Verifies that the sample calls ``get_evaluation`` exactly once and that
    the captured stdout contains "Create Time".
    """
    # The patched client method hands back the fake evaluation.
    get_evaluation_mock.return_value = evaluation_mock

    sample_kwargs = {
        "project_id": project_id,
        "location": location,
        "processor_id": processor_id,
        "processor_version_id": processor_version_id,
        "evaluation_id": evaluation_id,
    }
    get_evaluation_sample.get_evaluation_sample(**sample_kwargs)

    get_evaluation_mock.assert_called_once()

    captured = capsys.readouterr()
    assert "Create Time" in captured.out
54 changes: 54 additions & 0 deletions documentai/snippets/list_evaluations_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# [START documentai_list_evaluations]

from google.api_core.client_options import ClientOptions
from google.cloud import documentai_v1beta3 as documentai

# TODO(developer): Uncomment these variables before running the sample.
# project_id = 'YOUR_PROJECT_ID'
# location = 'YOUR_PROCESSOR_LOCATION' # Format is 'us' or 'eu'
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor before running sample
# processor_version_id = 'YOUR_PROCESSOR_VERSION_ID'


def list_evaluations_sample(
    project_id: str,
    location: str,
    processor_id: str,
    processor_version_id: str,
):
    """List the evaluations of a processor version and print each one.

    Args:
        project_id: Project ID used in the processor version resource name.
        location: Processor location; also used to build the API endpoint.
        processor_id: ID of the processor.
        processor_version_id: ID of the processor version whose evaluations
            are listed.
    """
    # The endpoint is location-specific; you must set api_endpoint if you use
    # a location other than 'us'.
    endpoint = f"{location}-documentai.googleapis.com"
    client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(api_endpoint=endpoint)
    )

    # Full resource name of the processor version, e.g.
    # `projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}`
    parent = client.processor_version_path(
        project_id, location, processor_id, processor_version_id
    )

    # Issue the ListEvaluations call before printing anything.
    listed_evaluations = client.list_evaluations(parent=parent)

    # Print the evaluation information.
    # See https://cloud.google.com/document-ai/docs/reference/rest/v1beta3/projects.locations.processors.processorVersions.evaluations
    # for the other evaluation fields that are available.
    print(f"Evaluations for Processor Version {parent}")

    for evaluation in listed_evaluations:
        print(f"Name: {evaluation.name}")
        print(f"\tCreate Time: {evaluation.create_time}\n")


# [END documentai_list_evaluations]
36 changes: 36 additions & 0 deletions documentai/snippets/list_evaluations_sample_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

from documentai.snippets import list_evaluations_sample

# Arguments passed to the sample by the test in this module.
location = "us"
# Read from the environment; raises KeyError if GOOGLE_CLOUD_PROJECT is unset.
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
# NOTE(review): nothing in this module patches the client, so these presumably
# have to identify an existing processor and version in the project - confirm.
processor_id = "feacd98c28866ede"
processor_version_id = "stable"


def test_list_evaluations(capsys):
    """Run the sample and check that its stdout mentions "Evaluation"."""
    sample_kwargs = {
        "project_id": project_id,
        "location": location,
        "processor_id": processor_id,
        "processor_version_id": processor_version_id,
    }
    list_evaluations_sample.list_evaluations_sample(**sample_kwargs)

    captured = capsys.readouterr()
    assert "Evaluation" in captured.out
2 changes: 1 addition & 1 deletion documentai/snippets/noxfile_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

TEST_CONFIG_OVERRIDE = {
# You can opt out from the test for specific Python versions.
"ignored_versions": ["2.7", "3.6"],
"ignored_versions": ["2.7", "3.6", "3.8", "3.9", "3.10", "3.11"],
# Old samples are opted out of enforcing Python type hints
# All new samples should feature them
"enforce_type_hints": False,
Expand Down
Loading