Skip to content

Commit 8523898

Browse files
fix(documentai): refactor sample 'quickstart' (#13258)
- Move the creation and deletion of the Processor into a test fixture outside the sample - Clean up the test based on David Cavazos's advice - Refactor the sample to make it consistent with the other languages in https://cloud.google.com/document-ai/docs/process-documents-client-libraries#using_the_client_library
1 parent 46a0c73 commit 8523898

File tree

2 files changed

+65
-39
lines changed

2 files changed

+65
-39
lines changed

documentai/snippets/quickstart_sample.py

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,52 +13,48 @@
1313
# limitations under the License.
1414

1515
from google.cloud.documentai_v1.types.document import Document
16-
from google.cloud.documentai_v1.types.processor import Processor
1716

1817

1918
def quickstart(
2019
project_id: str,
20+
processor_id: str,
2121
location: str,
2222
file_path: str,
23-
processor_display_name: str,
24-
) -> tuple[Processor, Document]:
23+
) -> Document:
2524
# [START documentai_quickstart]
2625
from google.api_core.client_options import ClientOptions
27-
from google.cloud import documentai_v1 # type: ignore
26+
from google.cloud import documentai_v1
27+
28+
# TODO(developer): Create a processor of type "OCR_PROCESSOR".
2829

2930
# TODO(developer): Update and uncomment these variables before running the sample.
3031
# project_id = "MY_PROJECT_ID"
3132

33+
# Processor ID as hexadecimal characters.
34+
# Not to be confused with the Processor Display Name.
35+
# processor_id = "MY_PROCESSOR_ID"
36+
3237
# Processor location. For example: "us" or "eu".
3338
# location = "MY_PROCESSOR_LOCATION"
3439

3540
# Path for file to process.
3641
# file_path = "/path/to/local/pdf"
3742

38-
# Processor display name must be unique per project.
39-
# processor_display_name = "MY_PROCESSOR_DISPLAY_NAME"
40-
4143
# Set `api_endpoint` if you use a location other than "us".
4244
opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
4345

4446
# Initialize Document AI client.
4547
client = documentai_v1.DocumentProcessorServiceClient(client_options=opts)
4648

47-
# Get the full resource name of the location.
48-
# For example: `projects/{project_id}/locations/{location}`
49-
parent = client.common_location_path(project_id, location)
50-
51-
# Create a Processor.
52-
# For available types, refer to https://cloud.google.com/document-ai/docs/create-processor
53-
processor = client.create_processor(
54-
parent=parent,
55-
processor=documentai_v1.Processor(
56-
type_="OCR_PROCESSOR",
57-
display_name=processor_display_name,
58-
),
59-
)
49+
# Get the Fully-qualified Processor path.
50+
full_processor_name = client.processor_path(project_id, location, processor_id)
6051

61-
# Print the processor information.
52+
# Get a Processor reference.
53+
request = documentai_v1.GetProcessorRequest(name=full_processor_name)
54+
processor = client.get_processor(request=request)
55+
56+
# `processor.name` is the full resource name of the processor.
57+
# For example: `projects/{project_id}/locations/{location}/processors/{processor_id}`
6258
print(f"Processor Name: {processor.name}")
6359

6460
# Read the file into memory.
@@ -72,11 +68,8 @@ def quickstart(
7268
mime_type="application/pdf",
7369
)
7470

75-
# Configure the process request.
76-
# `processor.name` is the full resource name of the processor,
77-
# For example: `projects/{project_id}/locations/{location}/processors/{processor_id}`
71+
# Send a request and get the processed document.
7872
request = documentai_v1.ProcessRequest(name=processor.name, raw_document=raw_document)
79-
8073
result = client.process_document(request=request)
8174
document = result.document
8275

@@ -87,4 +80,4 @@ def quickstart(
8780
print(document.text)
8881
# [END documentai_quickstart]
8982

90-
return processor, document
83+
return document

documentai/snippets/quickstart_sample_test.py

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,63 @@
2020
from google.api_core.client_options import ClientOptions
2121
from google.cloud import documentai_v1
2222

23+
from google.cloud.documentai_v1.types.processor import Processor
24+
25+
import pytest
26+
2327
LOCATION = "us"
2428
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
25-
PROCESSOR_DISPLAY_NAME = f"test-processor-{uuid4()}"
2629
FILE_PATH = "resources/invoice.pdf"
2730

2831

29-
def test_quickstart() -> None:
30-
processor, document = quickstart_sample.quickstart(
31-
project_id=PROJECT_ID,
32-
location=LOCATION,
33-
processor_display_name=PROCESSOR_DISPLAY_NAME,
34-
file_path=FILE_PATH,
32+
@pytest.fixture(scope="module")
33+
def client() -> documentai_v1.DocumentProcessorServiceClient:
34+
opts = ClientOptions(api_endpoint=f"{LOCATION}-documentai.googleapis.com")
35+
36+
client = documentai_v1.DocumentProcessorServiceClient(client_options=opts)
37+
38+
return client
39+
40+
41+
@pytest.fixture(scope="module")
42+
def processor_id(client: documentai_v1.DocumentProcessorServiceClient) -> Processor:
43+
processor_display_name = f"test-processor-{uuid4()}"
44+
45+
# Get the full resource name of the location.
46+
# For example: `projects/{project_id}/locations/{location}`
47+
parent = client.common_location_path(PROJECT_ID, LOCATION)
48+
49+
# Create a Processor.
50+
# https://cloud.google.com/document-ai/docs/create-processor#available_processors
51+
processor = client.create_processor(
52+
parent=parent,
53+
processor=documentai_v1.Processor(
54+
type_="OCR_PROCESSOR",
55+
display_name=processor_display_name,
56+
),
3557
)
3658

37-
assert processor is not None
38-
assert "Invoice" in document.text
59+
# `processor.name` (Full Processor Path) has this form:
60+
# `projects/{project_id}/locations/{location}/processors/{processor_id}`
61+
# Return only the `processor_id` section.
62+
last_slash_index = processor.name.rfind('/')
63+
yield processor.name[last_slash_index + 1:]
3964

40-
# Delete created processor
65+
# Delete processor.
4166
client = documentai_v1.DocumentProcessorServiceClient(
4267
client_options=ClientOptions(
4368
api_endpoint=f"{LOCATION}-documentai.googleapis.com"
4469
)
4570
)
46-
operation = client.delete_processor(name=processor.name)
71+
client.delete_processor(name=processor.name)
72+
4773

48-
# Wait for operation to complete
49-
operation.result()
74+
def test_quickstart(processor_id: str) -> None:
75+
document = quickstart_sample.quickstart(
76+
project_id=PROJECT_ID,
77+
processor_id=processor_id,
78+
location=LOCATION,
79+
file_path=FILE_PATH,
80+
)
81+
82+
assert "Invoice" in document.text

0 commit comments

Comments
 (0)