You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboardExpand all lines: docs/dyn/documentai_v1.projects.locations.processors.html
+36Lines changed: 36 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -151,6 +151,22 @@ <h3>Method Details</h3>
151
151
"gcsUriPrefix": "A String", # The URI prefix.
152
152
},
153
153
},
154
+
"processOptions": { # Options for Process API # Inference-time options for the process API
155
+
"ocrConfig": { # Config for Document OCR. # Only applicable to `OCR_PROCESSOR`. Returns error if set on other processor types.
156
+
"advancedOcrOptions": [ # A list of advanced OCR options to further fine-tune OCR behavior. Current valid values are: - `legacy_layout`: a heuristics layout detection algorithm, which serves as an alternative to the current ML-based layout detection algorithm. Customers can choose the best suitable layout algorithm based on their situation.
157
+
"A String",
158
+
],
159
+
"computeStyleInfo": True or False, # Turn on font id model and returns font style information.
160
+
"enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call.
161
+
"enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs.
162
+
"enableSymbol": True or False, # Includes symbol level OCR information if set to true.
163
+
"hints": { # Hints for OCR Engine # Hints for the OCR model.
164
+
"languageHints": [ # List of BCP-47 language codes to use for OCR. In most cases, not specifying it yields the best results since it enables automatic language detection. For languages based on the Latin alphabet, setting hints is not needed. In rare cases, when the language of the text in the image is known, setting a hint will help get better results (although it will be a significant hindrance if the hint is wrong).
165
+
"A String",
166
+
],
167
+
},
168
+
},
169
+
},
154
170
"skipHumanReview": True or False, # Whether human review should be skipped for this request. Default to `false`.
155
171
}
156
172
@@ -429,6 +445,10 @@ <h3>Method Details</h3>
429
445
430
446
{ # Request message for the ProcessDocument method.
431
447
"fieldMask": "A String", # Specifies which fields to include in the ProcessResponse.document output. Only supports top-level document and pages field, so it must be in the form of `{document_field_name}` or `pages.{page_field_name}`.
448
+
"gcsDocument": { # Specifies a document stored on Cloud Storage. # A raw document on Google Cloud Storage.
449
+
"gcsUri": "A String", # The Cloud Storage object uri.
450
+
"mimeType": "A String", # An IANA MIME type (RFC6838) of the content.
451
+
},
432
452
"inlineDocument": { # Document represents the canonical document resource in Document AI. It is an interchange format that provides insights into documents and allows for collaboration between users and Document AI to iterate and optimize for quality. # An inline document proto.
433
453
"content": "A String", # Optional. Inline document content, represented as a stream of bytes. Note: As with all `bytes` fields, protobuffers use a pure binary representation, whereas JSON representations use base64.
434
454
"entities": [ # A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.
@@ -1275,6 +1295,22 @@ <h3>Method Details</h3>
1275
1295
],
1276
1296
"uri": "A String", # Optional. Currently supports Google Cloud Storage URI of the form `gs://bucket_name/object_name`. Object versioning is not supported. For more information, refer to [Google Cloud Storage Request URIs](https://cloud.google.com/storage/docs/reference-uris).
1277
1297
},
1298
+
"processOptions": { # Options for Process API # Inference-time options for the process API
1299
+
"ocrConfig": { # Config for Document OCR. # Only applicable to `OCR_PROCESSOR`. Returns error if set on other processor types.
1300
+
"advancedOcrOptions": [ # A list of advanced OCR options to further fine-tune OCR behavior. Current valid values are: - `legacy_layout`: a heuristics layout detection algorithm, which serves as an alternative to the current ML-based layout detection algorithm. Customers can choose the best suitable layout algorithm based on their situation.
1301
+
"A String",
1302
+
],
1303
+
"computeStyleInfo": True or False, # Turn on font id model and returns font style information.
1304
+
"enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call.
1305
+
"enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs.
1306
+
"enableSymbol": True or False, # Includes symbol level OCR information if set to true.
1307
+
"hints": { # Hints for OCR Engine # Hints for the OCR model.
1308
+
"languageHints": [ # List of BCP-47 language codes to use for OCR. In most cases, not specifying it yields the best results since it enables automatic language detection. For languages based on the Latin alphabet, setting hints is not needed. In rare cases, when the language of the text in the image is known, setting a hint will help get better results (although it will be a significant hindrance if the hint is wrong).
1309
+
"A String",
1310
+
],
1311
+
},
1312
+
},
1313
+
},
1278
1314
"rawDocument": { # Payload message of raw document content (bytes). # A raw document content (bytes).
Copy file name to clipboardExpand all lines: docs/dyn/documentai_v1.projects.locations.processors.processorVersions.html
+36Lines changed: 36 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -146,6 +146,22 @@ <h3>Method Details</h3>
146
146
"gcsUriPrefix": "A String", # The URI prefix.
147
147
},
148
148
},
149
+
"processOptions": { # Options for Process API # Inference-time options for the process API
150
+
"ocrConfig": { # Config for Document OCR. # Only applicable to `OCR_PROCESSOR`. Returns error if set on other processor types.
151
+
"advancedOcrOptions": [ # A list of advanced OCR options to further fine-tune OCR behavior. Current valid values are: - `legacy_layout`: a heuristics layout detection algorithm, which serves as an alternative to the current ML-based layout detection algorithm. Customers can choose the best suitable layout algorithm based on their situation.
152
+
"A String",
153
+
],
154
+
"computeStyleInfo": True or False, # Turn on font id model and returns font style information.
155
+
"enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call.
156
+
"enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs.
157
+
"enableSymbol": True or False, # Includes symbol level OCR information if set to true.
158
+
"hints": { # Hints for OCR Engine # Hints for the OCR model.
159
+
"languageHints": [ # List of BCP-47 language codes to use for OCR. In most cases, not specifying it yields the best results since it enables automatic language detection. For languages based on the Latin alphabet, setting hints is not needed. In rare cases, when the language of the text in the image is known, setting a hint will help get better results (although it will be a significant hindrance if the hint is wrong).
160
+
"A String",
161
+
],
162
+
},
163
+
},
164
+
},
149
165
"skipHumanReview": True or False, # Whether human review should be skipped for this request. Default to `false`.
150
166
}
151
167
@@ -525,6 +541,10 @@ <h3>Method Details</h3>
525
541
526
542
{ # Request message for the ProcessDocument method.
527
543
"fieldMask": "A String", # Specifies which fields to include in the ProcessResponse.document output. Only supports top-level document and pages field, so it must be in the form of `{document_field_name}` or `pages.{page_field_name}`.
544
+
"gcsDocument": { # Specifies a document stored on Cloud Storage. # A raw document on Google Cloud Storage.
545
+
"gcsUri": "A String", # The Cloud Storage object uri.
546
+
"mimeType": "A String", # An IANA MIME type (RFC6838) of the content.
547
+
},
528
548
"inlineDocument": { # Document represents the canonical document resource in Document AI. It is an interchange format that provides insights into documents and allows for collaboration between users and Document AI to iterate and optimize for quality. # An inline document proto.
529
549
"content": "A String", # Optional. Inline document content, represented as a stream of bytes. Note: As with all `bytes` fields, protobuffers use a pure binary representation, whereas JSON representations use base64.
530
550
"entities": [ # A list of entities detected on Document.text. For document shards, entities in this list may cross shard boundaries.
@@ -1371,6 +1391,22 @@ <h3>Method Details</h3>
1371
1391
],
1372
1392
"uri": "A String", # Optional. Currently supports Google Cloud Storage URI of the form `gs://bucket_name/object_name`. Object versioning is not supported. For more information, refer to [Google Cloud Storage Request URIs](https://cloud.google.com/storage/docs/reference-uris).
1373
1393
},
1394
+
"processOptions": { # Options for Process API # Inference-time options for the process API
1395
+
"ocrConfig": { # Config for Document OCR. # Only applicable to `OCR_PROCESSOR`. Returns error if set on other processor types.
1396
+
"advancedOcrOptions": [ # A list of advanced OCR options to further fine-tune OCR behavior. Current valid values are: - `legacy_layout`: a heuristics layout detection algorithm, which serves as an alternative to the current ML-based layout detection algorithm. Customers can choose the best suitable layout algorithm based on their situation.
1397
+
"A String",
1398
+
],
1399
+
"computeStyleInfo": True or False, # Turn on font id model and returns font style information.
1400
+
"enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call.
1401
+
"enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs.
1402
+
"enableSymbol": True or False, # Includes symbol level OCR information if set to true.
1403
+
"hints": { # Hints for OCR Engine # Hints for the OCR model.
1404
+
"languageHints": [ # List of BCP-47 language codes to use for OCR. In most cases, not specifying it yields the best results since it enables automatic language detection. For languages based on the Latin alphabet, setting hints is not needed. In rare cases, when the language of the text in the image is known, setting a hint will help get better results (although it will be a significant hindrance if the hint is wrong).
1405
+
"A String",
1406
+
],
1407
+
},
1408
+
},
1409
+
},
1374
1410
"rawDocument": { # Payload message of raw document content (bytes). # A raw document content (bytes).
0 commit comments