# frozen_string_literal: true # Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Auto-generated by gapic-generator-ruby. DO NOT EDIT! module Google module Cloud module DocumentAI module V1 # Document represents the canonical document resource in Document AI. It is an # interchange format that provides insights into documents and allows for # collaboration between users and Document AI to iterate and optimize for # quality. # @!attribute [rw] uri # @return [::String] # Optional. Currently supports Google Cloud Storage URI of the form # `gs://bucket_name/object_name`. Object versioning is not supported. # For more information, refer to [Google Cloud Storage Request # URIs](https://cloud.google.com/storage/docs/reference-uris). # @!attribute [rw] content # @return [::String] # Optional. Inline document content, represented as a stream of bytes. # Note: As with all `bytes` fields, protobuffers use a pure binary # representation, whereas JSON representations use base64. # @!attribute [rw] mime_type # @return [::String] # An IANA published [media type (MIME # type)](https://www.iana.org/assignments/media-types/media-types.xhtml). # @!attribute [rw] text # @return [::String] # Optional. UTF-8 encoded text in reading order from the document. # @!attribute [rw] text_styles # @deprecated This field is deprecated and may be removed in the next major version update. 
# @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Style>] # Styles for the {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. # @!attribute [rw] pages # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page>] # Visual page layout for the {::Google::Cloud::DocumentAI::V1::Document Document}. # @!attribute [rw] entities # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Entity>] # A list of entities detected on # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. For document # shards, entities in this list may cross shard boundaries. # @!attribute [rw] entity_relations # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::EntityRelation>] # Placeholder. Relationship among # {::Google::Cloud::DocumentAI::V1::Document#entities Document.entities}. # @!attribute [rw] text_changes # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::TextChange>] # Placeholder. A list of text corrections made to # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. This is usually # used for annotating corrections to OCR mistakes. Text changes for a given # revision may not overlap with each other. # @!attribute [rw] shard_info # @return [::Google::Cloud::DocumentAI::V1::Document::ShardInfo] # Information about the sharding if this document is sharded part of a larger # document. If the document is not sharded, this message is not specified. # @!attribute [rw] error # @return [::Google::Rpc::Status] # Any error that occurred while processing this document. # @!attribute [rw] revisions # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Revision>] # Placeholder. Revision history of this document. class Document include ::Google::Protobuf::MessageExts extend ::Google::Protobuf::MessageExts::ClassMethods # For a large document, sharding may be performed to produce several # document shards. Each document shard contains this field to detail which # shard it is. 
# @!attribute [rw] shard_index
#   @return [::Integer]
#     The 0-based index of this shard.
# @!attribute [rw] shard_count
#   @return [::Integer]
#     Total number of shards.
# @!attribute [rw] text_offset
#   @return [::Integer]
#     The index of the first character in
#     {::Google::Cloud::DocumentAI::V1::Document#text Document.text} in the overall
#     document global text.
class ShardInfo
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
end

# Annotation for common text style attributes. This adheres to CSS
# conventions as much as possible.
# @!attribute [rw] text_anchor
#   @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
#     Text anchor indexing into the
#     {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
# @!attribute [rw] color
#   @return [::Google::Type::Color]
#     Text color.
# @!attribute [rw] background_color
#   @return [::Google::Type::Color]
#     Text background color.
# @!attribute [rw] font_weight
#   @return [::String]
#     [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
#     Possible values are `normal`, `bold`, `bolder`, and `lighter`.
# @!attribute [rw] text_style
#   @return [::String]
#     [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
#     Possible values are `normal`, `italic`, and `oblique`.
# @!attribute [rw] text_decoration
#   @return [::String]
#     [Text
#     decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
#     Follows CSS standard.
#
# @!attribute [rw] font_size
#   @return [::Google::Cloud::DocumentAI::V1::Document::Style::FontSize]
#     Font size.
# @!attribute [rw] font_family
#   @return [::String]
#     Font family such as `Arial`, `Times New Roman`.
#     https://www.w3schools.com/cssref/pr_font_font-family.asp
class Style
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Font size with unit.
  # @!attribute [rw] size
  #   @return [::Float]
  #     Font size for the text.
  # @!attribute [rw] unit
  #   @return [::String]
  #     Unit for the font size. Follows CSS naming (such as `in`, `px`, and
  #     `pt`).
  class FontSize
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

# A page in a {::Google::Cloud::DocumentAI::V1::Document Document}.
# @!attribute [rw] page_number
#   @return [::Integer]
#     1-based index for current
#     {::Google::Cloud::DocumentAI::V1::Document::Page Page} in a parent
#     {::Google::Cloud::DocumentAI::V1::Document Document}. Useful when a page is
#     taken out of a {::Google::Cloud::DocumentAI::V1::Document Document} for
#     individual processing.
# @!attribute [rw] image
#   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Image]
#     Rendered image for this page. This image is preprocessed to remove any
#     skew, rotation, and distortions such that the annotation bounding boxes
#     can be upright and axis-aligned.
# @!attribute [rw] transforms
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Matrix>]
#     Transformation matrices that were applied to the original document image
#     to produce {::Google::Cloud::DocumentAI::V1::Document::Page#image Page.image}.
# @!attribute [rw] dimension
#   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Dimension]
#     Physical dimension of the page.
# @!attribute [rw] layout
#   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
#     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the page.
# @!attribute [rw] detected_languages
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
#     A list of detected languages together with confidence.
# @!attribute [rw] blocks
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Block>]
#     A list of visually detected text blocks on the page.
#     A block has a set of lines (collected into paragraphs) that have a common
#     line-spacing and orientation.
# @!attribute [rw] paragraphs
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Paragraph>]
#     A list of visually detected text paragraphs on the page.
#     A collection of lines that a human would perceive as a paragraph.
# @!attribute [rw] lines
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Line>]
#     A list of visually detected text lines on the page.
#     A collection of tokens that a human would perceive as a line.
# @!attribute [rw] tokens
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Token>]
#     A list of visually detected tokens on the page.
# @!attribute [rw] visual_elements
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::VisualElement>]
#     A list of detected non-text visual elements e.g. checkbox,
#     signature etc. on the page.
# @!attribute [rw] tables
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Table>]
#     A list of visually detected tables on the page.
# @!attribute [rw] form_fields
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::FormField>]
#     A list of visually detected form fields on the page.
# @!attribute [rw] symbols
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Symbol>]
#     A list of visually detected symbols on the page.
# @!attribute [rw] detected_barcodes
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedBarcode>]
#     A list of detected barcodes.
# @!attribute [rw] image_quality_scores
#   @return [::Google::Cloud::DocumentAI::V1::Document::Page::ImageQualityScores]
#     Image quality scores.
# @!attribute [rw] provenance
#   @deprecated This field is deprecated and may be removed in the next major version update.
#   @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
#     The history of this page.
class Page
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Dimension for the page.
  # @!attribute [rw] width
  #   @return [::Float]
  #     Page width.
  # @!attribute [rw] height
  #   @return [::Float]
  #     Page height.
  # @!attribute [rw] unit
  #   @return [::String]
  #     Dimension unit.
  class Dimension
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Rendered image contents for this page.
  # @!attribute [rw] content
  #   @return [::String]
  #     Raw byte content of the image.
  # @!attribute [rw] mime_type
  #   @return [::String]
  #     Encoding [media type (MIME
  #     type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
  #     for the image.
  # @!attribute [rw] width
  #   @return [::Integer]
  #     Width of the image in pixels.
  # @!attribute [rw] height
  #   @return [::Integer]
  #     Height of the image in pixels.
  class Image
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Representation for transformation matrix, intended to be compatible and
  # used with OpenCV format for image manipulation.
  # @!attribute [rw] rows
  #   @return [::Integer]
  #     Number of rows in the matrix.
  # @!attribute [rw] cols
  #   @return [::Integer]
  #     Number of columns in the matrix.
  # @!attribute [rw] type
  #   @return [::Integer]
  #     This encodes information about what data type the matrix uses.
  #     For example, 0 (CV_8U) is an unsigned 8-bit image. For the full list
  #     of OpenCV primitive data types, please refer to
  #     https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html
  # @!attribute [rw] data
  #   @return [::String]
  #     The matrix data.
  class Matrix
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Visual element describing a layout unit on a page.
  # @!attribute [rw] text_anchor
  #   @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
  #     Text anchor indexing into the
  #     {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
  # @!attribute [rw] confidence
  #   @return [::Float]
  #     Confidence of the current
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} within
  #     context of the object this layout is for. e.g. confidence can be for a
  #     single token, a table, a visual element, etc. depending on context.
  #     Range `[0, 1]`.
  # @!attribute [rw] bounding_poly
  #   @return [::Google::Cloud::DocumentAI::V1::BoundingPoly]
  #     The bounding polygon for the
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout}.
  # @!attribute [rw] orientation
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout::Orientation]
  #     Detected orientation for the
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout}.
  class Layout
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Detected human reading orientation.
    module Orientation
      # Unspecified orientation.
      ORIENTATION_UNSPECIFIED = 0

      # Orientation is aligned with page up.
      PAGE_UP = 1

      # Orientation is aligned with page right.
      # Turn the head 90 degrees clockwise from upright to read.
      PAGE_RIGHT = 2

      # Orientation is aligned with page down.
      # Turn the head 180 degrees from upright to read.
      PAGE_DOWN = 3

      # Orientation is aligned with page left.
      # Turn the head 90 degrees counterclockwise from upright to read.
      PAGE_LEFT = 4
    end
  end

  # A block has a set of lines (collected into paragraphs) that have a
  # common line-spacing and orientation.
  # @!attribute [rw] layout
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Block Block}.
  # @!attribute [rw] detected_languages
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  # @!attribute [rw] provenance
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
  #     The history of this annotation.
  class Block
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # A collection of lines that a human would perceive as a paragraph.
  # @!attribute [rw] layout
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Paragraph Paragraph}.
  # @!attribute [rw] detected_languages
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  # @!attribute [rw] provenance
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
  #     The history of this annotation.
  class Paragraph
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # A collection of tokens that a human would perceive as a line.
  # Does not cross column boundaries, can be horizontal, vertical, etc.
  # @!attribute [rw] layout
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Line Line}.
  # @!attribute [rw] detected_languages
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  # @!attribute [rw] provenance
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
  #     The history of this annotation.
  class Line
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # A detected token.
  # @!attribute [rw] layout
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
  # @!attribute [rw] detected_break
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::DetectedBreak]
  #     Detected break at the end of a
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
  # @!attribute [rw] detected_languages
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  # @!attribute [rw] provenance
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
  #     The history of this annotation.
  # @!attribute [rw] style_info
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo]
  #     Text style attributes.
  class Token
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Detected break at the end of a
    # {::Google::Cloud::DocumentAI::V1::Document::Page::Token Token}.
    # @!attribute [rw] type
    #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Token::DetectedBreak::Type]
    #     Detected break type.
    class DetectedBreak
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods

      # Enum to denote the type of break found.
      module Type
        # Unspecified break type.
        TYPE_UNSPECIFIED = 0

        # A single whitespace.
        SPACE = 1

        # A wider whitespace.
        WIDE_SPACE = 2

        # A hyphen that indicates that a token has been split across lines.
        HYPHEN = 3
      end
    end

    # Font and other text style attributes.
    # @!attribute [rw] font_size
    #   @return [::Integer]
    #     Font size in points (`1` point is `¹⁄₇₂` inches).
    # @!attribute [rw] pixel_font_size
    #   @return [::Float]
    #     Font size in pixels, equal to _unrounded
    #     {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_size font_size}_
    #     * _resolution_ ÷ `72.0`.
    # @!attribute [rw] letter_spacing
    #   @return [::Float]
    #     Letter spacing in points.
    # @!attribute [rw] font_type
    #   @return [::String]
    #     Name or style of the font.
    # @!attribute [rw] bold
    #   @return [::Boolean]
    #     Whether the text is bold (equivalent to
    #     {::Google::Cloud::DocumentAI::V1::Document::Page::Token::StyleInfo#font_weight font_weight}
    #     is at least `700`).
    # @!attribute [rw] italic
    #   @return [::Boolean]
    #     Whether the text is italic.
    # @!attribute [rw] underlined
    #   @return [::Boolean]
    #     Whether the text is underlined.
    # @!attribute [rw] strikeout
    #   @return [::Boolean]
    #     Whether the text is strikethrough.
    # @!attribute [rw] subscript
    #   @return [::Boolean]
    #     Whether the text is a subscript.
    # @!attribute [rw] superscript
    #   @return [::Boolean]
    #     Whether the text is a superscript.
    # @!attribute [rw] smallcaps
    #   @return [::Boolean]
    #     Whether the text is in small caps.
    # @!attribute [rw] font_weight
    #   @return [::Integer]
    #     TrueType weight on a scale `100` (thin) to `1000` (ultra-heavy).
    #     Normal is `400`, bold is `700`.
    # @!attribute [rw] handwritten
    #   @return [::Boolean]
    #     Whether the text is handwritten.
    # @!attribute [rw] text_color
    #   @return [::Google::Type::Color]
    #     Color of the text.
    # @!attribute [rw] background_color
    #   @return [::Google::Type::Color]
    #     Color of the background.
    class StyleInfo
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # A detected symbol.
  # @!attribute [rw] layout
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Symbol Symbol}.
  # @!attribute [rw] detected_languages
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  class Symbol
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Detected non-text visual elements e.g. checkbox, signature etc. on the
  # page.
  # @!attribute [rw] layout
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::VisualElement VisualElement}.
  # @!attribute [rw] type
  #   @return [::String]
  #     Type of the
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::VisualElement VisualElement}.
  # @!attribute [rw] detected_languages
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  class VisualElement
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # A table representation similar to HTML table structure.
  # @!attribute [rw] layout
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Table Table}.
  # @!attribute [rw] header_rows
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Table::TableRow>]
  #     Header rows of the table.
  # @!attribute [rw] body_rows
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Table::TableRow>]
  #     Body rows of the table.
  # @!attribute [rw] detected_languages
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  # @!attribute [rw] provenance
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
  #     The history of this table.
  class Table
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # A row of table cells.
    # @!attribute [rw] cells
    #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::Table::TableCell>]
    #     Cells that make up this row.
    class TableRow
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # A cell representation inside the table.
    # @!attribute [rw] layout
    #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
    #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
    #     {::Google::Cloud::DocumentAI::V1::Document::Page::Table::TableCell TableCell}.
    # @!attribute [rw] row_span
    #   @return [::Integer]
    #     How many rows this cell spans.
    # @!attribute [rw] col_span
    #   @return [::Integer]
    #     How many columns this cell spans.
    # @!attribute [rw] detected_languages
    #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
    #     A list of detected languages together with confidence.
    class TableCell
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end

  # A form field detected on the page.
  # @!attribute [rw] field_name
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::FormField FormField} name.
  #     e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
  # @!attribute [rw] field_value
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for the
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::FormField FormField} value.
  # @!attribute [rw] name_detected_languages
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
  #     A list of detected languages for name together with confidence.
  # @!attribute [rw] value_detected_languages
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::DetectedLanguage>]
  #     A list of detected languages for value together with confidence.
  # @!attribute [rw] value_type
  #   @return [::String]
  #     If the value is non-textual, this field represents the type. Current
  #     valid values are:
  #
  #     - blank (this indicates the `field_value` is normal text)
  #     - `unfilled_checkbox`
  #     - `filled_checkbox`
  # @!attribute [rw] corrected_key_text
  #   @return [::String]
  #     Created for Labeling UI to export key text.
  #     If corrections were made to the text identified by the
  #     `field_name.text_anchor`, this field will contain the correction.
  # @!attribute [rw] corrected_value_text
  #   @return [::String]
  #     Created for Labeling UI to export value text.
  #     If corrections were made to the text identified by the
  #     `field_value.text_anchor`, this field will contain the correction.
  # @!attribute [rw] provenance
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
  #     The history of this annotation.
  class FormField
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # A detected barcode.
  # @!attribute [rw] layout
  #   @return [::Google::Cloud::DocumentAI::V1::Document::Page::Layout]
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::Layout Layout} for
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::DetectedBarcode DetectedBarcode}.
  # @!attribute [rw] barcode
  #   @return [::Google::Cloud::DocumentAI::V1::Barcode]
  #     Detailed barcode information of the
  #     {::Google::Cloud::DocumentAI::V1::Document::Page::DetectedBarcode DetectedBarcode}.
  class DetectedBarcode
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Detected language for a structural component.
  # @!attribute [rw] language_code
  #   @return [::String]
  #     The [BCP-47 language
  #     code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
  #     such as `en-US` or `sr-Latn`.
  # @!attribute [rw] confidence
  #   @return [::Float]
  #     Confidence of detected language. Range `[0, 1]`.
  class DetectedLanguage
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Image quality scores for the page image.
  # @!attribute [rw] quality_score
  #   @return [::Float]
  #     The overall quality score. Range `[0, 1]` where `1` is perfect quality.
  # @!attribute [rw] detected_defects
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Page::ImageQualityScores::DetectedDefect>]
  #     A list of detected defects.
  class ImageQualityScores
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Image Quality Defects
    # @!attribute [rw] type
    #   @return [::String]
    #     Name of the defect type. Supported values are:
    #
    #     - `quality/defect_blurry`
    #     - `quality/defect_noisy`
    #     - `quality/defect_dark`
    #     - `quality/defect_faint`
    #     - `quality/defect_text_too_small`
    #     - `quality/defect_document_cutoff`
    #     - `quality/defect_text_cutoff`
    #     - `quality/defect_glare`
    # @!attribute [rw] confidence
    #   @return [::Float]
    #     Confidence of detected defect. Range `[0, 1]` where `1` indicates
    #     strong confidence that the defect exists.
    class DetectedDefect
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end
  end
end

# An entity that could be a phrase in the text or a property that belongs to
# the document. It is a known entity type, such as a person, an organization,
# or location.
# @!attribute [rw] text_anchor
#   @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
#     Optional. Provenance of the entity.
#     Text anchor indexing into the
#     {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
# @!attribute [rw] type
#   @return [::String]
#     Required.
#     Entity type from a schema e.g. `Address`.
# @!attribute [rw] mention_text
#   @return [::String]
#     Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
# @!attribute [rw] mention_id
#   @return [::String]
#     Optional. Deprecated. Use `id` field instead.
# @!attribute [rw] confidence
#   @return [::Float]
#     Optional. Confidence of detected Schema entity. Range `[0, 1]`.
# @!attribute [rw] page_anchor
#   @return [::Google::Cloud::DocumentAI::V1::Document::PageAnchor]
#     Optional. Represents the provenance of this entity wrt. the location on
#     the page where it was found.
# @!attribute [rw] id
#   @return [::String]
#     Optional. Canonical id. This will be a unique value in the entity list
#     for this document.
# @!attribute [rw] normalized_value
#   @return [::Google::Cloud::DocumentAI::V1::Document::Entity::NormalizedValue]
#     Optional. Normalized entity value. Absent if the extracted value could
#     not be converted or the type (e.g. address) is not supported for certain
#     parsers. This field is also only populated for certain supported document
#     types.
# @!attribute [rw] properties
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Entity>]
#     Optional. Entities can be nested to form a hierarchical data structure
#     representing the content in the document.
# @!attribute [rw] provenance
#   @return [::Google::Cloud::DocumentAI::V1::Document::Provenance]
#     Optional. The history of this annotation.
# @!attribute [rw] redacted
#   @return [::Boolean]
#     Optional. Whether the entity will be redacted for de-identification
#     purposes.
class Entity
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Parsed and normalized entity value.
  # @!attribute [rw] money_value
  #   @return [::Google::Type::Money]
  #     Money value. See also:
  #     https://github.com/googleapis/googleapis/blob/master/google/type/money.proto
  # @!attribute [rw] date_value
  #   @return [::Google::Type::Date]
  #     Date value. Includes year, month, day. See also:
  #     https://github.com/googleapis/googleapis/blob/master/google/type/date.proto
  # @!attribute [rw] datetime_value
  #   @return [::Google::Type::DateTime]
  #     DateTime value. Includes date, time, and timezone. See also:
  #     https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto
  # @!attribute [rw] address_value
  #   @return [::Google::Type::PostalAddress]
  #     Postal address. See also:
  #     https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto
  # @!attribute [rw] boolean_value
  #   @return [::Boolean]
  #     Boolean value. Can be used for entities with binary values, or for
  #     checkboxes.
  # @!attribute [rw] integer_value
  #   @return [::Integer]
  #     Integer value.
  # @!attribute [rw] float_value
  #   @return [::Float]
  #     Float value.
  # @!attribute [rw] text
  #   @return [::String]
  #     Optional. An optional field to store a normalized string.
  #     For some entity types, one of respective `structured_value` fields may
  #     also be populated. Also not all the types of `structured_value` will be
  #     normalized. For example, some processors may not generate `float`
  #     or `integer` normalized text by default.
  #
  #     Below are sample formats mapped to structured values.
  #
  #     - Money/Currency type (`money_value`) is in the ISO 4217 text format.
  #     - Date type (`date_value`) is in the ISO 8601 text format.
  #     - Datetime type (`datetime_value`) is in the ISO 8601 text format.
  class NormalizedValue
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

# Relationship between
# {::Google::Cloud::DocumentAI::V1::Document::Entity Entities}.
# @!attribute [rw] subject_id
#   @return [::String]
#     Subject entity id.
# @!attribute [rw] object_id
#   @return [::String]
#     Object entity id.
# @!attribute [rw] relation
#   @return [::String]
#     Relationship description.
class EntityRelation
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods
end

# Text reference indexing into the
# {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
# @!attribute [rw] text_segments
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment>]
#     The text segments from the
#     {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
# @!attribute [rw] content
#   @return [::String]
#     Contains the content of the text span so that users do
#     not have to look it up in the text_segments. It is always
#     populated for formFields.
class TextAnchor
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # A text segment in the
  # {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. The indices
  # may be out of bounds which indicate that the text extends into another
  # document shard for large sharded documents. See
  # {::Google::Cloud::DocumentAI::V1::Document::ShardInfo#text_offset ShardInfo.text_offset}
  # @!attribute [rw] start_index
  #   @return [::Integer]
  #     {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment}
  #     start UTF-8 char index in the
  #     {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
  # @!attribute [rw] end_index
  #   @return [::Integer]
  #     {::Google::Cloud::DocumentAI::V1::Document::TextAnchor::TextSegment TextSegment}
  #     half open end UTF-8 char index in the
  #     {::Google::Cloud::DocumentAI::V1::Document#text Document.text}.
  class TextSegment
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

# Referencing the visual context of the entity in the
# {::Google::Cloud::DocumentAI::V1::Document#pages Document.pages}. Page anchors
# can be cross-page, consist of multiple bounding polygons and optionally
# reference specific layout element types.
# @!attribute [rw] page_refs
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef>]
#     One or more references to visual page elements
class PageAnchor
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Represents a weak reference to a page element within a document.
  # @!attribute [rw] page
  #   @return [::Integer]
  #     Required. Index into the
  #     {::Google::Cloud::DocumentAI::V1::Document#pages Document.pages} element,
  #     for example using
  #     `[Document.pages][page_refs.page]` to locate the related page element.
  #     This field is skipped when its value is the default `0`. See
  #     https://developers.google.com/protocol-buffers/docs/proto3#json.
  # @!attribute [rw] layout_type
  #   @return [::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef::LayoutType]
  #     Optional. The type of the layout element that is being referenced if
  #     any.
  # @!attribute [rw] layout_id
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::String]
  #     Optional. Deprecated. Use
  #     {::Google::Cloud::DocumentAI::V1::Document::PageAnchor::PageRef#bounding_poly PageRef.bounding_poly}
  #     instead.
  # @!attribute [rw] bounding_poly
  #   @return [::Google::Cloud::DocumentAI::V1::BoundingPoly]
  #     Optional. Identifies the bounding polygon of a layout element on the
  #     page. If `layout_type` is set, the bounding polygon must be exactly the
  #     same as the layout element it's referring to.
  # @!attribute [rw] confidence
  #   @return [::Float]
  #     Optional. Confidence of detected page element, if applicable. Range
  #     `[0, 1]`.
  class PageRef
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # The type of layout that is being referenced.
    module LayoutType
      # Layout Unspecified.
      LAYOUT_TYPE_UNSPECIFIED = 0

      # References a
      # {::Google::Cloud::DocumentAI::V1::Document::Page#blocks Page.blocks}
      # element.
      BLOCK = 1

      # References a
      # {::Google::Cloud::DocumentAI::V1::Document::Page#paragraphs Page.paragraphs}
      # element.
      PARAGRAPH = 2

      # References a
      # {::Google::Cloud::DocumentAI::V1::Document::Page#lines Page.lines} element.
      LINE = 3

      # References a
      # {::Google::Cloud::DocumentAI::V1::Document::Page#tokens Page.tokens}
      # element.
      TOKEN = 4

      # References a
      # {::Google::Cloud::DocumentAI::V1::Document::Page#visual_elements Page.visual_elements}
      # element.
      VISUAL_ELEMENT = 5

      # References a
      # {::Google::Cloud::DocumentAI::V1::Document::Page#tables Page.tables}
      # element.
      TABLE = 6

      # References a
      # {::Google::Cloud::DocumentAI::V1::Document::Page#form_fields Page.form_fields}
      # element.
      FORM_FIELD = 7
    end
  end
end

# Structure to identify provenance relationships between annotations in
# different revisions.
# @!attribute [rw] revision
#   @deprecated This field is deprecated and may be removed in the next major version update.
#   @return [::Integer]
#     The index of the revision that produced this element.
# @!attribute [rw] id
#   @deprecated This field is deprecated and may be removed in the next major version update.
#   @return [::Integer]
#     The Id of this operation. Needs to be unique within the scope of the
#     revision.
# @!attribute [rw] parents
#   @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Provenance::Parent>]
#     References to the original elements that are replaced.
# @!attribute [rw] type
#   @return [::Google::Cloud::DocumentAI::V1::Document::Provenance::OperationType]
#     The type of provenance operation.
class Provenance
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # The parent element the current element is based on. Used for
  # referencing/aligning, removal and replacement operations.
  # @!attribute [rw] revision
  #   @return [::Integer]
  #     The index into the current revision's `parent_ids` list.
  # @!attribute [rw] index
  #   @return [::Integer]
  #     The index of the parent item in the corresponding item list (e.g. list
  #     of entities, properties within entities, etc.) in the parent revision.
  # @!attribute [rw] id
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::Integer]
  #     The id of the parent provenance.
  class Parent
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # If a processor or agent does an explicit operation on existing elements.
  module OperationType
    # Operation type unspecified. If no operation is specified a provenance
    # entry is simply used to match against a `parent`.
    OPERATION_TYPE_UNSPECIFIED = 0

    # Add an element.
    ADD = 1

    # Remove an element identified by `parent`.
    REMOVE = 2

    # Updates any fields within the given provenance scope of the message. It
    # overwrites the fields rather than replacing them. Use this when you
    # want to update a field value of an entity without also updating all the
    # child properties.
    UPDATE = 7

    # Currently unused. Replace an element identified by `parent`.
    REPLACE = 3

    # Deprecated. Request human review for the element identified by
    # `parent`.
    EVAL_REQUESTED = 4

    # Deprecated. Element is reviewed and approved at human review,
    # confidence will be set to 1.0.
    EVAL_APPROVED = 5

    # Deprecated. Element is skipped in the validation process.
    EVAL_SKIPPED = 6
  end
end

# Contains past or forward revisions of this document.
# @!attribute [rw] agent
#   @return [::String]
#     If the change was made by a person specify the name or id of that
#     person.
# @!attribute [rw] processor
#   @return [::String]
#     If the annotation was made by processor identify the processor by its
#     resource name.
# @!attribute [rw] id
#   @return [::String]
#     Id of the revision, internally generated by doc proto storage.
#     Unique within the context of the document.
# @!attribute [rw] parent
#   @deprecated This field is deprecated and may be removed in the next major version update.
#   @return [::Array<::Integer>]
#     The revisions that this revision is based on.
#     This can include one or
#     more parents (when documents are merged). This field represents the
#     index into the `revisions` field.
# @!attribute [rw] parent_ids
#   @return [::Array<::String>]
#     The revisions that this revision is based on. Must include all the ids
#     that have anything to do with this revision - e.g. there are
#     `provenance.parent.revision` fields that index into this field.
# @!attribute [rw] create_time
#   @return [::Google::Protobuf::Timestamp]
#     The time that the revision was created, internally generated by
#     doc proto storage at the time of create.
# @!attribute [rw] human_review
#   @return [::Google::Cloud::DocumentAI::V1::Document::Revision::HumanReview]
#     Human Review information of this revision.
class Revision
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Human Review information of the document.
  # @!attribute [rw] state
  #   @return [::String]
  #     Human review state. e.g. `requested`, `succeeded`, `rejected`.
  # @!attribute [rw] state_message
  #   @return [::String]
  #     A message providing more details about the current state of processing.
  #     For example, the rejection reason when the state is `rejected`.
  class HumanReview
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

# This message is used for text changes aka. OCR corrections.
# @!attribute [rw] text_anchor
#   @return [::Google::Cloud::DocumentAI::V1::Document::TextAnchor]
#     Provenance of the correction.
#     Text anchor indexing into the
#     {::Google::Cloud::DocumentAI::V1::Document#text Document.text}. There can
#     only be a single `TextAnchor.text_segments` element. If the start and
#     end index of the text segment are the same, the text change is inserted
#     before that index.
# @!attribute [rw] changed_text
#   @return [::String]
#     The text that replaces the text identified in the `text_anchor`.
# @!attribute [rw] provenance # @deprecated This field is deprecated and may be removed in the next major version update. # @return [::Array<::Google::Cloud::DocumentAI::V1::Document::Provenance>] # The history of this annotation. class TextChange include ::Google::Protobuf::MessageExts extend ::Google::Protobuf::MessageExts::ClassMethods end end end end end end