# frozen_string_literal: true

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Auto-generated by gapic-generator-ruby. DO NOT EDIT!


module Google
  module Cloud
    module DiscoveryEngine
      module V1beta
        # A singleton resource of
        # {::Google::Cloud::DiscoveryEngine::V1beta::DataStore DataStore}. It's empty when
        # {::Google::Cloud::DiscoveryEngine::V1beta::DataStore DataStore} is created, which
        # defaults to digital parser. The first call to
        # [DataStoreService.UpdateDocumentProcessingConfig][] method will initialize
        # the config.
        # @!attribute [rw] name
        #   @return [::String]
        #     The full resource name of the Document Processing Config.
        #     Format:
        #     `projects/*/locations/*/collections/*/dataStores/*/documentProcessingConfig`.
        # @!attribute [rw] chunking_config
        #   @return [::Google::Cloud::DiscoveryEngine::V1beta::DocumentProcessingConfig::ChunkingConfig]
        #     Whether chunking mode is enabled.
        # @!attribute [rw] default_parsing_config
        #   @return [::Google::Cloud::DiscoveryEngine::V1beta::DocumentProcessingConfig::ParsingConfig]
        #     Configurations for default Document parser.
        #     If not specified, we will configure it as default DigitalParsingConfig, and
        #     the default parsing config will be applied to all file types for Document
        #     parsing.
        # @!attribute [rw] parsing_config_overrides
        #   @return [::Google::Protobuf::Map{::String => ::Google::Cloud::DiscoveryEngine::V1beta::DocumentProcessingConfig::ParsingConfig}]
        #     Map from file type to override the default parsing configuration based on
        #     the file type. Supported keys:
        #
        #     * `pdf`: Override parsing config for PDF files, either digital parsing, ocr
        #     parsing or layout parsing is supported.
        #     * `html`: Override parsing config for HTML files, only digital parsing and
        #     layout parsing are supported.
        #     * `docx`: Override parsing config for DOCX files, only digital parsing and
        #     layout parsing are supported.
        #     * `pptx`: Override parsing config for PPTX files, only digital parsing and
        #     layout parsing are supported.
        class DocumentProcessingConfig
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods

          # Configuration for chunking config.
          # @!attribute [rw] layout_based_chunking_config
          #   @return [::Google::Cloud::DiscoveryEngine::V1beta::DocumentProcessingConfig::ChunkingConfig::LayoutBasedChunkingConfig]
          #     Configuration for the layout based chunking.
          class ChunkingConfig
            include ::Google::Protobuf::MessageExts
            extend ::Google::Protobuf::MessageExts::ClassMethods

            # Configuration for the layout based chunking.
            # @!attribute [rw] chunk_size
            #   @return [::Integer]
            #     The token size limit for each chunk.
            #
            #     Supported values: 100-500 (inclusive).
            #     Default value: 500.
            # @!attribute [rw] include_ancestor_headings
            #   @return [::Boolean]
            #     Whether to include appending different levels of headings to chunks
            #     from the middle of the document to prevent context loss.
            #
            #     Default value: False.
            class LayoutBasedChunkingConfig
              include ::Google::Protobuf::MessageExts
              extend ::Google::Protobuf::MessageExts::ClassMethods
            end
          end

          # Related configurations applied to a specific type of document parser.
          # @!attribute [rw] digital_parsing_config
          #   @return [::Google::Cloud::DiscoveryEngine::V1beta::DocumentProcessingConfig::ParsingConfig::DigitalParsingConfig]
          #     Configurations applied to digital parser.
          # @!attribute [rw] ocr_parsing_config
          #   @return [::Google::Cloud::DiscoveryEngine::V1beta::DocumentProcessingConfig::ParsingConfig::OcrParsingConfig]
          #     Configurations applied to OCR parser. Currently it only applies to
          #     PDFs.
          # @!attribute [rw] layout_parsing_config
          #   @return [::Google::Cloud::DiscoveryEngine::V1beta::DocumentProcessingConfig::ParsingConfig::LayoutParsingConfig]
          #     Configurations applied to layout parser.
          class ParsingConfig
            include ::Google::Protobuf::MessageExts
            extend ::Google::Protobuf::MessageExts::ClassMethods

            # The digital parsing configurations for documents.
            class DigitalParsingConfig
              include ::Google::Protobuf::MessageExts
              extend ::Google::Protobuf::MessageExts::ClassMethods
            end

            # The OCR parsing configurations for documents.
            # @!attribute [rw] enhanced_document_elements
            #   @deprecated This field is deprecated and may be removed in the next major version update.
            #   @return [::Array<::String>]
            #     [DEPRECATED] This field is deprecated. To use the additional enhanced
            #     document elements processing, please switch to `layout_parsing_config`.
            # @!attribute [rw] use_native_text
            #   @return [::Boolean]
            #     If true, will use native text instead of OCR text on pages containing
            #     native text.
            class OcrParsingConfig
              include ::Google::Protobuf::MessageExts
              extend ::Google::Protobuf::MessageExts::ClassMethods
            end

            # The layout parsing configurations for documents.
            class LayoutParsingConfig
              include ::Google::Protobuf::MessageExts
              extend ::Google::Protobuf::MessageExts::ClassMethods
            end
          end

          # @!attribute [rw] key
          #   @return [::String]
          # @!attribute [rw] value
          #   @return [::Google::Cloud::DiscoveryEngine::V1beta::DocumentProcessingConfig::ParsingConfig]
          class ParsingConfigOverridesEntry
            include ::Google::Protobuf::MessageExts
            extend ::Google::Protobuf::MessageExts::ClassMethods
          end
        end
      end
    end
  end
end