# frozen_string_literal: true

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Auto-generated by gapic-generator-ruby. DO NOT EDIT!

module Google
  module Cloud
    module Dataflow
      module V1beta3
        # Describes the environment in which a Dataflow Job runs.
        # @!attribute [rw] temp_storage_prefix
        #   @return [::String]
        #     The prefix of the resources the system should use for temporary
        #     storage. The system will append the suffix "/temp-\\{JOBNAME}" to
        #     this resource prefix, where \\{JOBNAME} is the value of the
        #     job_name field. The resulting bucket and object prefix is used
        #     as the prefix of the resources used to store temporary data
        #     needed during the job execution. NOTE: This will override the
        #     value in taskrunner_settings.
        #     The supported resource type is:
        #
        #     Google Cloud Storage:
        #
        #       storage.googleapis.com/\\{bucket}/\\{object}
        #       bucket.storage.googleapis.com/\\{object}
        # @!attribute [rw] cluster_manager_api_service
        #   @return [::String]
        #     The type of cluster manager API to use. If unknown or
        #     unspecified, the service will attempt to choose a reasonable
        #     default. This should be in the form of the API service name,
        #     e.g. "compute.googleapis.com".
        # @!attribute [rw] experiments
        #   @return [::Array<::String>]
        #     The list of experiments to enable. This field should be used for SDK
        #     related experiments and not for service related experiments. The proper
        #     field for service related experiments is service_options.
        # @!attribute [rw] service_options
        #   @return [::Array<::String>]
        #     The list of service options to enable. This field should be used for
        #     service related experiments only. These experiments, when graduating to GA,
        #     should be replaced by dedicated fields or become default (i.e. always on).
        # @!attribute [rw] service_kms_key_name
        #   @return [::String]
        #     If set, contains the Cloud KMS key identifier used to encrypt data
        #     at rest, AKA a Customer Managed Encryption Key (CMEK).
        #
        #     Format:
        #       projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
        # @!attribute [rw] worker_pools
        #   @return [::Array<::Google::Cloud::Dataflow::V1beta3::WorkerPool>]
        #     The worker pools. At least one "harness" worker pool must be
        #     specified in order for the job to have workers.
        # @!attribute [rw] user_agent
        #   @return [::Google::Protobuf::Struct]
        #     A description of the process that generated the request.
        # @!attribute [rw] version
        #   @return [::Google::Protobuf::Struct]
        #     A structure describing which components and their versions of the service
        #     are required in order to run the job.
        # @!attribute [rw] dataset
        #   @return [::String]
        #     The dataset for the current project where various workflow
        #     related tables are stored.
        #
        #     The supported resource type is:
        #
        #     Google BigQuery:
        #       bigquery.googleapis.com/\\{dataset}
        # @!attribute [rw] sdk_pipeline_options
        #   @return [::Google::Protobuf::Struct]
        #     The Cloud Dataflow SDK pipeline options specified by the user.
        #     These options are passed through the service and are used to
        #     recreate the SDK pipeline options on the worker in a language
        #     agnostic and platform independent way.
        # @!attribute [rw] internal_experiments
        #   @return [::Google::Protobuf::Any]
        #     Experimental settings.
        # @!attribute [rw] service_account_email
        #   @return [::String]
        #     Identity to run virtual machines as. Defaults to the default account.
        # @!attribute [rw] flex_resource_scheduling_goal
        #   @return [::Google::Cloud::Dataflow::V1beta3::FlexResourceSchedulingGoal]
        #     Which Flexible Resource Scheduling mode to run in.
        # @!attribute [rw] worker_region
        #   @return [::String]
        #     The Compute Engine region
        #     (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        #     which worker processing should occur, e.g. "us-west1". Mutually exclusive
        #     with worker_zone. If neither worker_region nor worker_zone is specified,
        #     default to the control plane's region.
        # @!attribute [rw] worker_zone
        #   @return [::String]
        #     The Compute Engine zone
        #     (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        #     which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
        #     with worker_region. If neither worker_region nor worker_zone is specified,
        #     a zone in the control plane's region is chosen based on available capacity.
        # @!attribute [r] shuffle_mode
        #   @return [::Google::Cloud::Dataflow::V1beta3::ShuffleMode]
        #     Output only. The shuffle mode used for the job.
        # @!attribute [rw] debug_options
        #   @return [::Google::Cloud::Dataflow::V1beta3::DebugOptions]
        #     Any debugging options to be supplied to the job.
        class Environment
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # The packages that must be installed in order for a worker to run the
        # steps of the Cloud Dataflow job that will be assigned to its worker
        # pool.
        #
        # This is the mechanism by which the Cloud Dataflow SDK causes code to
        # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
        # might use this to install jars containing the user's code and all of the
        # various dependencies (libraries, data files, etc.) required in order
        # for that code to run.
        # @!attribute [rw] name
        #   @return [::String]
        #     The name of the package.
        # @!attribute [rw] location
        #   @return [::String]
        #     The resource to read the package from. The supported resource type is:
        #
        #     Google Cloud Storage:
        #
        #       storage.googleapis.com/\\{bucket}
        #       bucket.storage.googleapis.com/
        class Package
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end
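
        # The resource-name formats documented above can be illustrated with a
        # short, hypothetical construction of these messages. All values below
        # (bucket, project, key ring, experiment names, jar name) are
        # placeholders; in normal use these fields are filled in by the SDK or
        # the Dataflow service rather than assembled by hand:
        #
        #   environment = ::Google::Cloud::Dataflow::V1beta3::Environment.new(
        #     temp_storage_prefix:  "storage.googleapis.com/my-bucket/temp",
        #     service_kms_key_name: "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key",
        #     experiments:          ["an_sdk_experiment"],
        #     service_options:      ["a_service_option"],
        #     worker_region:        "us-west1"
        #   )
        #
        #   package = ::Google::Cloud::Dataflow::V1beta3::Package.new(
        #     name:     "my-pipeline.jar",
        #     location: "storage.googleapis.com/my-bucket/staging/my-pipeline.jar"
        #   )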

        # Describes the data disk used by a workflow job.
        # @!attribute [rw] size_gb
        #   @return [::Integer]
        #     Size of disk in GB. If zero or unspecified, the service will
        #     attempt to choose a reasonable default.
        # @!attribute [rw] disk_type
        #   @return [::String]
        #     Disk storage type, as defined by Google Compute Engine. This
        #     must be a disk type appropriate to the project and zone in which
        #     the workers will run. If unknown or unspecified, the service
        #     will attempt to choose a reasonable default.
        #
        #     For example, the standard persistent disk type is a resource name
        #     typically ending in "pd-standard". If SSD persistent disks are
        #     available, the resource name typically ends with "pd-ssd". The
        #     actual valid values are defined by the Google Compute Engine API,
        #     not by the Cloud Dataflow API; consult the Google Compute Engine
        #     documentation for more information about determining the set of
        #     available disk types for a particular project and zone.
        #
        #     Google Compute Engine Disk types are local to a particular
        #     project in a particular zone, and so the resource name will
        #     typically look something like this:
        #
        #     compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
        # @!attribute [rw] mount_point
        #   @return [::String]
        #     Directory in a VM where disk is mounted.
        class Disk
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Provides data to pass through to the worker harness.
        # @!attribute [rw] base_url
        #   @return [::String]
        #     The base URL for accessing Google Cloud APIs.
        #
        #     When workers access Google Cloud APIs, they logically do so via
        #     relative URLs. If this field is specified, it supplies the base
        #     URL to use for resolving these relative URLs. The normative
        #     algorithm used is defined by RFC 1808, "Relative Uniform Resource
        #     Locators".
        #
        #     If not specified, the default value is "http://www.googleapis.com/"
        # @!attribute [rw] reporting_enabled
        #   @return [::Boolean]
        #     Whether to send work progress updates to the service.
        # @!attribute [rw] service_path
        #   @return [::String]
        #     The Cloud Dataflow service path relative to the root URL, for example,
        #     "dataflow/v1b3/projects".
        # @!attribute [rw] shuffle_service_path
        #   @return [::String]
        #     The Shuffle service path relative to the root URL, for example,
        #     "shuffle/v1beta1".
        # @!attribute [rw] worker_id
        #   @return [::String]
        #     The ID of the worker running this pipeline.
        # @!attribute [rw] temp_storage_prefix
        #   @return [::String]
        #     The prefix of the resources the system should use for temporary
        #     storage.
        #
        #     The supported resource type is:
        #
        #     Google Cloud Storage:
        #
        #       storage.googleapis.com/\\{bucket}/\\{object}
        #       bucket.storage.googleapis.com/\\{object}
        class WorkerSettings
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end
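
        # As an illustrative sketch (placeholder values throughout; in practice
        # these settings are normally supplied to the worker harness by the
        # service), a data disk and worker harness settings of the shapes
        # described above might be expressed as:
        #
        #   disk = ::Google::Cloud::Dataflow::V1beta3::Disk.new(
        #     size_gb:     50,
        #     disk_type:   "compute.googleapis.com/projects/my-project/zones/us-central1-a/diskTypes/pd-ssd",
        #     mount_point: "/mnt/dataflow"
        #   )
        #
        #   worker_settings = ::Google::Cloud::Dataflow::V1beta3::WorkerSettings.new(
        #     base_url:             "http://www.googleapis.com/",
        #     reporting_enabled:    true,
        #     service_path:         "dataflow/v1b3/projects",
        #     shuffle_service_path: "shuffle/v1beta1",
        #     temp_storage_prefix:  "storage.googleapis.com/my-bucket/temp"
        #   )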

        # Taskrunner configuration settings.
        # @!attribute [rw] task_user
        #   @return [::String]
        #     The UNIX user ID on the worker VM to use for tasks launched by
        #     taskrunner; e.g. "root".
        # @!attribute [rw] task_group
        #   @return [::String]
        #     The UNIX group ID on the worker VM to use for tasks launched by
        #     taskrunner; e.g. "wheel".
        # @!attribute [rw] oauth_scopes
        #   @return [::Array<::String>]
        #     The OAuth2 scopes to be requested by the taskrunner in order to
        #     access the Cloud Dataflow API.
        # @!attribute [rw] base_url
        #   @return [::String]
        #     The base URL for the taskrunner to use when accessing Google Cloud APIs.
        #
        #     When workers access Google Cloud APIs, they logically do so via
        #     relative URLs. If this field is specified, it supplies the base
        #     URL to use for resolving these relative URLs. The normative
        #     algorithm used is defined by RFC 1808, "Relative Uniform Resource
        #     Locators".
        #
        #     If not specified, the default value is "http://www.googleapis.com/"
        # @!attribute [rw] dataflow_api_version
        #   @return [::String]
        #     The API version of the endpoint, e.g. "v1b3".
        # @!attribute [rw] parallel_worker_settings
        #   @return [::Google::Cloud::Dataflow::V1beta3::WorkerSettings]
        #     The settings to pass to the parallel worker harness.
        # @!attribute [rw] base_task_dir
        #   @return [::String]
        #     The location on the worker for task-specific subdirectories.
        # @!attribute [rw] continue_on_exception
        #   @return [::Boolean]
        #     Whether to continue taskrunner if an exception is hit.
        # @!attribute [rw] log_to_serialconsole
        #   @return [::Boolean]
        #     Whether to send taskrunner log info to Google Compute Engine VM serial
        #     console.
        # @!attribute [rw] alsologtostderr
        #   @return [::Boolean]
        #     Whether to also send taskrunner log info to stderr.
        # @!attribute [rw] log_upload_location
        #   @return [::String]
        #     Indicates where to put logs. If this is not specified, the logs
        #     will not be uploaded.
        #
        #     The supported resource type is:
        #
        #     Google Cloud Storage:
        #       storage.googleapis.com/\\{bucket}/\\{object}
        #       bucket.storage.googleapis.com/\\{object}
        # @!attribute [rw] log_dir
        #   @return [::String]
        #     The directory on the VM to store logs.
        # @!attribute [rw] temp_storage_prefix
        #   @return [::String]
        #     The prefix of the resources the taskrunner should use for
        #     temporary storage.
        #
        #     The supported resource type is:
        #
        #     Google Cloud Storage:
        #       storage.googleapis.com/\\{bucket}/\\{object}
        #       bucket.storage.googleapis.com/\\{object}
        # @!attribute [rw] harness_command
        #   @return [::String]
        #     The command to launch the worker harness.
        # @!attribute [rw] workflow_file_name
        #   @return [::String]
        #     The file to store the workflow in.
        # @!attribute [rw] commandlines_file_name
        #   @return [::String]
        #     The file to store preprocessing commands in.
        # @!attribute [rw] vm_id
        #   @return [::String]
        #     The ID string of the VM.
        # @!attribute [rw] language_hint
        #   @return [::String]
        #     The suggested backend language.
        # @!attribute [rw] streaming_worker_main_class
        #   @return [::String]
        #     The streaming worker main class name.
        class TaskRunnerSettings
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Settings for WorkerPool autoscaling.
        # @!attribute [rw] algorithm
        #   @return [::Google::Cloud::Dataflow::V1beta3::AutoscalingAlgorithm]
        #     The algorithm to use for autoscaling.
        # @!attribute [rw] max_num_workers
        #   @return [::Integer]
        #     The maximum number of workers to cap scaling at.
        class AutoscalingSettings
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Defines an SDK harness container for executing Dataflow pipelines.
        # @!attribute [rw] container_image
        #   @return [::String]
        #     A docker container image that resides in Google Container Registry.
        # @!attribute [rw] use_single_core_per_container
        #   @return [::Boolean]
        #     If true, recommends that the Dataflow service use only one core per SDK
        #     container instance with this image. If false (or unset), recommends using
        #     more than one core per SDK container instance with this image for
        #     efficiency. Note that the Dataflow service may choose to override this
        #     property if needed.
        # @!attribute [rw] environment_id
        #   @return [::String]
        #     Environment ID for the Beam runner API proto Environment that corresponds
        #     to the current SDK Harness.
        class SdkHarnessContainerImage
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end
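
        # For example (an illustrative sketch; the container image and
        # environment ID are placeholders, and reasonable defaults apply when
        # these fields are left unset), autoscaling limits and an SDK harness
        # container might be described as:
        #
        #   autoscaling = ::Google::Cloud::Dataflow::V1beta3::AutoscalingSettings.new(
        #     algorithm:       :AUTOSCALING_ALGORITHM_BASIC,
        #     max_num_workers: 10
        #   )
        #
        #   sdk_container = ::Google::Cloud::Dataflow::V1beta3::SdkHarnessContainerImage.new(
        #     container_image:               "gcr.io/my-project/my-sdk-harness:latest",
        #     use_single_core_per_container: false,
        #     environment_id:                "my-environment-id"
        #   )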

        # Describes one particular pool of Cloud Dataflow workers to be
        # instantiated by the Cloud Dataflow service in order to perform the
        # computations required by a job. Note that a workflow job may use
        # multiple pools, in order to match the various computational
        # requirements of the various stages of the job.
        # @!attribute [rw] kind
        #   @return [::String]
        #     The kind of the worker pool; currently only `harness` and `shuffle`
        #     are supported.
        # @!attribute [rw] num_workers
        #   @return [::Integer]
        #     Number of Google Compute Engine workers in this pool needed to
        #     execute the job. If zero or unspecified, the service will
        #     attempt to choose a reasonable default.
        # @!attribute [rw] packages
        #   @return [::Array<::Google::Cloud::Dataflow::V1beta3::Package>]
        #     Packages to be installed on workers.
        # @!attribute [rw] default_package_set
        #   @return [::Google::Cloud::Dataflow::V1beta3::DefaultPackageSet]
        #     The default package set to install. This allows the service to
        #     select a default set of packages which are useful to worker
        #     harnesses written in a particular language.
        # @!attribute [rw] machine_type
        #   @return [::String]
        #     Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
        #     service will attempt to choose a reasonable default.
        # @!attribute [rw] teardown_policy
        #   @return [::Google::Cloud::Dataflow::V1beta3::TeardownPolicy]
        #     Sets the policy for determining when to turn down the worker pool.
        #     Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
        #     `TEARDOWN_NEVER`.
        #     `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
        #     the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
        #     if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
        #     down.
        #
        #     If the workers are not torn down by the service, they will
        #     continue to run and use Google Compute Engine VM resources in the
        #     user's project until they are explicitly terminated by the user.
        #     Because of this, Google recommends using the `TEARDOWN_ALWAYS`
        #     policy except for small, manually supervised test jobs.
        #
        #     If unknown or unspecified, the service will attempt to choose a reasonable
        #     default.
        # @!attribute [rw] disk_size_gb
        #   @return [::Integer]
        #     Size of root disk for VMs, in GB. If zero or unspecified, the service will
        #     attempt to choose a reasonable default.
        # @!attribute [rw] disk_type
        #   @return [::String]
        #     Type of root disk for VMs. If empty or unspecified, the service will
        #     attempt to choose a reasonable default.
        # @!attribute [rw] disk_source_image
        #   @return [::String]
        #     Fully qualified source image for disks.
        # @!attribute [rw] zone
        #   @return [::String]
        #     Zone to run the worker pools in. If empty or unspecified, the service
        #     will attempt to choose a reasonable default.
        # @!attribute [rw] taskrunner_settings
        #   @return [::Google::Cloud::Dataflow::V1beta3::TaskRunnerSettings]
        #     Settings passed through to Google Compute Engine workers when
        #     using the standard Dataflow task runner. Users should ignore
        #     this field.
        # @!attribute [rw] on_host_maintenance
        #   @return [::String]
        #     The action to take on host maintenance, as defined by the Google
        #     Compute Engine API.
        # @!attribute [rw] data_disks
        #   @return [::Array<::Google::Cloud::Dataflow::V1beta3::Disk>]
        #     Data disks that are used by a VM in this workflow.
        # @!attribute [rw] metadata
        #   @return [::Google::Protobuf::Map{::String => ::String}]
        #     Metadata to set on the Google Compute Engine VMs.
        # @!attribute [rw] autoscaling_settings
        #   @return [::Google::Cloud::Dataflow::V1beta3::AutoscalingSettings]
        #     Settings for autoscaling of this WorkerPool.
        # @!attribute [rw] pool_args
        #   @return [::Google::Protobuf::Any]
        #     Extra arguments for this worker pool.
        # @!attribute [rw] network
        #   @return [::String]
        #     Network to which VMs will be assigned. If empty or unspecified,
        #     the service will use the network "default".
        # @!attribute [rw] subnetwork
        #   @return [::String]
        #     Subnetwork to which VMs will be assigned, if desired. Expected to be of
        #     the form "regions/REGION/subnetworks/SUBNETWORK".
        # @!attribute [rw] worker_harness_container_image
        #   @return [::String]
        #     Required. Docker container image that executes the Cloud Dataflow worker
        #     harness, residing in Google Container Registry.
        #
        #     Deprecated for the Fn API path. Use sdk_harness_container_images instead.
        # @!attribute [rw] num_threads_per_worker
        #   @return [::Integer]
        #     The number of threads per worker harness. If empty or unspecified, the
        #     service will choose a number of threads (according to the number of cores
        #     on the selected machine type for batch, or 1 by convention for streaming).
        # @!attribute [rw] ip_configuration
        #   @return [::Google::Cloud::Dataflow::V1beta3::WorkerIPAddressConfiguration]
        #     Configuration for VM IPs.
        # @!attribute [rw] sdk_harness_container_images
        #   @return [::Array<::Google::Cloud::Dataflow::V1beta3::SdkHarnessContainerImage>]
        #     Set of SDK harness containers needed to execute this pipeline. This will
        #     only be set in the Fn API path. For non-cross-language pipelines this
        #     should have only one entry. Cross-language pipelines will have two or more
        #     entries.
        class WorkerPool
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods

          # @!attribute [rw] key
          #   @return [::String]
          # @!attribute [rw] value
          #   @return [::String]
          class MetadataEntry
            include ::Google::Protobuf::MessageExts
            extend ::Google::Protobuf::MessageExts::ClassMethods
          end
        end

        # Describes any options that have an effect on the debugging of pipelines.
        # @!attribute [rw] enable_hot_key_logging
        #   @return [::Boolean]
        #     When true, enables the logging of the literal hot key to the user's Cloud
        #     Logging.
        class DebugOptions
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Specifies the processing model used by a
        # [google.dataflow.v1beta3.Job], which determines the way the Job is
        # managed by the Cloud Dataflow service (how workers are scheduled, how
        # inputs are sharded, etc).
        module JobType
          # The type of the job is unspecified, or unknown.
          JOB_TYPE_UNKNOWN = 0

          # A batch job with a well-defined end point: data is read, data is
          # processed, data is written, and the job is done.
          JOB_TYPE_BATCH = 1

          # A continuously streaming job with no end: data is read,
          # processed, and written continuously.
          JOB_TYPE_STREAMING = 2
        end

        # Specifies the resource to optimize for in Flexible Resource Scheduling.
        module FlexResourceSchedulingGoal
          # Run in the default mode.
          FLEXRS_UNSPECIFIED = 0

          # Optimize for lower execution time.
          FLEXRS_SPEED_OPTIMIZED = 1

          # Optimize for lower cost.
          FLEXRS_COST_OPTIMIZED = 2
        end

        # Specifies what happens to a resource when a Cloud Dataflow
        # {::Google::Cloud::Dataflow::V1beta3::Job google.dataflow.v1beta3.Job} has completed.
        module TeardownPolicy
          # The teardown policy isn't specified, or is unknown.
          TEARDOWN_POLICY_UNKNOWN = 0

          # Always teardown the resource.
          TEARDOWN_ALWAYS = 1

          # Teardown the resource on success. This is useful for debugging
          # failures.
          TEARDOWN_ON_SUCCESS = 2

          # Never teardown the resource. This is useful for debugging and
          # development.
          TEARDOWN_NEVER = 3
        end

        # The default set of packages to be staged on a pool of workers.
        module DefaultPackageSet
          # The default set of packages to stage is unknown, or unspecified.
          DEFAULT_PACKAGE_SET_UNKNOWN = 0

          # Indicates that no packages should be staged at the worker unless
          # explicitly specified by the job.
          DEFAULT_PACKAGE_SET_NONE = 1

          # Stage packages typically useful to workers written in Java.
          DEFAULT_PACKAGE_SET_JAVA = 2

          # Stage packages typically useful to workers written in Python.
          DEFAULT_PACKAGE_SET_PYTHON = 3
        end
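
        # Pulling the earlier sketches together (again with placeholder values,
        # and reusing the `environment`, `package`, `disk`, and `autoscaling`
        # objects sketched above), a worker pool might look like the following.
        # At least one pool of kind "harness" is required for a job to have
        # workers, and enum-typed fields accept the symbol form of the values
        # defined in TeardownPolicy and DefaultPackageSet:
        #
        #   worker_pool = ::Google::Cloud::Dataflow::V1beta3::WorkerPool.new(
        #     kind:                 "harness",
        #     num_workers:          3,
        #     machine_type:         "n1-standard-1",
        #     default_package_set:  :DEFAULT_PACKAGE_SET_JAVA,
        #     teardown_policy:      :TEARDOWN_ALWAYS,
        #     network:              "default",
        #     subnetwork:           "regions/us-central1/subnetworks/my-subnetwork",
        #     metadata:             { "team" => "data-eng" },
        #     autoscaling_settings: autoscaling,
        #     data_disks:           [disk],
        #     packages:             [package]
        #   )
        #
        #   environment.worker_pools << worker_pool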

        # Specifies the algorithm used to determine the number of worker
        # processes to run at any given point in time, based on the amount of
        # data left to process, the number of workers, and how quickly
        # existing workers are processing data.
        module AutoscalingAlgorithm
          # The algorithm is unknown, or unspecified.
          AUTOSCALING_ALGORITHM_UNKNOWN = 0

          # Disable autoscaling.
          AUTOSCALING_ALGORITHM_NONE = 1

          # Increase worker count over time to reduce job execution time.
          AUTOSCALING_ALGORITHM_BASIC = 2
        end

        # Specifies how IP addresses should be allocated to the worker machines.
        module WorkerIPAddressConfiguration
          # The configuration is unknown, or unspecified.
          WORKER_IP_UNSPECIFIED = 0

          # Workers should have public IP addresses.
          WORKER_IP_PUBLIC = 1

          # Workers should have private IP addresses.
          WORKER_IP_PRIVATE = 2
        end

        # Specifies the shuffle mode used by a
        # [google.dataflow.v1beta3.Job], which determines how data is shuffled
        # during processing. More details in:
        # https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-shuffle
        module ShuffleMode
          # Shuffle mode information is not available.
          SHUFFLE_MODE_UNSPECIFIED = 0

          # Shuffle is done on the worker VMs.
          VM_BASED = 1

          # Shuffle is done on the service side.
          SERVICE_BASED = 2
        end
      end
    end
  end
end
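
# The enum modules above (JobType, FlexResourceSchedulingGoal, TeardownPolicy,
# DefaultPackageSet, AutoscalingAlgorithm, WorkerIPAddressConfiguration, and
# ShuffleMode) surface on message fields as symbols. As a final illustrative
# sketch (the `environment` object is assumed to exist, e.g. from the earlier
# sketches or from a Job fetched elsewhere):
#
#   environment.flex_resource_scheduling_goal = :FLEXRS_COST_OPTIMIZED
#
#   environment.worker_pools.each do |pool|
#     pool.ip_configuration = :WORKER_IP_PRIVATE if pool.kind == "harness"
#   end
#
#   # shuffle_mode is output only; it can be read but should not be set.
#   puts "service-based shuffle" if environment.shuffle_mode == :SERVICE_BASED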