# frozen_string_literal: true

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Auto-generated by gapic-generator-ruby. DO NOT EDIT!

module Google
  module Cloud
    module Dataflow
      module V1beta3
        # Describes the environment in which a Dataflow Job runs.
        # @!attribute [rw] temp_storage_prefix
        #   @return [::String]
        #     The prefix of the resources the system should use for temporary
        #     storage. The system will append the suffix "/temp-\\{JOBNAME}" to
        #     this resource prefix, where \\{JOBNAME} is the value of the
        #     job_name field. The resulting bucket and object prefix is used
        #     as the prefix of the resources used to store temporary data
        #     needed during the job execution. NOTE: This will override the
        #     value in taskrunner_settings.
        #     The supported resource type is:
        #
        #     Google Cloud Storage:
        #
        #       storage.googleapis.com/\\{bucket}/\\{object}
        #       bucket.storage.googleapis.com/\\{object}
        # @!attribute [rw] cluster_manager_api_service
        #   @return [::String]
        #     The type of cluster manager API to use. If unknown or
        #     unspecified, the service will attempt to choose a reasonable
        #     default. This should be in the form of the API service name,
        #     e.g. "compute.googleapis.com".
        # @!attribute [rw] experiments
        #   @return [::Array<::String>]
        #     The list of experiments to enable. This field should be used for SDK
        #     related experiments and not for service related experiments. The proper
        #     field for service related experiments is service_options.
        # @!attribute [rw] service_options
        #   @return [::Array<::String>]
        #     The list of service options to enable. This field should be used for
        #     service related experiments only. These experiments, when graduating to GA,
        #     should be replaced by dedicated fields or become default (i.e. always on).
        # @!attribute [rw] service_kms_key_name
        #   @return [::String]
        #     If set, contains the Cloud KMS key identifier used to encrypt data
        #     at rest, AKA a Customer Managed Encryption Key (CMEK).
        #
        #     Format:
        #       projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
        # @!attribute [rw] worker_pools
        #   @return [::Array<::Google::Cloud::Dataflow::V1beta3::WorkerPool>]
        #     The worker pools. At least one "harness" worker pool must be
        #     specified in order for the job to have workers.
        # @!attribute [rw] user_agent
        #   @return [::Google::Protobuf::Struct]
        #     A description of the process that generated the request.
        # @!attribute [rw] version
        #   @return [::Google::Protobuf::Struct]
        #     A structure describing which components and their versions of the service
        #     are required in order to run the job.
        # @!attribute [rw] dataset
        #   @return [::String]
        #     The dataset for the current project where various workflow
        #     related tables are stored.
        #
        #     The supported resource type is:
        #
        #     Google BigQuery:
        #       bigquery.googleapis.com/\\{dataset}
        # @!attribute [rw] sdk_pipeline_options
        #   @return [::Google::Protobuf::Struct]
        #     The Cloud Dataflow SDK pipeline options specified by the user.
        #     These options are passed through the service and are used to
        #     recreate the SDK pipeline options on the worker in a language
        #     agnostic and platform independent way.
        # @!attribute [rw] internal_experiments
        #   @return [::Google::Protobuf::Any]
        #     Experimental settings.
        # @!attribute [rw] service_account_email
        #   @return [::String]
        #     Identity to run virtual machines as. Defaults to the default account.
        # @!attribute [rw] flex_resource_scheduling_goal
        #   @return [::Google::Cloud::Dataflow::V1beta3::FlexResourceSchedulingGoal]
        #     Which Flexible Resource Scheduling mode to run in.
        # @!attribute [rw] worker_region
        #   @return [::String]
        #     The Compute Engine region
        #     (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        #     which worker processing should occur, e.g. "us-west1". Mutually exclusive
        #     with worker_zone. If neither worker_region nor worker_zone is specified,
        #     default to the control plane's region.
        # @!attribute [rw] worker_zone
        #   @return [::String]
        #     The Compute Engine zone
        #     (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
        #     which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
        #     with worker_region. If neither worker_region nor worker_zone is specified,
        #     a zone in the control plane's region is chosen based on available capacity.
        # @!attribute [r] shuffle_mode
        #   @return [::Google::Cloud::Dataflow::V1beta3::ShuffleMode]
        #     Output only. The shuffle mode used for the job.
        # @!attribute [rw] debug_options
        #   @return [::Google::Cloud::Dataflow::V1beta3::DebugOptions]
        #     Any debugging options to be supplied to the job.
        class Environment
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # The packages that must be installed in order for a worker to run the
        # steps of the Cloud Dataflow job that will be assigned to its worker
        # pool.
        #
        # This is the mechanism by which the Cloud Dataflow SDK causes code to
        # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
        # might use this to install jars containing the user's code and all of the
        # various dependencies (libraries, data files, etc.) required in order
        # for that code to run.
        # @!attribute [rw] name
        #   @return [::String]
        #     The name of the package.
        # @!attribute [rw] location
        #   @return [::String]
        #     The resource to read the package from. The supported resource type is:
        #
        #     Google Cloud Storage:
        #
        #       storage.googleapis.com/\\{bucket}
        #       bucket.storage.googleapis.com/
        class Package
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end
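
        # The resource-name formats documented above can be illustrated with a
        # short, hypothetical construction of these messages. All values below
        # (bucket, project, key ring, experiment names, jar name) are
        # placeholders; in normal use these fields are filled in by the SDK or
        # the Dataflow service rather than assembled by hand:
        #
        #   environment = ::Google::Cloud::Dataflow::V1beta3::Environment.new(
        #     temp_storage_prefix:  "storage.googleapis.com/my-bucket/temp",
        #     service_kms_key_name: "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key",
        #     experiments:          ["an_sdk_experiment"],
        #     service_options:      ["a_service_option"],
        #     worker_region:        "us-west1"
        #   )
        #
        #   package = ::Google::Cloud::Dataflow::V1beta3::Package.new(
        #     name:     "my-pipeline.jar",
        #     location: "storage.googleapis.com/my-bucket/staging/my-pipeline.jar"
        #   )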

        # Describes the data disk used by a workflow job.
        # @!attribute [rw] size_gb
        #   @return [::Integer]
        #     Size of disk in GB. If zero or unspecified, the service will
        #     attempt to choose a reasonable default.
        # @!attribute [rw] disk_type
        #   @return [::String]
        #     Disk storage type, as defined by Google Compute Engine. This
        #     must be a disk type appropriate to the project and zone in which
        #     the workers will run. If unknown or unspecified, the service
        #     will attempt to choose a reasonable default.
        #
        #     For example, the standard persistent disk type is a resource name
        #     typically ending in "pd-standard". If SSD persistent disks are
        #     available, the resource name typically ends with "pd-ssd". The
        #     actual valid values are defined by the Google Compute Engine API,
        #     not by the Cloud Dataflow API; consult the Google Compute Engine
        #     documentation for more information about determining the set of
        #     available disk types for a particular project and zone.
        #
        #     Google Compute Engine Disk types are local to a particular
        #     project in a particular zone, and so the resource name will
        #     typically look something like this:
        #
        #     compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
        # @!attribute [rw] mount_point
        #   @return [::String]
        #     Directory in a VM where disk is mounted.
        class Disk
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Provides data to pass through to the worker harness.
        # @!attribute [rw] base_url
        #   @return [::String]
        #     The base URL for accessing Google Cloud APIs.
        #
        #     When workers access Google Cloud APIs, they logically do so via
        #     relative URLs. If this field is specified, it supplies the base
        #     URL to use for resolving these relative URLs. The normative
        #     algorithm used is defined by RFC 1808, "Relative Uniform Resource
        #     Locators".
        #
        #     If not specified, the default value is "http://www.googleapis.com/"
        # @!attribute [rw] reporting_enabled
        #   @return [::Boolean]
        #     Whether to send work progress updates to the service.
        # @!attribute [rw] service_path
        #   @return [::String]
        #     The Cloud Dataflow service path relative to the root URL, for example,
        #     "dataflow/v1b3/projects".
        # @!attribute [rw] shuffle_service_path
        #   @return [::String]
        #     The Shuffle service path relative to the root URL, for example,
        #     "shuffle/v1beta1".
        # @!attribute [rw] worker_id
        #   @return [::String]
        #     The ID of the worker running this pipeline.
        # @!attribute [rw] temp_storage_prefix
        #   @return [::String]
        #     The prefix of the resources the system should use for temporary
        #     storage.
        #
        #     The supported resource type is:
        #
        #     Google Cloud Storage:
        #
        #       storage.googleapis.com/\\{bucket}/\\{object}
        #       bucket.storage.googleapis.com/\\{object}
        class WorkerSettings
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end
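
        # As an illustrative sketch (placeholder values throughout; in practice
        # these settings are normally supplied to the worker harness by the
        # service), a data disk and worker harness settings of the shapes
        # described above might be expressed as:
        #
        #   disk = ::Google::Cloud::Dataflow::V1beta3::Disk.new(
        #     size_gb:     50,
        #     disk_type:   "compute.googleapis.com/projects/my-project/zones/us-central1-a/diskTypes/pd-ssd",
        #     mount_point: "/mnt/dataflow"
        #   )
        #
        #   worker_settings = ::Google::Cloud::Dataflow::V1beta3::WorkerSettings.new(
        #     base_url:             "http://www.googleapis.com/",
        #     reporting_enabled:    true,
        #     service_path:         "dataflow/v1b3/projects",
        #     shuffle_service_path: "shuffle/v1beta1",
        #     temp_storage_prefix:  "storage.googleapis.com/my-bucket/temp"
        #   )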

        # Taskrunner configuration settings.
        # @!attribute [rw] task_user
        #   @return [::String]
        #     The UNIX user ID on the worker VM to use for tasks launched by
        #     taskrunner; e.g. "root".
        # @!attribute [rw] task_group
        #   @return [::String]
        #     The UNIX group ID on the worker VM to use for tasks launched by
        #     taskrunner; e.g. "wheel".
        # @!attribute [rw] oauth_scopes
        #   @return [::Array<::String>]
        #     The OAuth2 scopes to be requested by the taskrunner in order to
        #     access the Cloud Dataflow API.
        # @!attribute [rw] base_url
        #   @return [::String]
        #     The base URL for the taskrunner to use when accessing Google Cloud APIs.
        #
        #     When workers access Google Cloud APIs, they logically do so via
        #     relative URLs. If this field is specified, it supplies the base
        #     URL to use for resolving these relative URLs. The normative
        #     algorithm used is defined by RFC 1808, "Relative Uniform Resource
        #     Locators".
        #
        #     If not specified, the default value is "http://www.googleapis.com/"
        # @!attribute [rw] dataflow_api_version
        #   @return [::String]
        #     The API version of the endpoint, e.g. "v1b3".
        # @!attribute [rw] parallel_worker_settings
        #   @return [::Google::Cloud::Dataflow::V1beta3::WorkerSettings]
        #     The settings to pass to the parallel worker harness.
        # @!attribute [rw] base_task_dir
        #   @return [::String]
        #     The location on the worker for task-specific subdirectories.
        # @!attribute [rw] continue_on_exception
        #   @return [::Boolean]
        #     Whether to continue taskrunner if an exception is hit.
        # @!attribute [rw] log_to_serialconsole
        #   @return [::Boolean]
        #     Whether to send taskrunner log info to Google Compute Engine VM serial
        #     console.
        # @!attribute [rw] alsologtostderr
        #   @return [::Boolean]
        #     Whether to also send taskrunner log info to stderr.
        # @!attribute [rw] log_upload_location
        #   @return [::String]
        #     Indicates where to put logs. If this is not specified, the logs
        #     will not be uploaded.
        #
        #     The supported resource type is:
        #
        #     Google Cloud Storage:
        #       storage.googleapis.com/\\{bucket}/\\{object}
        #       bucket.storage.googleapis.com/\\{object}
        # @!attribute [rw] log_dir
        #   @return [::String]
        #     The directory on the VM to store logs.
        # @!attribute [rw] temp_storage_prefix
        #   @return [::String]
        #     The prefix of the resources the taskrunner should use for
        #     temporary storage.
        #
        #     The supported resource type is:
        #
        #     Google Cloud Storage:
        #       storage.googleapis.com/\\{bucket}/\\{object}
        #       bucket.storage.googleapis.com/\\{object}
        # @!attribute [rw] harness_command
        #   @return [::String]
        #     The command to launch the worker harness.
        # @!attribute [rw] workflow_file_name
        #   @return [::String]
        #     The file to store the workflow in.
        # @!attribute [rw] commandlines_file_name
        #   @return [::String]
        #     The file to store preprocessing commands in.
        # @!attribute [rw] vm_id
        #   @return [::String]
        #     The ID string of the VM.
        # @!attribute [rw] language_hint
        #   @return [::String]
        #     The suggested backend language.
        # @!attribute [rw] streaming_worker_main_class
        #   @return [::String]
        #     The streaming worker main class name.
        class TaskRunnerSettings
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Settings for WorkerPool autoscaling.
        # @!attribute [rw] algorithm
        #   @return [::Google::Cloud::Dataflow::V1beta3::AutoscalingAlgorithm]
        #     The algorithm to use for autoscaling.
        # @!attribute [rw] max_num_workers
        #   @return [::Integer]
        #     The maximum number of workers to cap scaling at.
        class AutoscalingSettings
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Defines an SDK harness container for executing Dataflow pipelines.
        # @!attribute [rw] container_image
        #   @return [::String]
        #     A docker container image that resides in Google Container Registry.
        # @!attribute [rw] use_single_core_per_container
        #   @return [::Boolean]
        #     If true, recommends that the Dataflow service use only one core per SDK
        #     container instance with this image. If false (or unset), recommends using
        #     more than one core per SDK container instance with this image for
        #     efficiency. Note that the Dataflow service may choose to override this
        #     property if needed.
        # @!attribute [rw] environment_id
        #   @return [::String]
        #     Environment ID for the Beam runner API proto Environment that corresponds
        #     to the current SDK Harness.
        class SdkHarnessContainerImage
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end
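
        # For example (an illustrative sketch; the container image and
        # environment ID are placeholders, and reasonable defaults apply when
        # these fields are left unset), autoscaling limits and an SDK harness
        # container might be described as:
        #
        #   autoscaling = ::Google::Cloud::Dataflow::V1beta3::AutoscalingSettings.new(
        #     algorithm:       :AUTOSCALING_ALGORITHM_BASIC,
        #     max_num_workers: 10
        #   )
        #
        #   sdk_container = ::Google::Cloud::Dataflow::V1beta3::SdkHarnessContainerImage.new(
        #     container_image:               "gcr.io/my-project/my-sdk-harness:latest",
        #     use_single_core_per_container: false,
        #     environment_id:                "my-environment-id"
        #   )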

        # Describes one particular pool of Cloud Dataflow workers to be
        # instantiated by the Cloud Dataflow service in order to perform the
        # computations required by a job. Note that a workflow job may use
        # multiple pools, in order to match the various computational
        # requirements of the various stages of the job.
        # @!attribute [rw] kind
        #   @return [::String]
        #     The kind of the worker pool; currently only `harness` and `shuffle`
        #     are supported.
        # @!attribute [rw] num_workers
        #   @return [::Integer]
        #     Number of Google Compute Engine workers in this pool needed to
        #     execute the job. If zero or unspecified, the service will
        #     attempt to choose a reasonable default.
        # @!attribute [rw] packages
        #   @return [::Array<::Google::Cloud::Dataflow::V1beta3::Package>]
        #     Packages to be installed on workers.
        # @!attribute [rw] default_package_set
        #   @return [::Google::Cloud::Dataflow::V1beta3::DefaultPackageSet]
        #     The default package set to install. This allows the service to
        #     select a default set of packages which are useful to worker
        #     harnesses written in a particular language.
        # @!attribute [rw] machine_type
        #   @return [::String]
        #     Machine type (e.g. "n1-standard-1"). If empty or unspecified, the
        #     service will attempt to choose a reasonable default.
        # @!attribute [rw] teardown_policy
        #   @return [::Google::Cloud::Dataflow::V1beta3::TeardownPolicy]
        #     Sets the policy for determining when to turn down the worker pool.
        #     Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
        #     `TEARDOWN_NEVER`.
        #     `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
        #     the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
        #     if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
        #     down.
        #
        #     If the workers are not torn down by the service, they will
        #     continue to run and use Google Compute Engine VM resources in the
        #     user's project until they are explicitly terminated by the user.
        #     Because of this, Google recommends using the `TEARDOWN_ALWAYS`
        #     policy except for small, manually supervised test jobs.
        #
        #     If unknown or unspecified, the service will attempt to choose a reasonable
        #     default.
        # @!attribute [rw] disk_size_gb
        #   @return [::Integer]
        #     Size of root disk for VMs, in GB. If zero or unspecified, the service will
        #     attempt to choose a reasonable default.
        # @!attribute [rw] disk_type
        #   @return [::String]
        #     Type of root disk for VMs. If empty or unspecified, the service will
        #     attempt to choose a reasonable default.
        # @!attribute [rw] disk_source_image
        #   @return [::String]
        #     Fully qualified source image for disks.
        # @!attribute [rw] zone
        #   @return [::String]
        #     Zone to run the worker pools in. If empty or unspecified, the service
        #     will attempt to choose a reasonable default.
        # @!attribute [rw] taskrunner_settings
        #   @return [::Google::Cloud::Dataflow::V1beta3::TaskRunnerSettings]
        #     Settings passed through to Google Compute Engine workers when
        #     using the standard Dataflow task runner. Users should ignore
        #     this field.
        # @!attribute [rw] on_host_maintenance
        #   @return [::String]
        #     The action to take on host maintenance, as defined by the Google
        #     Compute Engine API.
        # @!attribute [rw] data_disks
        #   @return [::Array<::Google::Cloud::Dataflow::V1beta3::Disk>]
        #     Data disks that are used by a VM in this workflow.
        # @!attribute [rw] metadata
        #   @return [::Google::Protobuf::Map{::String => ::String}]
        #     Metadata to set on the Google Compute Engine VMs.
        # @!attribute [rw] autoscaling_settings
        #   @return [::Google::Cloud::Dataflow::V1beta3::AutoscalingSettings]
        #     Settings for autoscaling of this WorkerPool.
        # @!attribute [rw] pool_args
        #   @return [::Google::Protobuf::Any]
        #     Extra arguments for this worker pool.
        # @!attribute [rw] network
        #   @return [::String]
        #     Network to which VMs will be assigned. If empty or unspecified,
        #     the service will use the network "default".
        # @!attribute [rw] subnetwork
        #   @return [::String]
        #     Subnetwork to which VMs will be assigned, if desired. Expected to be of
        #     the form "regions/REGION/subnetworks/SUBNETWORK".
        # @!attribute [rw] worker_harness_container_image
        #   @return [::String]
        #     Required. Docker container image that executes the Cloud Dataflow worker
        #     harness, residing in Google Container Registry.
        #
        #     Deprecated for the Fn API path. Use sdk_harness_container_images instead.
        # @!attribute [rw] num_threads_per_worker
        #   @return [::Integer]
        #     The number of threads per worker harness. If empty or unspecified, the
        #     service will choose a number of threads (according to the number of cores
        #     on the selected machine type for batch, or 1 by convention for streaming).
        # @!attribute [rw] ip_configuration
        #   @return [::Google::Cloud::Dataflow::V1beta3::WorkerIPAddressConfiguration]
        #     Configuration for VM IPs.
        # @!attribute [rw] sdk_harness_container_images
        #   @return [::Array<::Google::Cloud::Dataflow::V1beta3::SdkHarnessContainerImage>]
        #     Set of SDK harness containers needed to execute this pipeline. This will
        #     only be set in the Fn API path. For non-cross-language pipelines this
        #     should have only one entry. Cross-language pipelines will have two or more
        #     entries.
        class WorkerPool
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods

          # @!attribute [rw] key
          #   @return [::String]
          # @!attribute [rw] value
          #   @return [::String]
          class MetadataEntry
            include ::Google::Protobuf::MessageExts
            extend ::Google::Protobuf::MessageExts::ClassMethods
          end
        end

        # Describes any options that have an effect on the debugging of pipelines.
        # @!attribute [rw] enable_hot_key_logging
        #   @return [::Boolean]
        #     When true, enables the logging of the literal hot key to the user's Cloud
        #     Logging.
        class DebugOptions
          include ::Google::Protobuf::MessageExts
          extend ::Google::Protobuf::MessageExts::ClassMethods
        end

        # Specifies the processing model used by a
        # [google.dataflow.v1beta3.Job], which determines the way the Job is
        # managed by the Cloud Dataflow service (how workers are scheduled, how
        # inputs are sharded, etc).
        module JobType
          # The type of the job is unspecified, or unknown.
          JOB_TYPE_UNKNOWN = 0

          # A batch job with a well-defined end point: data is read, data is
          # processed, data is written, and the job is done.
          JOB_TYPE_BATCH = 1

          # A continuously streaming job with no end: data is read,
          # processed, and written continuously.
          JOB_TYPE_STREAMING = 2
        end

        # Specifies the resource to optimize for in Flexible Resource Scheduling.
        module FlexResourceSchedulingGoal
          # Run in the default mode.
          FLEXRS_UNSPECIFIED = 0

          # Optimize for lower execution time.
          FLEXRS_SPEED_OPTIMIZED = 1

          # Optimize for lower cost.
          FLEXRS_COST_OPTIMIZED = 2
        end

        # Specifies what happens to a resource when a Cloud Dataflow
        # {::Google::Cloud::Dataflow::V1beta3::Job google.dataflow.v1beta3.Job} has completed.
        module TeardownPolicy
          # The teardown policy isn't specified, or is unknown.
          TEARDOWN_POLICY_UNKNOWN = 0

          # Always teardown the resource.
          TEARDOWN_ALWAYS = 1

          # Teardown the resource on success. This is useful for debugging
          # failures.
          TEARDOWN_ON_SUCCESS = 2

          # Never teardown the resource. This is useful for debugging and
          # development.
          TEARDOWN_NEVER = 3
        end

        # The default set of packages to be staged on a pool of workers.
        module DefaultPackageSet
          # The default set of packages to stage is unknown, or unspecified.
          DEFAULT_PACKAGE_SET_UNKNOWN = 0

          # Indicates that no packages should be staged at the worker unless
          # explicitly specified by the job.
          DEFAULT_PACKAGE_SET_NONE = 1

          # Stage packages typically useful to workers written in Java.
          DEFAULT_PACKAGE_SET_JAVA = 2

          # Stage packages typically useful to workers written in Python.
          DEFAULT_PACKAGE_SET_PYTHON = 3
        end
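
        # Pulling the earlier sketches together (again with placeholder values,
        # and reusing the `environment`, `package`, `disk`, and `autoscaling`
        # objects sketched above), a worker pool might look like the following.
        # At least one pool of kind "harness" is required for a job to have
        # workers, and enum-typed fields accept the symbol form of the values
        # defined in TeardownPolicy and DefaultPackageSet:
        #
        #   worker_pool = ::Google::Cloud::Dataflow::V1beta3::WorkerPool.new(
        #     kind:                 "harness",
        #     num_workers:          3,
        #     machine_type:         "n1-standard-1",
        #     default_package_set:  :DEFAULT_PACKAGE_SET_JAVA,
        #     teardown_policy:      :TEARDOWN_ALWAYS,
        #     network:              "default",
        #     subnetwork:           "regions/us-central1/subnetworks/my-subnetwork",
        #     metadata:             { "team" => "data-eng" },
        #     autoscaling_settings: autoscaling,
        #     data_disks:           [disk],
        #     packages:             [package]
        #   )
        #
        #   environment.worker_pools << worker_pool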

        # Specifies the algorithm used to determine the number of worker
        # processes to run at any given point in time, based on the amount of
        # data left to process, the number of workers, and how quickly
        # existing workers are processing data.
        module AutoscalingAlgorithm
          # The algorithm is unknown, or unspecified.
          AUTOSCALING_ALGORITHM_UNKNOWN = 0

          # Disable autoscaling.
          AUTOSCALING_ALGORITHM_NONE = 1

          # Increase worker count over time to reduce job execution time.
          AUTOSCALING_ALGORITHM_BASIC = 2
        end

        # Specifies how IP addresses should be allocated to the worker machines.
        module WorkerIPAddressConfiguration
          # The configuration is unknown, or unspecified.
          WORKER_IP_UNSPECIFIED = 0

          # Workers should have public IP addresses.
          WORKER_IP_PUBLIC = 1

          # Workers should have private IP addresses.
          WORKER_IP_PRIVATE = 2
        end

        # Specifies the shuffle mode used by a
        # [google.dataflow.v1beta3.Job], which determines how data is shuffled
        # during processing. More details in:
        # https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-shuffle
        module ShuffleMode
          # Shuffle mode information is not available.
          SHUFFLE_MODE_UNSPECIFIED = 0

          # Shuffle is done on the worker VMs.
          VM_BASED = 1

          # Shuffle is done on the service side.
          SERVICE_BASED = 2
        end
      end
    end
  end
end
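
# The enum modules above (JobType, FlexResourceSchedulingGoal, TeardownPolicy,
# DefaultPackageSet, AutoscalingAlgorithm, WorkerIPAddressConfiguration, and
# ShuffleMode) surface on message fields as symbols. As a final illustrative
# sketch (the `environment` object is assumed to exist, e.g. from the earlier
# sketches or from a Job fetched elsewhere):
#
#   environment.flex_resource_scheduling_goal = :FLEXRS_COST_OPTIMIZED
#
#   environment.worker_pools.each do |pool|
#     pool.ip_configuration = :WORKER_IP_PRIVATE if pool.kind == "harness"
#   end
#
#   # shuffle_mode is output only; it can be read but should not be set.
#   puts "service-based shuffle" if environment.shuffle_mode == :SERVICE_BASED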