lib/google/cloud/bigquery/project.rb in google-cloud-bigquery-1.21.2 vs lib/google/cloud/bigquery/project.rb in google-cloud-bigquery-1.22.0
- old
+ new
@@ -151,17 +151,25 @@
# prefix must contain only letters (a-z, A-Z), numbers (0-9),
# underscores (_), or dashes (-). The maximum length of the entire ID
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
# be used.
# @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters. Values can be empty, and have a maximum length of
+ # 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
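
For instance, a minimal sketch of a labels hash meeting these requirements, passed to the copy job this parameter documents (the dataset and table names here are hypothetical):

    labels = { "env" => "production", "team" => "data-eng" } # lowercase, 63 chars max each
    copy_job = bigquery.copy_job "my_dataset.source_table",
                                 "my_dataset.target_table",
                                 labels: labels
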
# @yield [job] a job configuration object
# @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
# configuration object for setting additional options.
#
# @return [Google::Cloud::Bigquery::CopyJob]
@@ -409,17 +417,25 @@
# be used.
#
# See [Generating a job
# ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
# @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters. Values can be empty, and have a maximum length of
+ # 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
# @param [Array<String>, String] udfs User-defined function resources
# used in a legacy SQL query. May be either a code resource to load from
# a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
# that contains code for a user-defined function (UDF). Providing an
# inline code resource is equivalent to providing a URI for a file
@@ -1443,50 +1459,62 @@
encrypt_config.kms_key = kms_key unless kms_key.nil?
encrypt_config
end
##
- # Extracts the data from the provided table to a Google Cloud Storage
- # file using an asynchronous method. In this method, an {ExtractJob} is
- # immediately returned. The caller may poll the service by repeatedly
- # calling {Job#reload!} and {Job#done?} to detect when the job is done,
- # or simply block until the job is done by calling
+ # Extracts the data from a table or exports a model to Google Cloud Storage
+ # asynchronously, immediately returning an {ExtractJob} that can be used to
+ # track the progress of the export job. The caller may poll the service by
+ # repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
+ # is done, or simply block until the job is done by calling
# {Job#wait_until_done!}. See {#extract} for the synchronous version.
- # Use this method instead of {Table#extract_job} to extract data from
- # source tables in other projects.
#
+ # Use this method instead of {Table#extract_job} or {Model#extract_job} to
+ # extract data from source tables or models in other projects.
+ #
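
Instead of blocking on {Job#wait_until_done!}, the job can be polled as described above; a minimal sketch (the bucket name is illustrative):

    extract_job = bigquery.extract_job "bigquery-public-data.samples.shakespeare",
                                       "gs://my-bucket/shakespeare.csv"
    until extract_job.done?
      sleep 5             # back off between polls
      extract_job.reload! # refresh the job's state from the service
    end
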
# The geographic location for the job ("US", "EU", etc.) can be set via
# {ExtractJob::Updater#location=} in a block passed to this method.
#
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
- # Exporting Data From BigQuery
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
+ # Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
#
- # @param [String, Table] table The source table from which to extract
- # data. This can be a table object; or a string ID as specified by the
- # [Standard SQL Query
+ # @param [Table, Model, String] source The source table or model for
+ # the extract operation. This can be a table or model object; or a
+ # table ID string as specified by the [Standard SQL Query
# Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
# (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
# Reference](https://cloud.google.com/bigquery/query-reference#from)
# (`project-name:dataset_id.table_id`).
# @param [Google::Cloud::Storage::File, String, Array<String>]
# extract_url The Google Storage file or file URI pattern(s) to which
- # BigQuery should extract the table data.
- # @param [String] format The exported file format. The default value is
- # `csv`.
+ # BigQuery should extract the data. For a model export, this value
+ # should be a string ending in an object name prefix, since multiple
+ # objects will be exported.
+ # @param [String] format The exported file format. The default value for
+ # tables is `csv`. Tables with nested or repeated fields cannot be
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
#
- # The following values are supported:
+ # Supported values for tables:
#
# * `csv` - CSV
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
# * `avro` - [Avro](http://avro.apache.org/)
+ #
+ # Supported values for models:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
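
As a sketch of the `format` option (the bucket and paths are hypothetical; `table_id` and `model` are as in the examples below):

    # Table export as newline-delimited JSON
    bigquery.extract_job table_id, "gs://my-bucket/shakespeare.json", format: "json"

    # Model export as an XGBoost Booster
    bigquery.extract_job model, "gs://my-bucket/my_model/", format: "ml_xgboost_booster"
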
# @param [String] compression The compression type to use for exported
# files. Possible values include `GZIP` and `NONE`. The default value
- # is `NONE`.
+ # is `NONE`. Not applicable when extracting models.
# @param [String] delimiter Delimiter to use between fields in the
- # exported data. Default is <code>,</code>.
- # @param [Boolean] header Whether to print out a header row in the
- # results. Default is `true`.
+ # exported table data. Default is `,`. Not applicable when extracting
+ # models.
+ # @param [Boolean] header Whether to print out a header row in table
+ # exports. Default is `true`. Not applicable when extracting models.
# @param [String] job_id A user-defined ID for the extract job. The ID
# must contain only letters (a-z, A-Z), numbers (0-9), underscores
# (_), or dashes (-). The maximum length is 1,024 characters. If
# `job_id` is provided, then `prefix` will not be used.
#
@@ -1499,108 +1527,149 @@
# prefix must contain only letters (a-z, A-Z), numbers (0-9),
# underscores (_), or dashes (-). The maximum length of the entire ID
# is 1,024 characters. If `job_id` is provided, then `prefix` will not
# be used.
# @param [Hash] labels A hash of user-provided labels associated with
- # the job. You can use these to organize and group your jobs. Label
- # keys and values can be no longer than 63 characters, can only
- # contain lowercase letters, numeric characters, underscores and
- # dashes. International characters are allowed. Label values are
- # optional. Label keys must start with a letter and each label in the
- # list must have a different key. See [Requirements for
- # labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ # the job. You can use these to organize and group your jobs.
+ #
+ # The labels applied to a resource must meet the following requirements:
+ #
+ # * Each resource can have multiple labels, up to a maximum of 64.
+ # * Each label must be a key-value pair.
+ # * Keys have a minimum length of 1 character and a maximum length of
+ # 63 characters. Values can be empty, and have a maximum length of
+ # 63 characters.
+ # * Keys and values can contain only lowercase letters, numeric characters,
+ # underscores, and dashes. All characters must use UTF-8 encoding, and
+ # international characters are allowed.
+ # * The key portion of a label must be unique. However, you can use the
+ # same key with multiple resources.
+ # * Keys must start with a lowercase letter or international character.
# @yield [job] a job configuration object
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
# configuration object for setting additional options.
#
# @return [Google::Cloud::Bigquery::ExtractJob]
#
- # @example
+ # @example Export table data
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# table_id = "bigquery-public-data.samples.shakespeare"
- # extract_job = bigquery.extract_job table_id,
- # "gs://my-bucket/shakespeare.csv"
+ # extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
# extract_job.wait_until_done!
# extract_job.done? #=> true
#
+ # @example Export a model
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # extract_job = bigquery.extract_job model, "gs://my-bucket/#{model.model_id}"
+ #
# @!group Data
#
- def extract_job table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
+ def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
prefix: nil, labels: nil
ensure_service!
options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
prefix: prefix, labels: labels }
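+                      # Dispatch on #model_ref: Model objects are exported as models, while
+                      # tables and table ID strings fall through to a table reference.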
+ source_ref = if source.respond_to? :model_ref
+ source.model_ref
+ else
+ Service.get_table_ref source, default_ref: project_ref
+ end
- table_ref = Service.get_table_ref table, default_ref: project_ref
- updater = ExtractJob::Updater.from_options service, table_ref, extract_url, options
+ updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options
yield updater if block_given?
job_gapi = updater.to_gapi
gapi = service.extract_table job_gapi
Job.from_gapi gapi, service
end
##
- # Extracts the data from the provided table to a Google Cloud Storage
- # file using a synchronous method that blocks for a response. Timeouts
+ # Extracts the data from a table or exports a model to Google Cloud Storage
+ # using a synchronous method that blocks for a response. Timeouts
# and transient errors are generally handled as needed to complete the
- # job. See {#extract_job} for the asynchronous version. Use this method
- # instead of {Table#extract} to extract data from source tables in other
- # projects.
+ # job. See {#extract_job} for the asynchronous version.
#
+ # Use this method instead of {Table#extract} or {Model#extract} to
+ # extract data from source tables or models in other projects.
+ #
# The geographic location for the job ("US", "EU", etc.) can be set via
# {ExtractJob::Updater#location=} in a block passed to this method.
#
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
- # Exporting Data From BigQuery
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
+ # Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ # Exporting models
#
- # @param [String, Table] table The source table from which to extract
- # data. This can be a table object; or a string ID as specified by the
- # [Standard SQL Query
+ # @param [Table, Model, String] source The source table or model for
+ # the extract operation. This can be a table or model object; or a
+ # table ID string as specified by the [Standard SQL Query
# Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
# (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
# Reference](https://cloud.google.com/bigquery/query-reference#from)
# (`project-name:dataset_id.table_id`).
# @param [Google::Cloud::Storage::File, String, Array<String>]
# extract_url The Google Storage file or file URI pattern(s) to which
- # BigQuery should extract the table data.
- # @param [String] format The exported file format. The default value is
- # `csv`.
+ # BigQuery should extract the data. For a model export, this value
+ # should be a string ending in an object name prefix, since multiple
+ # objects will be exported.
+ # @param [String] format The exported file format. The default value for
+ # tables is `csv`. Tables with nested or repeated fields cannot be
+ # exported as CSV. The default value for models is `ml_tf_saved_model`.
#
- # The following values are supported:
+ # Supported values for tables:
#
# * `csv` - CSV
# * `json` - [Newline-delimited JSON](http://jsonlines.org/)
# * `avro` - [Avro](http://avro.apache.org/)
+ #
+ # Supported values for models:
+ #
+ # * `ml_tf_saved_model` - TensorFlow SavedModel
+ # * `ml_xgboost_booster` - XGBoost Booster
# @param [String] compression The compression type to use for exported
# files. Possible values include `GZIP` and `NONE`. The default value
- # is `NONE`.
+ # is `NONE`. Not applicable when extracting models.
# @param [String] delimiter Delimiter to use between fields in the
- # exported data. Default is <code>,</code>.
- # @param [Boolean] header Whether to print out a header row in the
- # results. Default is `true`.
+ # exported table data. Default is `,`. Not applicable when extracting
+ # models.
+ # @param [Boolean] header Whether to print out a header row in table
+ # exports. Default is `true`. Not applicable when extracting models.
# @yield [job] a job configuration object
# @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
# configuration object for setting additional options.
#
# @return [Boolean] Returns `true` if the extract operation succeeded.
#
- # @example
+ # @example Export table data
# require "google/cloud/bigquery"
#
# bigquery = Google::Cloud::Bigquery.new
#
# bigquery.extract "bigquery-public-data.samples.shakespeare",
# "gs://my-bucket/shakespeare.csv"
#
+ # @example Export a model
+ # require "google/cloud/bigquery"
+ #
+ # bigquery = Google::Cloud::Bigquery.new
+ # dataset = bigquery.dataset "my_dataset"
+ # model = dataset.model "my_model"
+ #
+ # bigquery.extract model, "gs://my-bucket/#{model.model_id}"
+ #
# @!group Data
#
- def extract table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
- job = extract_job table, extract_url,
+ def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
+ job = extract_job source, extract_url,
format: format,
compression: compression,
delimiter: delimiter,
header: header,
&block