lib/google/cloud/bigquery/project.rb in google-cloud-bigquery-1.21.2 vs lib/google/cloud/bigquery/project.rb in google-cloud-bigquery-1.22.0

- old
+ new

@@ -151,17 +151,25 @@
  #   prefix must contain only letters (a-z, A-Z), numbers (0-9),
  #   underscores (_), or dashes (-). The maximum length of the entire ID
  #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
  #   be used.
  # @param [Hash] labels A hash of user-provided labels associated with
- #   the job. You can use these to organize and group your jobs. Label
- #   keys and values can be no longer than 63 characters, can only
- #   contain lowercase letters, numeric characters, underscores and
- #   dashes. International characters are allowed. Label values are
- #   optional. Label keys must start with a letter and each label in the
- #   list must have a different key. See [Requirements for
- #   labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ #   the job. You can use these to organize and group your jobs.
+ #
+ #   The labels applied to a resource must meet the following requirements:
+ #
+ #   * Each resource can have multiple labels, up to a maximum of 64.
+ #   * Each label must be a key-value pair.
+ #   * Keys have a minimum length of 1 character and a maximum length of
+ #     63 characters, and cannot be empty. Values can be empty, and have
+ #     a maximum length of 63 characters.
+ #   * Keys and values can contain only lowercase letters, numeric characters,
+ #     underscores, and dashes. All characters must use UTF-8 encoding, and
+ #     international characters are allowed.
+ #   * The key portion of a label must be unique. However, you can use the
+ #     same key with multiple resources.
+ #   * Keys must start with a lowercase letter or international character.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
  #   configuration object for setting additional options.
  #
  # @return [Google::Cloud::Bigquery::CopyJob]
@@ -409,17 +417,25 @@
  #   be used.
  #
  #   See [Generating a job
  #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
  # @param [Hash] labels A hash of user-provided labels associated with
- #   the job. You can use these to organize and group your jobs. Label
- #   keys and values can be no longer than 63 characters, can only
- #   contain lowercase letters, numeric characters, underscores and
- #   dashes. International characters are allowed. Label values are
- #   optional. Label keys must start with a letter and each label in the
- #   list must have a different key. See [Requirements for
- #   labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ #   the job. You can use these to organize and group your jobs.
+ #
+ #   The labels applied to a resource must meet the following requirements:
+ #
+ #   * Each resource can have multiple labels, up to a maximum of 64.
+ #   * Each label must be a key-value pair.
+ #   * Keys have a minimum length of 1 character and a maximum length of
+ #     63 characters, and cannot be empty. Values can be empty, and have
+ #     a maximum length of 63 characters.
+ #   * Keys and values can contain only lowercase letters, numeric characters,
+ #     underscores, and dashes. All characters must use UTF-8 encoding, and
+ #     international characters are allowed.
+ #   * The key portion of a label must be unique. However, you can use the
+ #     same key with multiple resources.
+ #   * Keys must start with a lowercase letter or international character.
  # @param [Array<String>, String] udfs User-defined function resources
  #   used in a legacy SQL query. May be either a code resource to load from
  #   a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
  #   that contains code for a user-defined function (UDF). Providing an
  #   inline code resource is equivalent to providing a URI for a file
@@ -1443,50 +1459,62 @@
    encrypt_config.kms_key = kms_key unless kms_key.nil?
    encrypt_config
  end

  ##
- # Extracts the data from the provided table to a Google Cloud Storage
- # file using an asynchronous method. In this method, an {ExtractJob} is
- # immediately returned. The caller may poll the service by repeatedly
- # calling {Job#reload!} and {Job#done?} to detect when the job is done,
- # or simply block until the job is done by calling
+ # Extracts the data from a table or exports a model to Google Cloud Storage
+ # asynchronously, immediately returning an {ExtractJob} that can be used to
+ # track the progress of the export job. The caller may poll the service by
+ # repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
+ # is done, or simply block until the job is done by calling
  #   #{Job#wait_until_done!}. See {#extract} for the synchronous version.
- # Use this method instead of {Table#extract_job} to extract data from
- # source tables in other projects.
  #
+ # Use this method instead of {Table#extract_job} or {Model#extract_job} to
+ # extract data from source tables or models in other projects.
+ #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {ExtractJob::Updater#location=} in a block passed to this method.
  #
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
- #   Exporting Data From BigQuery
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
+ #   Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ #   Exporting models
  #
- # @param [String, Table] table The source table from which to extract
- #   data. This can be a table object; or a string ID as specified by the
- #   [Standard SQL Query
+ # @param [Table, Model, String] source The source table or model for
+ #   the extract operation. This can be a table or model object; or a
+ #   table ID string as specified by the [Standard SQL Query
  #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
  #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
  #   Reference](https://cloud.google.com/bigquery/query-reference#from)
  #   (`project-name:dataset_id.table_id`).
  # @param [Google::Cloud::Storage::File, String, Array<String>]
  #   extract_url The Google Storage file or file URI pattern(s) to which
- #   BigQuery should extract the table data.
- # @param [String] format The exported file format. The default value is
- #   `csv`.
+ #   BigQuery should extract. For a model export this value should be a
+ #   string ending in an object name prefix, since multiple objects will
+ #   be exported.
+ # @param [String] format The exported file format. The default value for
+ #   tables is `csv`. Tables with nested or repeated fields cannot be
+ #   exported as CSV. The default value for models is `ml_tf_saved_model`.
  #
- #   The following values are supported:
+ #   Supported values for tables:
  #
  #   * `csv` - CSV
  #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
  #   * `avro` - [Avro](http://avro.apache.org/)
+ #
+ #   Supported values for models:
+ #
+ #   * `ml_tf_saved_model` - TensorFlow SavedModel
+ #   * `ml_xgboost_booster` - XGBoost Booster
  # @param [String] compression The compression type to use for exported
  #   files. Possible values include `GZIP` and `NONE`. The default value
- #   is `NONE`.
+ #   is `NONE`. Not applicable when extracting models.
  # @param [String] delimiter Delimiter to use between fields in the
- #   exported data. Default is <code>,</code>.
- # @param [Boolean] header Whether to print out a header row in the
- #   results. Default is `true`.
+ #   exported table data. Default is `,`. Not applicable when extracting
+ #   models.
+ # @param [Boolean] header Whether to print out a header row in table
+ #   exports. Default is `true`. Not applicable when extracting models.
  # @param [String] job_id A user-defined ID for the extract job. The ID
  #   must contain only letters (a-z, A-Z), numbers (0-9), underscores
  #   (_), or dashes (-). The maximum length is 1,024 characters. If
  #   `job_id` is provided, then `prefix` will not be used.
  #
@@ -1499,108 +1527,149 @@
  #   prefix must contain only letters (a-z, A-Z), numbers (0-9),
  #   underscores (_), or dashes (-). The maximum length of the entire ID
  #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
  #   be used.
  # @param [Hash] labels A hash of user-provided labels associated with
- #   the job. You can use these to organize and group your jobs. Label
- #   keys and values can be no longer than 63 characters, can only
- #   contain lowercase letters, numeric characters, underscores and
- #   dashes. International characters are allowed. Label values are
- #   optional. Label keys must start with a letter and each label in the
- #   list must have a different key. See [Requirements for
- #   labels](https://cloud.google.com/bigquery/docs/creating-managing-labels#requirements).
+ #   the job. You can use these to organize and group your jobs.
+ #
+ #   The labels applied to a resource must meet the following requirements:
+ #
+ #   * Each resource can have multiple labels, up to a maximum of 64.
+ #   * Each label must be a key-value pair.
+ #   * Keys have a minimum length of 1 character and a maximum length of
+ #     63 characters, and cannot be empty. Values can be empty, and have
+ #     a maximum length of 63 characters.
+ #   * Keys and values can contain only lowercase letters, numeric characters,
+ #     underscores, and dashes. All characters must use UTF-8 encoding, and
+ #     international characters are allowed.
+ #   * The key portion of a label must be unique. However, you can use the
+ #     same key with multiple resources.
+ #   * Keys must start with a lowercase letter or international character.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
  #   configuration object for setting additional options.
  #
  # @return [Google::Cloud::Bigquery::ExtractJob]
  #
- # @example
+ # @example Export table data
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   table_id = "bigquery-public-data.samples.shakespeare"
- #   extract_job = bigquery.extract_job table_id,
- #                                      "gs://my-bucket/shakespeare.csv"
+ #   extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
  #   extract_job.wait_until_done!
  #   extract_job.done? #=> true
  #
+ # @example Export a model
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #   model = dataset.model "my_model"
+ #
+ #   extract_job = bigquery.extract_job model, "gs://my-bucket/#{model.model_id}"
+ #
  # @!group Data
  #
- def extract_job table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
+ def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
                  prefix: nil, labels: nil
    ensure_service!

    options = { format: format, compression: compression, delimiter: delimiter, header: header,
                job_id: job_id, prefix: prefix, labels: labels }
+   source_ref = if source.respond_to? :model_ref
+                  source.model_ref
+                else
+                  Service.get_table_ref source, default_ref: project_ref
+                end
-   table_ref = Service.get_table_ref table, default_ref: project_ref
-   updater = ExtractJob::Updater.from_options service, table_ref, extract_url, options
+   updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options

    yield updater if block_given?

    job_gapi = updater.to_gapi
    gapi = service.extract_table job_gapi
    Job.from_gapi gapi, service
  end

  ##
- # Extracts the data from the provided table to a Google Cloud Storage
- # file using a synchronous method that blocks for a response. Timeouts
+ # Extracts the data from a table or exports a model to Google Cloud Storage
+ # using a synchronous method that blocks for a response. Timeouts
  # and transient errors are generally handled as needed to complete the
- # job. See {#extract_job} for the asynchronous version. Use this method
- # instead of {Table#extract} to extract data from source tables in other
- # projects.
+ # job. See {#extract_job} for the asynchronous version.
  #
+ # Use this method instead of {Table#extract} or {Model#extract} to
+ # extract data from source tables or models in other projects.
+ #
  # The geographic location for the job ("US", "EU", etc.) can be set via
  # {ExtractJob::Updater#location=} in a block passed to this method.
  #
- # @see https://cloud.google.com/bigquery/exporting-data-from-bigquery
- #   Exporting Data From BigQuery
+ # @see https://cloud.google.com/bigquery/docs/exporting-data
+ #   Exporting table data
+ # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
+ #   Exporting models
  #
- # @param [String, Table] table The source table from which to extract
- #   data. This can be a table object; or a string ID as specified by the
- #   [Standard SQL Query
+ # @param [Table, Model, String] source The source table or model for
+ #   the extract operation. This can be a table or model object; or a
+ #   table ID string as specified by the [Standard SQL Query
  #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
  #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
  #   Reference](https://cloud.google.com/bigquery/query-reference#from)
  #   (`project-name:dataset_id.table_id`).
  # @param [Google::Cloud::Storage::File, String, Array<String>]
  #   extract_url The Google Storage file or file URI pattern(s) to which
- #   BigQuery should extract the table data.
- # @param [String] format The exported file format. The default value is
- #   `csv`.
+ #   BigQuery should extract. For a model export this value should be a
+ #   string ending in an object name prefix, since multiple objects will
+ #   be exported.
+ # @param [String] format The exported file format. The default value for
+ #   tables is `csv`. Tables with nested or repeated fields cannot be
+ #   exported as CSV. The default value for models is `ml_tf_saved_model`.
  #
- #   The following values are supported:
+ #   Supported values for tables:
  #
  #   * `csv` - CSV
  #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
  #   * `avro` - [Avro](http://avro.apache.org/)
+ #
+ #   Supported values for models:
+ #
+ #   * `ml_tf_saved_model` - TensorFlow SavedModel
+ #   * `ml_xgboost_booster` - XGBoost Booster
  # @param [String] compression The compression type to use for exported
  #   files. Possible values include `GZIP` and `NONE`. The default value
- #   is `NONE`.
+ #   is `NONE`. Not applicable when extracting models.
  # @param [String] delimiter Delimiter to use between fields in the
- #   exported data. Default is <code>,</code>.
- # @param [Boolean] header Whether to print out a header row in the
- #   results. Default is `true`.
+ #   exported table data. Default is `,`. Not applicable when extracting
+ #   models.
+ # @param [Boolean] header Whether to print out a header row in table
+ #   exports. Default is `true`. Not applicable when extracting models.
  # @yield [job] a job configuration object
  # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
  #   configuration object for setting additional options.
  #
  # @return [Boolean] Returns `true` if the extract operation succeeded.
  #
- # @example
+ # @example Export table data
  #   require "google/cloud/bigquery"
  #
  #   bigquery = Google::Cloud::Bigquery.new
  #
  #   bigquery.extract "bigquery-public-data.samples.shakespeare",
  #                    "gs://my-bucket/shakespeare.csv"
  #
+ # @example Export a model
+ #   require "google/cloud/bigquery"
+ #
+ #   bigquery = Google::Cloud::Bigquery.new
+ #   dataset = bigquery.dataset "my_dataset"
+ #   model = dataset.model "my_model"
+ #
+ #   bigquery.extract model, "gs://my-bucket/#{model.model_id}"
+ #
  # @!group Data
  #
- def extract table, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
-   job = extract_job table, extract_url,
+ def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
+   job = extract_job source, extract_url,
                      format: format, compression: compression, delimiter: delimiter, header: header, &block
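A note on usage: the expanded labels requirements above apply to the `labels` option accepted by `extract_job` and the other job-creating methods that share this documentation. A minimal sketch of passing conforming labels; the bucket name and label values are hypothetical, not from the source:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new

  # Keys must start with a lowercase letter; keys and values may contain
  # only lowercase letters, numeric characters, underscores, and dashes.
  extract_job = bigquery.extract_job "bigquery-public-data.samples.shakespeare",
                                     "gs://my-bucket/shakespeare.csv",
                                     labels: { "cost-center" => "research", "env" => "test" }
  extract_job.wait_until_done!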
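Both method docs note that the job's geographic location can be set via {ExtractJob::Updater#location=} in a block passed to the method. A sketch of that pattern, reusing the same hypothetical bucket:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new

  # The updater yielded to the block exposes additional job options,
  # including the geographic location.
  extract_job = bigquery.extract_job "bigquery-public-data.samples.shakespeare",
                                     "gs://my-bucket/shakespeare.csv" do |job|
    job.location = "EU"
  end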
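The asynchronous docs offer polling with {Job#reload!} and {Job#done?} as an alternative to blocking on {Job#wait_until_done!}. A sketch of that polling loop; the five-second interval is an arbitrary choice, not from the source:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new

  extract_job = bigquery.extract_job "bigquery-public-data.samples.shakespeare",
                                     "gs://my-bucket/shakespeare.csv"

  # Poll the service for job status rather than blocking the caller.
  until extract_job.done?
    sleep 5
    extract_job.reload!
  end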
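The new `source_ref` dispatch in `extract_job` routes any object responding to `model_ref` through the model path, while strings and tables still resolve to a table reference. A sketch of a model export that overrides the default format; `ml_xgboost_booster` presumably applies only to models trained with XGBoost, and the dataset, model, and bucket names are hypothetical:

  require "google/cloud/bigquery"

  bigquery = Google::Cloud::Bigquery.new
  dataset = bigquery.dataset "my_dataset"
  model = dataset.model "my_model"

  # Model exports write multiple objects, so the URI acts as an object
  # name prefix. Compression, delimiter, and header do not apply here.
  bigquery.extract model, "gs://my-bucket/#{model.model_id}",
                   format: "ml_xgboost_booster"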