# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

module Google
  module Cloud
    module Bigquery
      ##
      # # ExtractJob
      #
      # A {Job} subclass representing an export operation that may be
      # performed on a {Table}. An ExtractJob instance is created when you
      # call {Table#extract_job}.
      #
      # @see https://cloud.google.com/bigquery/docs/exporting-data
      #   Exporting Data From BigQuery
      # @see https://cloud.google.com/bigquery/docs/reference/v2/jobs
      #   Jobs API reference
      #
      # @example
      #   require "google/cloud/bigquery"
      #
      #   bigquery = Google::Cloud::Bigquery.new
      #   dataset = bigquery.dataset "my_dataset"
      #   table = dataset.table "my_table"
      #
      #   extract_job = table.extract_job "gs://my-bucket/file-name.json",
      #                                   format: "json"
      #   extract_job.wait_until_done!
      #   extract_job.done? #=> true
      #
      class ExtractJob < Job
        ##
        # The URI or URIs representing the Google Cloud Storage files to
        # which the data is exported.
        #
        # @return [Array<String>] An array of Cloud Storage paths.
        #
        def destinations
          Array @gapi.configuration.extract.destination_uris
        end

        ##
        # The table from which the data is exported. This is the table upon
        # which {Table#extract_job} was called.
        #
        # @return [Table] A table instance.
        #
        def source
          table = @gapi.configuration.extract.source_table
          return nil unless table
          retrieve_table table.project_id, table.dataset_id, table.table_id
        end

        ##
        # Checks if the export operation compresses the data using gzip. The
        # default is `false`.
        #
        # @return [Boolean] `true` when `GZIP`, `false` otherwise.
        #
        def compression?
          val = @gapi.configuration.extract.compression
          val == "GZIP"
        end

        ##
        # Checks if the destination format for the data is [newline-delimited
        # JSON](http://jsonlines.org/). The default is `false`.
        #
        # @return [Boolean] `true` when `NEWLINE_DELIMITED_JSON`, `false`
        #   otherwise.
        #
        def json?
          val = @gapi.configuration.extract.destination_format
          val == "NEWLINE_DELIMITED_JSON"
        end

        ##
        # Checks if the destination format for the data is CSV. Tables with
        # nested or repeated fields cannot be exported as CSV. The default
        # is `true`.
        #
        # @return [Boolean] `true` when `CSV`, `false` otherwise.
        #
        def csv?
          val = @gapi.configuration.extract.destination_format
          return true if val.nil?
          val == "CSV"
        end

        ##
        # Checks if the destination format for the data is
        # [Avro](http://avro.apache.org/). The default is `false`.
        #
        # @return [Boolean] `true` when `AVRO`, `false` otherwise.
        #
        def avro?
          val = @gapi.configuration.extract.destination_format
          val == "AVRO"
        end

        ##
        # The character or symbol the operation uses to delimit fields in
        # the exported data. The default is a comma (`,`).
        #
        # @return [String] A string containing the character, such as `","`.
        #
        def delimiter
          val = @gapi.configuration.extract.field_delimiter
          val = "," if val.nil?
          val
        end

        ##
        # Checks if the exported data contains a header row. The default is
        # `true`.
        #
        # @return [Boolean] `true` when the print header configuration is
        #   `true` or unset (`nil`), `false` otherwise.
        #
        def print_header?
          val = @gapi.configuration.extract.print_header
          val = true if val.nil?
          val
        end
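
        # A quick way to check how a finished extract job was configured is
        # via the readers above. A minimal sketch (the bucket, dataset, and
        # table names are placeholder assumptions):
        #
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #   table = bigquery.dataset("my_dataset").table "my_table"
        #
        #   extract_job = table.extract_job "gs://my-bucket/file-name.csv"
        #   extract_job.wait_until_done!
        #   extract_job.csv?          #=> true (CSV is the default format)
        #   extract_job.delimiter     #=> ","
        #   extract_job.print_header? #=> true
        #   extract_job.compression?  #=> false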

        ##
        # The number of files per destination URI or URI pattern specified
        # in {#destinations}.
        #
        # @return [Array<Integer>] An array of file counts in the same order
        #   as the URI patterns.
        #
        def destinations_file_counts
          Array @gapi.statistics.extract.destination_uri_file_counts
        end

        ##
        # A hash containing the URI or URI pattern specified in
        # {#destinations} mapped to the counts of files per destination.
        #
        # @return [Hash<String, Integer>] A Hash with the URI patterns as
        #   keys and the file counts as values.
        #
        def destinations_counts
          Hash[destinations.zip destinations_file_counts]
        end

        ##
        # If `#avro?` (`#format` is set to `"AVRO"`), this flag indicates
        # whether to enable extracting applicable column types (such as
        # `TIMESTAMP`) to their corresponding AVRO logical types
        # (`timestamp-micros`), instead of only using their raw types
        # (`avro-long`).
        #
        # @return [Boolean] `true` when applicable column types will use
        #   their corresponding AVRO logical types, `false` otherwise.
        #
        def use_avro_logical_types?
          @gapi.configuration.extract.use_avro_logical_types
        end

        ##
        # Yielded to a block to accumulate changes for an API request.
        class Updater < ExtractJob
          ##
          # @private Create an Updater object.
          def initialize gapi
            @gapi = gapi
          end

          ##
          # @private Create an Updater from an options hash.
          #
          # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
          #   configuration object for setting extract options.
          def self.from_options service, table, storage_files, options
            job_ref = service.job_ref_from options[:job_id], options[:prefix]
            storage_urls = Array(storage_files).map do |url|
              url.respond_to?(:to_gs_url) ? url.to_gs_url : url
            end
            options[:format] ||= Convert.derive_source_format storage_urls.first
            job = Google::Apis::BigqueryV2::Job.new(
              job_reference: job_ref,
              configuration: Google::Apis::BigqueryV2::JobConfiguration.new(
                extract: Google::Apis::BigqueryV2::JobConfigurationExtract.new(
                  destination_uris: Array(storage_urls),
                  source_table: table
                ),
                dry_run: options[:dryrun]
              )
            )

            from_job_and_options job, options
          end

          ##
          # @private Create an Updater from a Job and options hash.
          #
          # @return [Google::Cloud::Bigquery::ExtractJob::Updater] A job
          #   configuration object for setting extract options.
          def self.from_job_and_options request, options
            updater = ExtractJob::Updater.new request
            updater.compression = options[:compression]
            updater.delimiter = options[:delimiter]
            updater.format = options[:format]
            updater.header = options[:header]
            updater.labels = options[:labels] if options[:labels]
            unless options[:use_avro_logical_types].nil?
              updater.use_avro_logical_types = options[:use_avro_logical_types]
            end
            updater
          end

          ##
          # Sets the geographic location where the job should run. Required
          # except for US and EU.
          #
          # @param [String] value A geographic location, such as "US", "EU"
          #   or "asia-northeast1". Required except for US and EU.
          #
          # @example
          #   require "google/cloud/bigquery"
          #
          #   bigquery = Google::Cloud::Bigquery.new
          #   dataset = bigquery.dataset "my_dataset"
          #   table = dataset.table "my_table"
          #
          #   destination = "gs://my-bucket/file-name.csv"
          #   extract_job = table.extract_job destination do |j|
          #     j.location = "EU"
          #   end
          #
          #   extract_job.wait_until_done!
          #   extract_job.done? #=> true
          #
          # @!group Attributes
          def location= value
            @gapi.job_reference.location = value
            return unless value.nil?

            # Treat assigning a value of nil the same as unsetting the value.
            unset = @gapi.job_reference.instance_variables.include? :@location
            @gapi.job_reference.remove_instance_variable :@location if unset
          end
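
          # The attribute writers below are normally used together inside
          # the block passed to {Table#extract_job}, which yields this
          # Updater. A minimal sketch (the bucket and file names are
          # placeholder assumptions):
          #
          #   extract_job = table.extract_job "gs://my-bucket/data.json" do |j|
          #     j.format      = "json"
          #     j.compression = "GZIP"
          #     j.labels      = { "env" => "test" }
          #   end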

          ##
          # Sets the compression type.
          #
          # @param [String] value The compression type to use for exported
          #   files. Possible values include `GZIP` and `NONE`. The default
          #   value is `NONE`.
          #
          # @!group Attributes
          def compression= value
            @gapi.configuration.extract.compression = value
          end

          ##
          # Sets the field delimiter.
          #
          # @param [String] value Delimiter to use between fields in the
          #   exported data. Default is `,`.
          #
          # @!group Attributes
          def delimiter= value
            @gapi.configuration.extract.field_delimiter = value
          end

          ##
          # Sets the destination file format. The default value is `csv`.
          #
          # The following values are supported:
          #
          # * `csv` - CSV
          # * `json` - [Newline-delimited JSON](http://jsonlines.org/)
          # * `avro` - [Avro](http://avro.apache.org/)
          #
          # @param [String] new_format The new destination format.
          #
          # @!group Attributes
          #
          def format= new_format
            @gapi.configuration.extract.update!(
              destination_format: Convert.source_format(new_format)
            )
          end

          ##
          # Print a header row in the exported file.
          #
          # @param [Boolean] value Whether to print out a header row in the
          #   results. Default is `true`.
          #
          # @!group Attributes
          def header= value
            @gapi.configuration.extract.print_header = value
          end

          ##
          # Sets the labels to use for the job.
          #
          # @param [Hash] value A hash of user-provided labels associated
          #   with the job. You can use these to organize and group your
          #   jobs. Label keys and values can be no longer than 63
          #   characters, can only contain lowercase letters, numeric
          #   characters, underscores and dashes. International characters
          #   are allowed. Label values are optional. Label keys must start
          #   with a letter and each label in the list must have a different
          #   key.
          #
          # @!group Attributes
          #
          def labels= value
            @gapi.configuration.update! labels: value
          end

          ##
          # Indicate whether to enable extracting applicable column types
          # (such as `TIMESTAMP`) to their corresponding AVRO logical types
          # (`timestamp-micros`), instead of only using their raw types
          # (`avro-long`).
          #
          # Only used when `#format` is set to `"AVRO"` (`#avro?`).
          #
          # @param [Boolean] value Whether applicable column types will use
          #   their corresponding AVRO logical types.
          #
          # @!group Attributes
          def use_avro_logical_types= value
            @gapi.configuration.extract.use_avro_logical_types = value
          end

          def cancel
            raise "not implemented in #{self.class}"
          end

          def rerun!
            raise "not implemented in #{self.class}"
          end

          def reload!
            raise "not implemented in #{self.class}"
          end
          alias refresh! reload!

          def wait_until_done!
            raise "not implemented in #{self.class}"
          end

          ##
          # @private Returns the Google API client library version of this
          # job.
          #
          # @return [Google::Apis::BigqueryV2::Job] (See
          #   {Google::Apis::BigqueryV2::Job})
          def to_gapi
            @gapi
          end
        end
      end
    end
  end
end
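
# An end-to-end sketch of the Avro options above (the bucket, dataset, and
# table names are placeholder assumptions):
#
#   require "google/cloud/bigquery"
#
#   bigquery = Google::Cloud::Bigquery.new
#   table = bigquery.dataset("my_dataset").table "my_table"
#
#   extract_job = table.extract_job "gs://my-bucket/file-name.avro" do |j|
#     j.format = "avro"
#     j.use_avro_logical_types = true # e.g. TIMESTAMP -> timestamp-micros
#   end
#   extract_job.wait_until_done!
#   extract_job.use_avro_logical_types? #=> true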