# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


require "google/cloud/bigquery/version"
require "google/cloud/bigquery/convert"
require "google/cloud/errors"
require "google/apis/bigquery_v2"
require "pathname"
require "securerandom"
require "mini_mime"
require "date"

module Google
  module Cloud
    module Bigquery
      ##
      # @private Represents the BigQuery service and API calls.
      class Service
        ##
        # Alias to the Google Client API module
        API = Google::Apis::BigqueryV2

        # @private
        attr_accessor :project

        # @private
        attr_accessor :credentials

        # @private
        attr_reader :retries, :timeout, :host

        ##
        # Creates a new Service instance.
        def initialize project, credentials, retries: nil, timeout: nil, host: nil
          @project = project
          @credentials = credentials
          @retries = retries
          @timeout = timeout
          @host = host
        end

        def service
          return mocked_service if mocked_service
          @service ||= begin
            service = API::BigqueryService.new
            service.client_options.application_name = "gcloud-ruby"
            service.client_options.application_version = \
              Google::Cloud::Bigquery::VERSION
            service.client_options.open_timeout_sec = timeout
            service.client_options.read_timeout_sec = timeout
            service.client_options.send_timeout_sec = timeout
            service.request_options.retries = 0 # handle retries in #execute
            service.request_options.header ||= {}
            service.request_options.header["x-goog-api-client"] = \
              "gl-ruby/#{RUBY_VERSION} gccl/#{Google::Cloud::Bigquery::VERSION}"
            service.authorization = @credentials.client
            service.root_url = host if host
            service
          end
        end

        attr_accessor :mocked_service

        def project_service_account
          service.get_project_service_account project
        end

        ##
        # Lists all datasets in the specified project to which you have
        # been granted the READER dataset role.
        def list_datasets options = {}
          # The list operation is considered idempotent
          execute backoff: true do
            service.list_datasets @project,
                                  all: options[:all],
                                  filter: options[:filter],
                                  max_results: options[:max],
                                  page_token: options[:token]
          end
        end

        ##
        # Returns the dataset specified by datasetID.
        def get_dataset dataset_id
          # The get operation is considered idempotent
          execute backoff: true do
            service.get_dataset @project, dataset_id
          end
        end

        ##
        # Creates a new empty dataset.
        def insert_dataset new_dataset_gapi
          execute { service.insert_dataset @project, new_dataset_gapi }
        end

        ##
        # Updates information in an existing dataset, only replacing
        # fields that are provided in the submitted dataset resource.
        def patch_dataset dataset_id, patched_dataset_gapi
          patch_with_backoff = false
          options = {}
          if patched_dataset_gapi.etag
            options[:header] = { "If-Match" => patched_dataset_gapi.etag }
            # The patch with etag operation is considered idempotent
            patch_with_backoff = true
          end
          execute backoff: patch_with_backoff do
            service.patch_dataset @project, dataset_id, patched_dataset_gapi,
                                  options: options
          end
        end
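
        # A minimal, illustrative sketch of the dataset calls above. The
        # project ID, credentials object, and dataset ID are assumptions for
        # this example, not values from this file:
        #
        #   service = Google::Cloud::Bigquery::Service.new "my-project", credentials
        #   gapi = Google::Apis::BigqueryV2::Dataset.new(
        #     dataset_reference: Google::Apis::BigqueryV2::DatasetReference.new(
        #       project_id: "my-project", dataset_id: "my_dataset"
        #     )
        #   )
        #   service.insert_dataset gapi
        #   service.get_dataset "my_dataset"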

        ##
        # Deletes the dataset specified by the datasetId value.
        # Before you can delete a dataset, you must delete all its tables,
        # either manually or by specifying force: true in options.
        # Immediately after deletion, you can create another dataset with
        # the same name.
        def delete_dataset dataset_id, force = nil
          execute do
            service.delete_dataset @project, dataset_id, delete_contents: force
          end
        end

        ##
        # Lists all tables in the specified dataset.
        # Requires the READER dataset role.
        def list_tables dataset_id, options = {}
          # The list operation is considered idempotent
          execute backoff: true do
            service.list_tables @project, dataset_id,
                                max_results: options[:max],
                                page_token: options[:token]
          end
        end

        def get_project_table project_id, dataset_id, table_id
          # The get operation is considered idempotent
          execute backoff: true do
            service.get_table project_id, dataset_id, table_id
          end
        end

        ##
        # Gets the specified table resource by table ID.
        # This method does not return the data in the table,
        # it only returns the table resource,
        # which describes the structure of this table.
        def get_table dataset_id, table_id
          # The get operation is considered idempotent
          execute backoff: true do
            get_project_table @project, dataset_id, table_id
          end
        end

        ##
        # Creates a new, empty table in the dataset.
        def insert_table dataset_id, new_table_gapi
          execute { service.insert_table @project, dataset_id, new_table_gapi }
        end

        ##
        # Updates information in an existing table, replacing fields that
        # are provided in the submitted table resource.
        def patch_table dataset_id, table_id, patched_table_gapi
          patch_with_backoff = false
          options = {}
          if patched_table_gapi.etag
            options[:header] = { "If-Match" => patched_table_gapi.etag }
            # The patch with etag operation is considered idempotent
            patch_with_backoff = true
          end
          execute backoff: patch_with_backoff do
            service.patch_table @project, dataset_id, table_id,
                                patched_table_gapi, options: options
          end
        end

        ##
        # Deletes the table specified by tableId from the dataset.
        # If the table contains data, all the data will be deleted.
        def delete_table dataset_id, table_id
          execute { service.delete_table @project, dataset_id, table_id }
        end

        ##
        # Retrieves data from the table.
        def list_tabledata dataset_id, table_id, options = {}
          # The list operation is considered idempotent
          execute backoff: true do
            json_txt = service.list_table_data \
              @project, dataset_id, table_id,
              max_results: options.delete(:max),
              page_token: options.delete(:token),
              start_index: options.delete(:start),
              options: { skip_deserialization: true }
            JSON.parse json_txt, symbolize_names: true
          end
        end

        def insert_tabledata dataset_id, table_id, rows, options = {}
          json_rows = Array(rows).map { |row| Convert.to_json_row row }
          insert_tabledata_json_rows dataset_id, table_id, json_rows, options
        end

        def insert_tabledata_json_rows dataset_id, table_id, json_rows, options = {}
          rows_and_ids = Array(json_rows).zip Array(options[:insert_ids])
          insert_rows = rows_and_ids.map do |json_row, insert_id|
            insert_id ||= SecureRandom.uuid
            {
              insertId: insert_id,
              json: json_row
            }
          end

          insert_req = {
            rows: insert_rows,
            ignoreUnknownValues: options[:ignore_unknown],
            skipInvalidRows: options[:skip_invalid]
          }.to_json

          # The insertAll with insertId operation is considered idempotent
          execute backoff: true do
            service.insert_all_table_data(
              @project, dataset_id, table_id, insert_req,
              options: { skip_serialization: true }
            )
          end
        end
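
        # An illustrative sketch of the streaming-insert call above. The
        # dataset, table, row values, and insert ID are assumptions for this
        # example, not part of the original source:
        #
        #   rows = [{ "name" => "Alice", "age" => 30 }]
        #   service.insert_tabledata "my_dataset", "my_table", rows,
        #                            insert_ids: ["row-1"],
        #                            skip_invalid: true
        #
        # Supplying explicit insert_ids lets the BigQuery service de-duplicate
        # rows that are sent more than once, which is why the insertAll call
        # above can safely be retried with backoff.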

        ##
        # Lists all models in the specified dataset.
        # Requires the READER dataset role.
        def list_models dataset_id, max: nil, token: nil
          options = { skip_deserialization: true }
          # The list operation is considered idempotent
          execute backoff: true do
            json_txt = service.list_models @project, dataset_id,
                                           max_results: max,
                                           page_token: token,
                                           options: options
            JSON.parse json_txt, symbolize_names: true
          end
        end

        ##
        # Gets the specified model resource by model ID.
        # This method does not return the data in the model,
        # it only returns the model resource,
        # which describes the structure of this model.
        def get_model dataset_id, model_id
          # The get operation is considered idempotent
          execute backoff: true do
            json_txt = service.get_model @project, dataset_id, model_id,
                                         options: { skip_deserialization: true }
            JSON.parse json_txt, symbolize_names: true
          end
        end

        ##
        # Updates information in an existing model, replacing fields that
        # are provided in the submitted model resource.
        def patch_model dataset_id, model_id, patched_model_gapi, etag = nil
          patch_with_backoff = false
          options = { skip_deserialization: true }
          if etag
            options[:header] = { "If-Match" => etag }
            # The patch with etag operation is considered idempotent
            patch_with_backoff = true
          end
          execute backoff: patch_with_backoff do
            json_txt = service.patch_model @project, dataset_id, model_id,
                                           patched_model_gapi, options: options
            JSON.parse json_txt, symbolize_names: true
          end
        end

        ##
        # Deletes the model specified by modelId from the dataset.
        # If the model contains data, all the data will be deleted.
        def delete_model dataset_id, model_id
          execute { service.delete_model @project, dataset_id, model_id }
        end

        ##
        # Lists all jobs in the specified project to which you have
        # been granted the READER job role.
        def list_jobs options = {}
          # The list operation is considered idempotent
          min_creation_time = Convert.time_to_millis options[:min_created_at]
          max_creation_time = Convert.time_to_millis options[:max_created_at]
          execute backoff: true do
            service.list_jobs @project,
                              all_users: options[:all],
                              max_results: options[:max],
                              page_token: options[:token],
                              projection: "full",
                              state_filter: options[:filter],
                              min_creation_time: min_creation_time,
                              max_creation_time: max_creation_time
          end
        end

        ##
        # Cancel the job specified by jobId.
        def cancel_job job_id, location: nil
          # The BigQuery team has told us cancelling is considered idempotent
          execute backoff: true do
            service.cancel_job @project, job_id, location: location
          end
        end
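
        # An illustrative sketch of the job-listing call above. The filter
        # value and one-hour time window are assumptions for this example:
        #
        #   jobs = service.list_jobs filter: "done",
        #                            min_created_at: Time.now - 3600,
        #                            max_created_at: Time.now
        #
        # Convert.time_to_millis turns the Time values into the millisecond
        # timestamps that the REST API's minCreationTime/maxCreationTime
        # parameters expect.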

        ##
        # Returns the job specified by jobID.
        def get_job job_id, location: nil
          # The get operation is considered idempotent
          execute backoff: true do
            service.get_job @project, job_id, location: location
          end
        end

        def insert_job config, location: nil
          job_object = API::Job.new(
            job_reference: job_ref_from(nil, nil, location: location),
            configuration: config
          )
          # Jobs have generated id, so this operation is considered idempotent
          execute backoff: true do
            service.insert_job @project, job_object
          end
        end

        def query_job query_job_gapi
          execute backoff: true do
            service.insert_job @project, query_job_gapi
          end
        end

        ##
        # Returns the query data for the job
        def job_query_results job_id, options = {}
          # The get operation is considered idempotent
          execute backoff: true do
            service.get_job_query_results @project, job_id,
                                          location: options.delete(:location),
                                          max_results: options.delete(:max),
                                          page_token: options.delete(:token),
                                          start_index: options.delete(:start),
                                          timeout_ms: options.delete(:timeout)
          end
        end

        def copy_table copy_job_gapi
          execute backoff: true do
            service.insert_job @project, copy_job_gapi
          end
        end

        def extract_table extract_job_gapi
          execute backoff: true do
            service.insert_job @project, extract_job_gapi
          end
        end

        def load_table_gs_url load_job_gapi
          execute backoff: true do
            service.insert_job @project, load_job_gapi
          end
        end

        def load_table_file file, load_job_gapi
          execute backoff: true do
            service.insert_job @project, load_job_gapi,
                               upload_source: file,
                               content_type: mime_type_for(file)
          end
        end

        def self.get_table_ref table, default_ref: nil
          if table.respond_to? :table_ref
            table.table_ref
          else
            table_ref_from_s table, default_ref: default_ref
          end
        end

        ##
        # Extracts at least `tbl` group, and possibly `dts` and `prj` groups,
        # from strings in the formats: "my_table", "my_dataset.my_table", or
        # "my-project:my_dataset.my_table". Then merges project_id and
        # dataset_id from the default table ref if they are missing.
        #
        # The regex matches both Standard SQL
        # ("bigquery-public-data.samples.shakespeare") and Legacy SQL
        # ("bigquery-public-data:samples.shakespeare").
        def self.table_ref_from_s str, default_ref: {}
          str = str.to_s
          m = /\A(((?<prj>\S*)(:|\.))?(?<dts>\S*)\.)?(?<tbl>\S*)\z/.match str
          unless m
            raise ArgumentError,
                  "unable to identify table from #{str.inspect}"
          end
          str_table_ref_hash = {
            project_id: m["prj"],
            dataset_id: m["dts"],
            table_id: m["tbl"]
          }.delete_if { |_, v| v.nil? }
          str_table_ref_hash = default_ref.to_h.merge str_table_ref_hash
          ref = Google::Apis::BigqueryV2::TableReference.new str_table_ref_hash
          validate_table_ref ref
          ref
        end

        def self.validate_table_ref table_ref
          %i[project_id dataset_id table_id].each do |f|
            if table_ref.send(f).nil?
              raise ArgumentError, "TableReference is missing #{f}"
            end
          end
        end

        ##
        # Lists all projects to which you have been granted any project role.
        def list_projects options = {}
          execute backoff: true do
            service.list_projects max_results: options[:max],
                                  page_token: options[:token]
          end
        end

        # If no job_id or prefix is given, always generate a client-side job ID
        # anyway, for idempotent retry in the google-api-client layer.
        # See https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid
        def job_ref_from job_id, prefix, location: nil
          prefix ||= "job_"
          job_id ||= "#{prefix}#{generate_id}"
          job_ref = API::JobReference.new(
            project_id: @project,
            job_id: job_id
          )
          # BigQuery does not allow nil location, but missing is ok.
          job_ref.location = location if location
          job_ref
        end
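
        # Illustrative inputs and outputs for .table_ref_from_s above. The
        # table, dataset, and project names are assumptions for this example:
        #
        #   Service.table_ref_from_s "my_table",
        #                            default_ref: { project_id: "my-project",
        #                                           dataset_id: "my_dataset" }
        #   # => TableReference for "my-project:my_dataset.my_table"
        #
        #   Service.table_ref_from_s "bigquery-public-data:samples.shakespeare"
        #   # => TableReference with all three parts parsed from the string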

        # API object for dataset.
        def dataset_ref_from dts, pjt = nil
          return nil if dts.nil?
          if dts.respond_to? :dataset_id
            Google::Apis::BigqueryV2::DatasetReference.new(
              project_id: (pjt || dts.project_id || @project),
              dataset_id: dts.dataset_id
            )
          else
            Google::Apis::BigqueryV2::DatasetReference.new(
              project_id: (pjt || @project),
              dataset_id: dts
            )
          end
        end

        def inspect
          "#{self.class}(#{@project})"
        end

        protected

        # Generate a random string similar to the BigQuery service job IDs.
        def generate_id
          SecureRandom.urlsafe_base64 21
        end

        def mime_type_for file
          mime_type = MiniMime.lookup_by_filename Pathname(file).to_path
          return nil if mime_type.nil?
          mime_type.content_type
        rescue StandardError
          nil
        end

        def execute backoff: nil
          if backoff
            Backoff.new(retries: retries).execute { yield }
          else
            yield
          end
        rescue Google::Apis::Error => e
          raise Google::Cloud::Error.from_error e
        end

        class Backoff
          class << self
            attr_accessor :retries
            attr_accessor :reasons
            attr_accessor :backoff
          end
          self.retries = 5
          self.reasons = %w[rateLimitExceeded backendError]
          self.backoff = lambda do |retries|
            # Max delay is 32 seconds
            # See "Back-off Requirements" here:
            # https://cloud.google.com/bigquery/sla
            retries = 5 if retries > 5
            delay = 2**retries
            sleep delay
          end

          def initialize options = {}
            @retries = (options[:retries] || Backoff.retries).to_i
            @reasons = (options[:reasons] || Backoff.reasons).to_a
            @backoff = options[:backoff] || Backoff.backoff
          end

          def execute
            current_retries = 0
            loop do
              begin
                return yield
              rescue Google::Apis::Error => e
                raise e unless retry? e.body, current_retries
                @backoff.call current_retries
                current_retries += 1
              end
            end
          end

          protected

          def retry? result, current_retries #:nodoc:
            if current_retries < @retries
              return true if retry_error_reason? result
            end
            false
          end

          def retry_error_reason? err_body
            err_hash = JSON.parse err_body
            json_errors = Array err_hash["error"]["errors"]
            return false if json_errors.empty?
            json_errors.each do |json_error|
              return false unless @reasons.include? json_error["reason"]
            end
            true
          rescue StandardError
            false
          end
        end
      end
    end
  end
end
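
# A hedged sketch of how the Backoff policy above can be customized. The
# values shown are assumptions for this example, not defaults from this file:
#
#   Google::Cloud::Bigquery::Service::Backoff.retries = 3
#   Google::Cloud::Bigquery::Service::Backoff.backoff =
#     ->(retries) { sleep (2**retries) + rand }
#
# The class-level settings apply to every Service#execute call that requests
# backoff; per-call overrides can also be passed to Backoff.new via its
# options hash (:retries, :reasons, :backoff).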