require 'traject/marc_extractor'
require 'traject/translation_map'
require 'base64'
require 'json'

module Traject::Macros
  # Some of these may be generic for any MARC, but we haven't done
  # the analytical work to think it through; some of this is
  # definitely specific to Marc21.
  module Marc21

    # A combo function macro that will extract data from marc according to a string
    # field/substring specification, then apply various optional post-processing to it too.
    #
    # First argument is a string spec suitable for the MarcExtractor, see
    # MarcExtractor::parse_string_spec.
    #
    # Second arg is an optional options hash. The keys listed below are consumed
    # by this macro; every other key is handed on unchanged to
    # Traject::MarcExtractor.extract_by_spec. The hash passed in is never
    # modified, so the same hash can safely be reused for several macro calls.
    #
    # * :first => true: take only first value
    # * :translation_map => String: translate with named translation map looked up in load
    #       path, uses Traject::TranslationMap.new(translation_map_arg)
    # * :trim_punctuation => true; trims leading/trailing punctuation using standard algorithms that
    #     have shown themselves useful with Marc, using Marc21.trim_punctuation
    # * :default => String: if otherwise empty, add default value
    #
    # Post-processing is applied in this order: first, translation map,
    # trim punctuation, default.
    #
    # Returns a lambda taking (record, accumulator, context) which appends the
    # extracted values to accumulator.
    #
    # Examples:
    #
    # to_field "title", extract_marc("245abcd", :trim_punctuation => true)
    # to_field "id",    extract_marc("001", :first => true)
    # to_field "geo",   extract_marc("040a", :seperator => nil, :translation_map => "marc040")
    def extract_marc(spec, options = {})
      # Work on a copy: the deletes below must not strip keys from a hash the
      # caller still owns (and they would raise on a frozen hash).
      options = options.dup

      only_first       = options.delete(:first)
      trim_punctuation = options.delete(:trim_punctuation)
      default_value    = options.delete(:default)

      # Build the TranslationMap once, when the macro is defined, rather than
      # inside the closure that runs for every record. This also makes any
      # exception from constructing the map surface at load time.
      translation_map_arg = options.delete(:translation_map)
      translation_map     = Traject::TranslationMap.new(translation_map_arg) if translation_map_arg

      lambda do |record, accumulator, context|
        accumulator.concat Traject::MarcExtractor.extract_by_spec(record, spec, options)

        Marc21.first!(accumulator) if only_first
        translation_map.translate_array!(accumulator) if translation_map
        accumulator.map! { |value| Marc21.trim_punctuation(value) } if trim_punctuation
        accumulator << default_value if default_value && accumulator.empty?
      end
    end

    # Serializes complete marc record to a serialization format.
    # Required option :format (symbol or string), e.g.
    #
    #     serialized_marc(:format => :binary)
    #
    # formats:
    # [xml] MarcXML
    # [json] marc-in-json (http://dilettantes.code4lib.org/blog/2010/09/a-proposal-to-serialize-marc-in-json/)
    # [binary] Standard ISO 2709 binary marc. By default WILL be base64-encoded,
    #          assumed destination a solr 'binary' field.
    #          add option `:binary_escape => false` to do straight binary -- unclear
    #          what Solr's documented behavior is when you do this, and add a string
    #          with binary control chars to solr. May do different things in diff
    #          Solr versions, including raising exceptions.
    #
    # The options are read once, when the macro is defined, and the hash passed
    # in is not modified.
    #
    # Raises ArgumentError if :format is missing or not one of the formats above.
    #
    # Returns a lambda taking (record, accumulator, context) which appends the
    # serialized record, as a single string, to accumulator.
    def serialized_marc(options)
      # Capture into locals instead of writing the normalized format back into
      # the caller's hash.
      format        = options[:format].to_s
      binary_escape = options[:binary_escape] != false

      raise ArgumentError, "Need :format => [binary|xml|json] arg" unless %w{binary xml json}.include?(format)

      lambda do |record, accumulator, context|
        case format
        when "binary"
          binary = record.to_marc
          binary = Base64.encode64(binary) if binary_escape
          accumulator << binary
        when "xml"
          # ruby-marc #to_xml returns a REXML object at time of this writing;
          # call #to_s on it. Hopefully that'll be forward compatible.
          accumulator << record.to_xml.to_s
        when "json"
          accumulator << JSON.dump(record.to_hash)
        end
      end
    end

    # Collects the subfield values of every field whose tag lies in a range,
    # by default tags 100 through 899 inclusive, and adds them to the output.
    # By default the subfield values of one field are joined with a space
    # into a single output value.
    #
    # options
    # [:from] default "100", only tags >= this (string comparison)
    # [:to]   default "899", only tags <= this (string comparison)
    # [:seperator] string used to join a field's subfields, default space;
    #              nil means don't join, each subfield value is added separately
    #
    # Every field in the from-to range is asked for its subfields, so the range
    # must contain only marc DATA fields (not control fields), or weirdness.
    #
    # Fields without any subfields contribute nothing.
    #
    # Can always run this thing multiple times on the same field if you need
    # non-contiguous ranges of fields.
    def extract_all_marc_values(options = {})
      defaults = {:from => "100", :to => "899", :seperator => ' '}
      first_tag, last_tag, joiner = defaults.merge(options).values_at(:from, :to, :seperator)

      lambda do |record, accumulator, _context|
        record.each do |field|
          next unless field.tag.between?(first_tag, last_tag)

          values = field.subfields.map { |subfield| subfield.value }
          next if values.empty?

          if joiner
            accumulator << values.join(joiner)
          else
            accumulator.concat(values)
          end
        end
      end
    end


    # Trims punctuation mostly from end, and occasionally from beginning
    # of string. Not nearly as complex logic as SolrMarc's version, just
    # pretty simple.
    #
    # Removes
    # * trailing: comma, slash, semicolon, colon or space, together with any
    #   spaces directly before and after it
    # * trailing period (and any spaces after it) if it directly follows at
    #   least three word characters (\w: ASCII letters, digits or underscore).
    #   Everything before the period, including spaces between words, is kept.
    # * single square bracket characters if they are the start and/or end
    #   chars and there are no internal square brackets.
    #
    # Returns altered string, doesn't change original arg. A nil argument is
    # returned as nil instead of raising.
    def self.trim_punctuation(str)
      # Nothing to trim; pass nil through rather than failing with NoMethodError.
      return str unless str

      trimmed = str.sub(/ *[ ,\/;:] *\Z/, '')
      # Only the period and what follows it may be consumed here. Matching
      # spaces in front of the three word characters would delete them, gluing
      # a final three-character word onto the previous one ("one two." => "onetwo").
      trimmed = trimmed.sub(/(\w\w\w)\. *\Z/, '\1')
      trimmed.sub(/\A\[?([^\[\]]+)\]?\Z/, '\1')
    end

    # Truncates arr in place so that only its first element (if any) remains.
    # The return value is whatever Array#slice! gives back: the removed
    # elements, or nil when arr is empty.
    def self.first!(arr)
      # Destructively cut away everything from index 1 through the end.
      arr.slice!(1..-1)
    end

  end
end