Sha256: 0880f68c8eae033aa6af1cf4e7dc25a303d59a5d03790e651678e46f92f493f1

Contents?: true

Size: 1.34 KB

Versions: 9

Compression:

Stored size: 1.34 KB

Contents

require 'active_support/core_ext'
require 'libxml'
require 'oj'

#
# (compressed) bulk files can be posted using curl, e.g. with the following command
#
# gzip -c -d aleph.PRIMO.20120908.091506.1.es_bulk.gz | curl -XPOST 'localhost:9200/catalog/record/_bulk' --data-binary @-
#
module Mabmapper
  # Writes records as Elasticsearch bulk-API lines to an IO-like object.
  #
  # Each call to #write emits two newline-terminated JSON lines: an "index"
  # action line carrying the document id, followed by the document itself.
  #
  # NOTE(review): #add_file / #add_file_simple look like they mirror a tar
  # writer interface so this class can be used in its place — confirm against
  # the callers.
  class ElasticSearchWriter
    # Archive suffix that gets swapped for '.es_bulk'. Anchored to the end of
    # the name so that a '.tar' appearing earlier in the file name (e.g.
    # 'my.target.tar.gz') is left untouched, and with the dot in 'tar.gz'
    # escaped so that only a literal '.' matches.
    ARCHIVE_EXTENSION = /\.(?:tar\.gz|tgz|tar)\z/.freeze

    # Builds the path of the bulk file that corresponds to an input archive.
    #
    # @param output_dir_name [String] directory the bulk file should live in
    # @param file_name [String] path or name of the input archive; only its
    #   basename is used
    # @param options [Hash] :will_be_gziped => true appends '.gz' to the name
    #   (any value other than the literal true is ignored)
    # @return [String] output_dir_name joined with the derived file name
    def self.out_file(output_dir_name, file_name, options = {})
      file_basename = File.basename(file_name).sub(ARCHIVE_EXTENSION, '.es_bulk')
      file_basename += '.gz' if options[:will_be_gziped] == true

      File.join(output_dir_name, file_basename)
    end

    # @param io [#write, #close, #closed?] sink that receives the bulk lines
    def initialize(io)
      @io = io

      # Set libxml as minixml backend to improve performance.
      # Note that this assigns the backend on ActiveSupport::XmlMini itself,
      # not on this instance.
      ActiveSupport::XmlMini.backend = 'LibXML'
    end

    # Yields this writer; name and mode are accepted but not used.
    def add_file(name, mode) # :yields: io
      yield self
    end

    # Yields this writer; name, mode and size are accepted but not used.
    def add_file_simple(name, mode, size) # :yields: io
      yield self
    end

    # Closes the underlying io unless it is already closed, so calling this
    # more than once is safe.
    def close
      @io.close unless @io.closed?
    end

    # Converts one record into a bulk "index" action and writes it to the io.
    #
    # We assume that data is string serialized xml whose root element is
    # <document> and which contains an <id> element.
    #
    # @param xml [String] serialized xml of a single record
    # @return whatever the underlying io's #write returns
    def write(xml)
      document = Hash.from_xml(xml)['document']

      action = Oj.dump({ index: { _id: document['id'].to_s } }, mode: :compat)
      source = Oj.dump(document, mode: :compat)

      # Beware, right positions of newlines is vital for elasticsearch bulk
      # import: one line per JSON object, and the last line must end with a
      # newline as well.
      @io.write("#{action}\n#{source}\n")
    end
  end
end

Version data entries

9 entries across 9 versions & 1 rubygems

Version Path
mabmapper-2.0.4 lib/mabmapper/elasticsearch_writer.rb
mabmapper-2.0.3 lib/mabmapper/elasticsearch_writer.rb
mabmapper-2.0.2 lib/mabmapper/elasticsearch_writer.rb
mabmapper-2.0.1 lib/mabmapper/elasticsearch_writer.rb
mabmapper-2.0.0 lib/mabmapper/elasticsearch_writer.rb
mabmapper-1.0.0.pre18 lib/mabmapper/elasticsearch_writer.rb
mabmapper-1.0.0.pre17 lib/mabmapper/elasticsearch_writer.rb
mabmapper-1.0.0.pre16 lib/mabmapper/elasticsearch_writer.rb
mabmapper-1.0.0.pre15 lib/mabmapper/elasticsearch_writer.rb