Sha256: 475f7d419701d064aca601438770bbfe438d815dd8812778ff8b16f161fb35f4

Contents?: true

Size: 1.92 KB

Versions: 15

Compression:

Stored size: 1.92 KB

Contents

# coding: utf-8

module ONIX

  # A utility class that processes the code list XSD from the ONIX spec and
  # creates a set of TSV files. The generated files are used by this library
  # to make hashes of the code lists available to users.
  #
  # A code list is any <simpleType> element whose name attribute contains
  # "List<number>". Each <enumeration> under its <restriction> becomes one
  # row of the TSV file for that list.
  #
  # NOTE(review): this file uses FileUtils and Nokogiri but does not require
  # them itself; presumably the caller loads both beforehand - confirm.
  #
  class CodeListExtractor

    # Creates a new extractor. Expects the path to a copy of the code lists
    # file from the spec (called ONIX_BookProduct_CodeLists.xsd on my system).
    #
    # Raises ArgumentError if filename is not an existing regular file.
    #
    def initialize(filename)
      raise ArgumentError, "#{filename} not found" unless File.file?(filename)

      @filename = filename
    end

    # generate a set of TSV files in the given directory. Creates the directory
    # if it doesn't exist and will overwrite existing files.
    #
    # One file is written per code list, named after the list number zero
    # padded to three digits (list 5 is written to 005.tsv).
    #
    def run(dir)
      # mkdir_p is a no-op when the directory is already there
      FileUtils.mkdir_p(dir)

      each_list do |number, data|
        file = number.to_s.rjust(3, "0") + ".tsv"
        File.open(File.join(dir, file), "w") { |f| f.write(data) }
      end
    end

    private

    # The raw content of the XSD file, read once and cached.
    #
    def data
      @data ||= File.read(@filename)
    end

    # The parsed XSD, cached. Namespaces are stripped once, at parse time, so
    # the xpath queries in this class can use unprefixed element names.
    #
    def document
      @document ||= begin
        doc = Nokogiri::XML(data)
        doc.remove_namespaces! unless doc.namespaces.empty?
        doc
      end
    end

    # Yields the number and the TSV content of every code list in the
    # document.
    #
    # simpleType elements that have no name attribute, or whose name holds no
    # "List<number>" with a number above zero, are skipped.
    #
    def each_list
      document.xpath("//simpleType").each do |node|
        # node["name"] is nil when the attribute is missing; to_s keeps the
        # regexp lookup from blowing up and the result falls through to 0
        list_number = node["name"].to_s[/List(\d+)/, 1].to_i
        yield list_number, list_data(list_number) if list_number > 0
      end
    end

    # Builds the TSV content for the code list with the given number. Every
    # enumeration becomes a line with three tab separated columns: the code
    # value, the text of the first documentation element and the text of the
    # last documentation element.
    #
    # A missing value attribute or a missing documentation element results in
    # an empty column. Returns an empty string if the list has no
    # enumerations.
    #
    def list_data(num)
      nodes = document.xpath("//simpleType[@name='List#{num}']/restriction/enumeration")
      rows  = nodes.map do |node|
        code  = node["value"]
        docs  = node.xpath("./annotation/documentation")
        desc  = docs.first ? docs.first.text : ""
        ldesc = docs.last  ? docs.last.text  : ""
        "#{code}\t#{desc}\t#{ldesc}\n"
      end
      rows.join
    end

  end
end

Version data entries

15 entries across 15 versions & 2 rubygems

Version Path
onix-0.9.5 lib/onix/code_list_extractor.rb
onix-0.9.4 lib/onix/code_list_extractor.rb
milkfarm-onix-0.8.13 lib/onix/code_list_extractor.rb
milkfarm-onix-0.8.12 lib/onix/code_list_extractor.rb
onix-0.9.3 lib/onix/code_list_extractor.rb
onix-0.9.2 lib/onix/code_list_extractor.rb
onix-0.9.1 lib/onix/code_list_extractor.rb
milkfarm-onix-0.8.11 lib/onix/code_list_extractor.rb
milkfarm-onix-0.8.10 lib/onix/code_list_extractor.rb
onix-0.9.0 lib/onix/code_list_extractor.rb
milkfarm-onix-0.8.9 lib/onix/code_list_extractor.rb
milkfarm-onix-0.8.8 lib/onix/code_list_extractor.rb
milkfarm-onix-0.8.7 lib/onix/code_list_extractor.rb
onix-0.8.5 lib/onix/code_list_extractor.rb
onix-0.8.4 lib/onix/code_list_extractor.rb