lib/unicoder/builder.rb in unicoder-0.1.0 vs lib/unicoder/builder.rb in unicoder-1.0.0

- old
+ new

@@ -1,73 +1,135 @@ require "json" +require "rubygems/util" module Unicoder # A builder defines a parse function which translates one (ore more) unicode data # files into an index hash module Builder - attr_reader :index + attr_reader :index, :formats, :option + attr_writer :option - def initialize(unicode_version = nil) - @unicode_version = unicode_version + def formats + { + marshal: { + ext: ".marshal", + }, + json: { + ext: ".json", + option: "charkeys+stringfractions" + }, + esm: { + ext: ".mjs", + option: "charkeys+stringfractions" + } + } + end + + def meta + { + META: { + generator: "unicoder v#{Unicoder::VERSION}", + unicodeVersion: @unicode_version, + }, + } + end + + def initialize(unicode_version = nil, emoji_version = nil, format = nil) + @unicode_version = unicode_version || CURRENT_UNICODE_VERSION + @emoji_version = emoji_version || CURRENT_EMOJI_VERSION + @option = formats[format.to_sym] ? formats[format.to_sym][:option] || "" : "" initialize_index end def initialize_index @index = {} end - def assign_codepoint(codepoint, value, index = @index) - index[codepoint] = value + def assign_codepoint(codepoint, value, idx = @index) + if option =~ /charkeys/ + idx[[codepoint].pack("U*")] = value + else + idx[codepoint] = value + end end + def assign(sub_index_name, codepoint, value) + assign_codepoint(codepoint, value, index[sub_index_name]) + end + def parse! raise ArgumentError, "abstract" end def parse_file(identifier, parse_mode, **parse_options) filename = UNICODE_FILES[identifier.to_sym] || filename raise ArgumentError, "No valid file identifier or filename given" if !filename - filename.sub! 'VERSION', @unicode_version - Downloader.fetch(identifier) unless File.exists?(filename) + filename = filename.dup + filename.sub! 'UNICODE_VERSION', @unicode_version + filename.sub! 'EMOJI_VERSION', @emoji_version + filename.sub! 'EMOJI_RELATED_VERSION', EMOJI_RELATED_UNICODE_VERSIONS[@emoji_version] + filename.sub! '.zip', '' + filename.sub! /\A(https?|ftp):\//, "" + Downloader.fetch(identifier) unless File.exist?(LOCAL_DATA_DIRECTORY + filename) file = File.read(LOCAL_DATA_DIRECTORY + filename) if parse_mode == :line file.each_line{ |line| yield Hash[ $~.names.zip( $~.captures ) ] if line =~ parse_options[:regex] } + elsif parse_mode == :xml + require "oga" + yield Oga.parse_xml(file) + else + yield file end end def export(format: :marshal, **options) p index if options[:verbose] + if options[:meta] + idx = meta.merge(index) + else + idx = index + end + + case format.to_sym when :marshal - index_file = Marshal.dump(index) + index_file = Marshal.dump(idx) when :json - index_file = JSON.dump(index) + index_file = JSON.dump(idx) + when :esm + index_file = "export default " + JSON.dump(idx) end - # if false# || options[:gzip] if options[:gzip] - Gem.gzip(index_file) + Gem::Util.gzip(index_file) else index_file end end - + def self.build(identifier, **options) format = options[:format] || :marshal require_relative "builders/#{identifier}" # require "unicoder/builders/#{identifier}" builder_class = self.const_get(identifier.to_s.gsub(/(?:^|_)([a-z])/){ $1.upcase }) - builder = builder_class.new(options[:unicode_version] || CURRENT_UNICODE_VERSION) + builder = builder_class.new( + options[:unicode_version], + options[:emoji_version], + format + ) puts "Building index for #{identifier}…" + if options[:option] + builder.option = options[:option] + end builder.parse! - index_file = builder.export(options) + index_file = builder.export(**options) destination ||= options[:destination] || identifier.to_s - destination += ".#{format}" + destination += "#{builder.formats.dig(format.to_sym, :ext)}" destination += ".gz" if options[:gzip] bytes = File.write destination, index_file puts "Index created at: #{destination} (#{bytes} bytes written)" end