# encoding: utf-8

# Copyright (C) 2016 Mikio Ikoma

# CURRENTLY NOT USED

require "pp"

# 鬼雲のマニュアルからPOSIX文字クラスの定義を得る
def get_onigmo_posix_char_class(file, hash)
  content = IO.read(file)
  if(!md = content.match(/\r?\n\d\.\s+Character\s+class.+?   Unicode Case:(.+?)\r?\n\r?\n\r?\n/m))
    raise "#{file} format is unmatched"
  end
  posix_def = md[1]
  posix_def.gsub!(/\r?\n+/m, "\n")
  posix_def.gsub!(/\|\r?\n/m, "|")
  posix_def.split(/\r?\n/).each do | line |
    elems = line.split(/\s+/)
    if(elems[1] && elems[1].match(/^\w+$/) && elems[2])
      raise "Duplicated symbol #{elems[1]}" if hash[elems[1]]
      hash[elems[1]] = elems[2..-1].join("")
    end
  end
end

# 鬼雲のマニュアルからUnicode文字クラスの一覧を得る
def get_onigmo_unicode_propety_class(file, hash)
  content = IO.read(file)
  class_name = nil
  content.split(/\r?\n/).each do | line |
    if(line[0..0] == "*")
      class_name = line[2..-1].gsub(/\W+/, "_")
      class_name.chop! if(class_name[-1..-1] == "_")
      next
    end
    next if(!class_name || line.length == 0)
    prop_name = line.gsub(/^\s+/, "")
    raise "Duplicated symbol #{prop_name}" if hash[prop_name]
    hash[prop_name] = class_name.to_sym
  end
end

hash = {}
get_onigmo_posix_char_class("../contrib/onigmo/RE.txt", hash)
# get_onigmo_unicode_propety_class("../contrib/onigmo/UnicodeProps.txt", hash)
pp hash
exit


# Unicode定義ファイルの共通文法の処理
def read_unicode_line(file)
  content = IO.read(file)
  content.split(/\r?\n/).each do | line |
    next if(line.length == 0 || line[0..0] == '#')
    yield(line)
  end
end

# スクリプトファイルの読み込み
def read_scripts(scripts_file, ranges)
  read_unicode_line(scripts_file) do | line |
    if(md = line.match(/^(\h{4,6})(?:\.\.(\h{4,6}))?\s+;\s+(\w+)\s+#\s+(\S+)\s+/))
      range_start = md[1].hex
      range_end   = (md[2])?(md[2].hex):(range_start)

      script1 = md[3]
      script2 = md[4]
      script2 = "LC" if(script2 == "L&")
      script3 = script2[0..0]

      #puts "range: [#{range_start}:#{range_end}]\t#{script1}\t#{script2}"
      [script1, script2, script3].each do | script |
        if(ranges[script])
          if(range_start == ranges[script][-1][1] + 1)
            ranges[script][-1][1] = range_end
          else
            ranges[script].push [range_start, range_end]
          end
        else
          ranges[script] = [[range_start, range_end]]
        end
      end
        
    else
      raise "syntax error: #{line}"
    end
  end
end

# ブロックファイルの読み込み
def read_blocks(blocks_file, ranges)
  read_unicode_line(blocks_file) do | line |
    if(md = line.match(/^(\h{4,6})\.\.(\h{4,6})\s*;\s+(.+)$/))
      range_start = md[1].hex
      range_end   = md[2].hex
      block_name = "In_" + md[3].gsub(/\W/, "_")
      if ranges[block_name]
        raise "block name #{block_name} is already used"
      else
        ranges[block_name] = [[range_start, range_end]]
      end
    end
  end
end

# Unicodeのスクリプト、ブロックに対応したTRangeのRubyソースの出力
def puts_unicode_ranges(unicode_file, ranges)
  ranges_source = ranges.keys.map { |class_name|
    (" "*12) +
    "hash[\"#{class_name}\"] = CharClass.new([" +
    ( ranges[class_name].map{|range| "TRange.new(#{range[0]}, #{range[1]})"}.join(", ") ) +
    "])"
  }.join("\n")
  
  template =<<"  END_OF_TEMPLATE"
    # encoding: utf-8
    # DO NOT Modify This File Since Automatically Generated

    # Unicodeのレンジ
    module Regextest::Front::ParseUnicode
      class Unicode
        # ハッシュの生成
        def self.ranges()
          hash = {}
#{ranges_source}
          hash
        end
      end
    end

    # Test suite (execute when this file is specified in command line)
    if __FILE__ == $0 
    end
  END_OF_TEMPLATE
  template.gsub!(/^    /, "")
  File.open(unicode_file, "w") do |fp|
    fp.puts template
  end
  
end

ranges = {}
read_scripts("./unicode/Scripts.txt", ranges)
read_blocks("./unicode/Blocks.txt", ranges)
puts_unicode_ranges('tst-reg-parse-unicode', ranges)
# pp ranges