# Copyright (c) 2011-2012 Hongli Lai
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

require 'mizuho/fuzzystringmatch'

module Mizuho

# Maps section titles to stable section IDs.
#
# A map is loaded from and saved to a plain-text "map file". While a document
# is being processed, every section title is passed to #associate, which
# returns the ID belonging to that title: the ID of an exact title match, the
# ID of a sufficiently similar (renamed) title, or a newly generated ID.
# Entries that were loaded but never associated are reported as orphaned.
class IdMap
	# Raised by #associate when the given title has already been associated
	# with an entry.
	class AlreadyAssociatedError < StandardError
	end

	# Open handle on the system random device, read by #create_unique_id.
	# It is opened when this class is loaded and stays open.
	URANDOM = File.open("/dev/urandom", "rb")
	# String matcher used by #find_similar to score how alike two titles are.
	MATCHER = JaroWinklerPure.new
	# Text between the title and the ID on an entry line of a map file.
	SEPARATOR = "\t=>\t"
	# A MATCHER score must be strictly greater than this for an unassociated
	# entry to be treated as a renamed version of a title.
	SIMILARITY_THRESHOLD = 0.8
	# Comment header written at the top of every saved map file. Frozen so
	# that it cannot be modified in place by accident.
	BANNER = (
		"###### Autogenerated by Mizuho, DO NOT EDIT ######\n" <<
		"# This file maps section names to IDs so that the commenting system knows which\n" <<
		"# comments belong to which section. Section names may be changed at will but\n" <<
		"# IDs always stay the same, allowing one to retain old comments even if you\n" <<
		"# rename a section.\n" <<
		"#\n" <<
		"# This file is autogenerated but is not a cache; you MUST NOT DELETE this\n" <<
		"# file and you must check it into your version control system. If you lose\n" <<
		"# this file you may lose the ability to identify old comments.\n" <<
		"#\n" <<
		"# Entries marked with \"fuzzy\" indicate that the section title has changed\n" <<
		"# and that Mizuho has found an ID which appears to be associated with that\n" <<
		"# section. You should check whether it is correct, and if not, fix it.\n\n"
	).freeze

	# Hash of title => Entry.
	attr_reader :entries

	# Creates an empty map.
	def initialize
		@entries = {}
	end

	# Returns the Entry stored under +title+, or nil if there is none.
	def [](title)
		@entries[title]
	end

	# Replaces the contents of this map with the entries read from
	# +filename_or_io+, which is either an object responding to +readline+
	# or a filename.
	#
	# Each line is stripped and then interpreted as follows:
	# - an empty line clears a pending fuzzy marker;
	# - the exact line "# fuzzy" marks the next entry as fuzzy;
	# - any other line starting with "#" is ignored;
	# - everything else is split on SEPARATOR into a title and an ID and
	#   added as a not-yet-associated entry. If the line has no separator
	#   the ID is nil, in which case #add generates a new one.
	#
	# Returns self.
	def load(filename_or_io)
		@entries.clear
		open_io(filename_or_io, :read) do |io|
			fuzzy = false
			begin
				# IO#readline signals end of input by raising EOFError,
				# which is what terminates this loop.
				while true
					line = io.readline.strip
					if line.empty?
						fuzzy = false
					elsif line == "# fuzzy"
						fuzzy = true
					elsif line !~ /\A#/
						title, id = line.split(SEPARATOR, 2)
						add(title, id, fuzzy, false)
						fuzzy = false
					end
				end
			rescue EOFError
				# End of input: nothing left to parse.
			end
		end
		self
	end

	# Writes the map to +filename_or_io+, which is either an object
	# responding to +write+ or a filename.
	#
	# The output consists of BANNER, then all associated entries sorted by
	# title, then (only if there are any) a notice followed by all entries
	# that were never associated, also sorted by title.
	def save(filename_or_io)
		normal, orphaned = group_and_sort_entries
		open_io(filename_or_io, :write) do |f|
			f.write(BANNER)
			write_entries(f, normal)
			if !orphaned.empty?
				f.puts
				f.puts "### These sections appear to have been removed. Please check."
				f.puts
				write_entries(f, orphaned)
			end
		end
	end

	# Marks +title+ as in use and returns its ID.
	#
	# - If an entry with exactly this title exists, it is marked associated.
	# - Otherwise, if a sufficiently similar unassociated entry exists (see
	#   #find_similar), that entry is renamed to +title+, re-keyed in the
	#   map, and marked both fuzzy and associated.
	# - Otherwise a new associated entry with a freshly generated ID is added.
	#
	# Raises AlreadyAssociatedError if the exact-match entry has already been
	# associated.
	def associate(title)
		if entry = @entries[title]
			if entry.associated?
				raise AlreadyAssociatedError, "Cannot associate an already-associated title"
			end
			entry.associated = true
		elsif entry = find_similar(title)
			# The entry is stored under its old title; move it to the new one.
			@entries.delete(entry.title)
			entry.title = title
			entry.fuzzy = true
			entry.associated = true
			@entries[title] = entry
		else
			entry = add(title, create_unique_id(title), false, true)
		end
		entry.id
	end

	# Stores a new Entry under +title+, replacing any existing one, and
	# returns it. When +id+ is nil (or false) a unique ID is generated.
	# +options+ are passed on as the Entry's +fuzzy+ and +associated+ fields.
	def add(title, id, *options)
		@entries[title] = Entry.new(title, id || create_unique_id(title), *options)
	end

	# Returns a hash with the number of fuzzy entries (:fuzzy) and the number
	# of entries that are not associated (:orphaned).
	def stats
		all = @entries.values
		{
			:fuzzy => all.count { |entry| entry.fuzzy? },
			:orphaned => all.count { |entry| !entry.associated? }
		}
	end

private
	# A single title/ID pair. Note that +private+ does not apply to constants,
	# so this class is still reachable as IdMap::Entry.
	#
	# fuzzy
	#   Whether #associate has fuzzily associated a title with this entry.
	#
	# associated
	#   Whether #associate has associated a title with this entry.
	#   Immediately after loading a map file, all entries are marked
	#   as 'not associated'.
	class Entry < Struct.new(:title, :id, :fuzzy, :associated)
		alias fuzzy? fuzzy
		alias associated? associated

		# Orders entries by title; used when sorting entries for #save.
		def <=>(other)
			title <=> other.title
		end
	end

	# Returns the unassociated entry whose title scores highest against
	# +title+ according to MATCHER (both sides are downcased before
	# comparing), provided that score exceeds SIMILARITY_THRESHOLD.
	# Returns nil otherwise. On equal scores the entry seen first wins.
	def find_similar(title)
		lower_title = title.downcase
		best_match = nil
		best_score = nil
		@entries.each_value do |entry|
			next if entry.associated?
			score = MATCHER.getDistance(entry.title.downcase, lower_title)
			if best_score.nil? || score > best_score
				best_match = entry
				best_score = score
			end
		end
		if best_score && best_score > SIMILARITY_THRESHOLD
			best_match
		else
			nil
		end
	end

	# Turns +text+ into an ID-friendly string: downcases it, strips a
	# leading section number such as "1.2. ", replaces every character that
	# is not a letter, digit, '-' or '_' with '-', turns '_' into '-', and
	# collapses runs of '-' into one. +text+ itself is not modified.
	def slug(text)
		text.downcase.
			gsub(/^(\d+\.)+ /, '').
			gsub(/[^a-z0-9\-\_]/i, '-').
			gsub('_', '-').
			gsub(/--+/, '-')
	end

	# Returns the slug of +title+ followed by '-' and a random suffix. The
	# suffix is 4 bytes read from URANDOM, rendered as a base-36 integer.
	def create_unique_id(title)
		random_hex = URANDOM.read(4).unpack('H*')[0]
		suffix = random_hex.to_i(16).to_s(36)
		"#{slug(title)}-#{suffix}"
	end

	# Yields an IO object for +filename_or_io+ to the block.
	#
	# With +mode+ :read, an argument responding to +readline+ is yielded
	# as-is; with any other mode, an argument responding to +write+ is
	# yielded as-is. Anything else is treated as a filename and opened
	# with File.open ('r' or 'w' respectively), which closes the file
	# when the block returns.
	def open_io(filename_or_io, mode, &block)
		if mode == :read
			required_method = :readline
			file_mode = 'r'
		else
			required_method = :write
			file_mode = 'w'
		end

		if filename_or_io.respond_to?(required_method)
			yield(filename_or_io)
		else
			File.open(filename_or_io, file_mode, &block)
		end
	end

	# Returns [associated_entries, unassociated_entries], each sorted by title.
	def group_and_sort_entries
		normal, orphaned = @entries.values.partition { |entry| entry.associated? }
		[normal.sort, orphaned.sort]
	end

	# Writes each entry in +entries+ to +io+: an optional "# fuzzy" marker
	# line, the "title<SEPARATOR>id" line, and a trailing blank line.
	def write_entries(io, entries)
		entries.each do |entry|
			io.puts "# fuzzy" if entry.fuzzy?
			io.puts "#{entry.title}#{SEPARATOR}#{entry.id}"
			io.puts
		end
	end
end

end