module MemDump
    class MemoryDump
        attr_reader :address_to_record

        def initialize(address_to_record)
            @address_to_record = address_to_record
            @forward_graph = nil
            @backward_graph = nil
        end

        def include?(address)
            address_to_record.has_key?(address)
        end

        def each_record(&block)
            address_to_record.each_value(&block)
        end

        def addresses
            address_to_record.keys
        end

        def size
            address_to_record.size
        end

        def find_by_address(address)
            address_to_record[address]
        end

        def inspect
            to_s
        end

        def save(io_or_path)
            if io_or_path.respond_to?(:open)
                io_or_path.open 'w' do |io|
                    save(io)
                end
            else
                each_record do |r|
                    io_or_path.puts JSON.dump(r)
                end
            end
        end

        # Filter the records
        #
        # @yieldparam record a record
        # @yieldreturn [Object] the record object that should be included in the
        #   returned dump
        # @return [MemoryDump]
        def find_all
            return enum_for(__method__) if !block_given?

            address_to_record = Hash.new
            each_record do |r|
                if yield(r)
                    address_to_record[r['address']] = r
                end
            end
            MemoryDump.new(address_to_record)
        end
        
        # Map the records
        #
        # @yieldparam record a record
        # @yieldreturn [Object] the record object that should be included in the
        #   returned dump
        # @return [MemoryDump]
        def map
            return enum_for(__method__) if !block_given?

            address_to_record = Hash.new
            each_record do |r|
                address_to_record[r['address']] = yield(r.dup).to_hash
            end
            MemoryDump.new(address_to_record)
        end

        # Filter the entries, removing those for which the block returns falsy
        #
        # @yieldparam record a record
        # @yieldreturn [nil,Object] either a record object, or falsy to remove
        #   this record in the returned dump
        # @return [MemoryDump]
        def find_and_map
            return enum_for(__method__) if !block_given?

            address_to_record = Hash.new
            each_record do |r|
                if result = yield(r.dup)
                    address_to_record[r['address']] = result.to_hash
                end
            end
            MemoryDump.new(address_to_record)
        end

        # Return the records of a given type
        #
        # @param [String] name the type
        # @return [MemoryDump] the matching records
        #
        # @example return all ICLASS (singleton) records
        #   objects_of_class("ICLASS")
        def objects_of_type(name)
            find_all { |r| name === r['type'] }
        end

        # Return the records of a given class
        #
        # @param [String] name the class
        # @return [MemoryDump] the matching entries
        #
        # @example return all string records
        #   objects_of_class("String")
        def objects_of_class(name)
            find_all { |r| name === r['class'] }
        end

        # Return the entries that refer to the entries in the dump
        #
        # @param [MemoryDump] the set of entries whose parents we're looking for
        # @param [Integer] min only return the entries in self that refer to
        #   more than this much entries in 'dump'
        # @param [Boolean] exclude_dump exclude the entries that are already in
        #   'dump'
        # @return [(MemoryDump,Hash)] the parent entries, and a mapping from
        #   records in the parent entries to the count of entries in 'dump' they
        #   refer to
        def parents_of(dump, min: 0, exclude_dump: false)
            children = dump.addresses.to_set
            counts = Hash.new
            filtered = find_all do |r|
                next if exclude_dump && children.include?(r['address'])

                count = r['references'].count { |r| children.include?(r) }
                if count > min
                    counts[r] = count
                    true
                end
            end
            return filtered, counts
        end

        # Remove entries from this dump, keeping the transitivity in the
        # remaining graph
        #
        # @param [MemoryDump] entries entries to remove
        #
        # @example remove all entries that are of type HASH
        #    collapse(objects_of_type('HASH'))
        def collapse(entries)
            collapsed_entries = Hash.new
            entries.each_record do |r|
                collapsed_entries[r['address']] = r['references'].dup
            end


            # Remove references in-between the entries to collapse
            already_expanded = Hash.new { |h, k| h[k] = Set[k] }
            begin
                changed_entries  = Hash.new
                collapsed_entries.each do |address, references|
                    sets = references.classify { |ref_address| collapsed_entries.has_key?(ref_address) }
                    updated_references = sets[false] || Set.new
                    if to_collapse = sets[true]
                        to_collapse.each do |ref_address|
                            next if already_expanded[address].include?(ref_address)
                            updated_references.merge(collapsed_entries[ref_address])
                        end
                        already_expanded[address].merge(to_collapse)
                        changed_entries[address] = updated_references
                    end
                end
                puts "#{changed_entries.size} changed entries"
                collapsed_entries.merge!(changed_entries)
            end while !changed_entries.empty?

            find_and_map do |record|
                next if collapsed_entries.has_key?(record['address'])

                sets = record['references'].classify do |ref_address|
                    collapsed_entries.has_key?(ref_address)
                end
                updated_references = sets[false] || Set.new
                if to_collapse = sets[true]
                    to_collapse.each do |ref_address|
                        updated_references.merge(collapsed_entries[ref_address])
                    end
                    record = record.dup
                    record['references'] = updated_references
                end
                record
            end
        end

        # Remove entries from the dump, and all references to them
        #
        # @param [MemoryDump] the set of entries to remove, as e.g. returned by
        #   {#objects_of_class}
        # @return [MemoryDump] the filtered dump
        def without(entries)
            find_and_map do |record|
                next if entries.include?(record['address'])
                record_refs = record['references']
                references = record_refs.find_all { |r| !entries.include?(r) }
                if references.size != record_refs.size
                    record = record.dup
                    record['references'] = references.to_set
                end
                record
            end
        end

        # Write the dump to a GML file that can loaded by Gephi
        #
        # @param [Pathname,String,IO] the path or the IO stream into which we should
        #   dump
        def to_gml(io_or_path)
            if io_or_path.kind_of?(IO)
                MemDump.convert_to_gml(self, io_or_path)
            else
                Pathname(io_or_path).open 'w' do |io|
                    to_gml(io)
                end
            end
            nil
        end

        # Save the dump
        def save(io_or_path)
            if io_or_path.kind_of?(IO)
                each_record do |r|
                    r = r.dup
                    r['address'] = r['address'].gsub(/:\d+$/, '')
                    if r['class_address']
                        r['class_address'] = r['class_address'].gsub(/:\d+$/, '')
                    elsif r['address']
                        r['address'] = r['address'].gsub(/:\d+$/, '')
                    end
                    r['references'] = r['references'].map { |ref_addr| ref_addr.gsub(/:\d+$/, '') }
                    io_or_path.puts JSON.dump(r)
                end
                nil
            else
                Pathname(io_or_path).open 'w' do |io|
                    save(io)
                end
            end
        end

        COMMON_COLLAPSE_TYPES = %w{IMEMO HASH ARRAY}
        COMMON_COLLAPSE_CLASSES = %w{Set RubyVM::Env}

        # Perform common initial cleanup
        #
        # It basically removes common classes that usually make a dump analysis
        # more complicated without providing more information
        #
        # Namely, it collapses internal Ruby node types ROOT and IMEMO, as well
        # as common collection classes {COMMON_COLLAPSE_CLASSES}.
        #
        # One usually analyses a cleaned-up dump before getting into the full
        # dump
        #
        # @return [MemDump] the filtered dump
        def common_cleanup
            without_weakrefs = remove(objects_of_class 'WeakRef')
            to_collapse = without_weakrefs.find_all do |r|
                COMMON_COLLAPSE_CLASSES.include?(r['class']) ||
                    COMMON_COLLAPSE_TYPES.include?(r['type']) ||
                    r['method'] == 'dump_all'
            end
            without_weakrefs.collapse(to_collapse)
        end

        # Remove entries in the reference for which we can't find an object with
        # the matching address
        #
        # @return [(MemoryDump,Set)] the filtered dump and the set of missing addresses found
        def remove_invalid_references
            addresses = self.addresses.to_set
            missing = Set.new
            result = map do |r|
                common = (addresses & r['references'])
                if common.size != r['references'].size
                    missing.merge(r['references'] - common)
                end
                r = r.dup
                r['references'] = common
                r
            end
            return result, missing
        end

        # Return the graph of object that keeps objects in dump alive
        #
        # It contains only the shortest paths from the roots to the objects in
        # dump
        #
        # @param [MemoryDump] dump
        # @return [MemoryDump]
        def roots_of(dump, root_dump: nil)
            if root_dump && root_dump.empty?
                raise ArgumentError, "no roots provided"
            end

            root_addresses =
                if root_dump then root_dump.addresses
                else
                    ['ALL_ROOTS']
                end

            ensure_graphs_computed

            result_nodes = Set.new
            dump_addresses = dump.addresses
            root_addresses.each do |root_address|
                visitor = RGL::DijkstraVisitor.new(@forward_graph)
                dijkstra = RGL::DijkstraAlgorithm.new(@forward_graph, Hash.new(1), visitor)
                dijkstra.find_shortest_paths(root_address)
                path_builder = RGL::PathBuilder.new(root_address, visitor.parents_map)

                dump_addresses.each_with_index do |record_address, record_i|
                    if path = path_builder.path(record_address)
                        result_nodes.merge(path)
                    end
                end
            end

            find_and_map do |record|
                address = record['address']
                next if !result_nodes.include?(address)

                # Prefer records in 'dump' to allow for annotations in the
                # source
                record = dump.find_by_address(address) || record
                record = record.dup
                record['references'] = result_nodes & record['references']
                record
            end
        end

        def minimum_spanning_tree(root_dump)
            if root_dump.size != 1
                raise ArgumentError, "there should be exactly one root"
            end
            root_address, _ = root_dump.address_to_record.first
            if !(root = address_to_record[root_address])
                raise ArgumentError, "no record with address #{root_address} in self"
            end

            ensure_graphs_computed

            mst = @forward_graph.minimum_spanning_tree(root)
            map = Hash.new
            mst.each_vertex do |record|
                record = record.dup
                record['references'] = record['references'].dup
                record['references'].delete_if { |ref_address| !mst.has_vertex?(ref_address) }
            end
            MemoryDump.new(map)
        end

        # @api private
        #
        # Ensure that @forward_graph and @backward_graph are computed
        def ensure_graphs_computed
            if !@forward_graph
                @forward_graph, @backward_graph = compute_graphs
            end
        end

        # @api private
        #
        # Force recomputation of the graph representation of the dump the next
        # time it is needed
        def clear_graph
            @forward_graph = nil
            @backward_graph = nil
        end

        # @api private
        #
        # Create two RGL::DirectedAdjacencyGraph, for the forward and backward edges of the graph
        def compute_graphs
            forward_graph  = RGL::DirectedAdjacencyGraph.new
            forward_graph.add_vertex 'ALL_ROOTS'
            address_to_record.each do |address, record|
                forward_graph.add_vertex(address)

                if record['type'] == 'ROOT'
                    forward_graph.add_edge('ALL_ROOTS', address)
                end
                record['references'].each do |ref_address|
                    forward_graph.add_edge(address, ref_address)
                end
            end

            backward_graph  = RGL::DirectedAdjacencyGraph.new
            forward_graph.each_edge do |u, v|
                backward_graph.add_edge(v, u)
            end
            return forward_graph, backward_graph
        end

        def depth_first_visit(root, &block)
            ensure_graphs_computed
            @forward_graph.depth_first_visit(root, &block)
        end

        # Validate that all reference entries have a matching dump entry
        #
        # @raise [RuntimeError] if references have been found
        def validate_references
            addresses = self.addresses.to_set
            each_record do |r|
                common = addresses & r['references']
                if common.size != r['references'].size
                    missing = r['references'] - common
                    raise "#{r} references #{missing.to_a.sort.join(", ")} which do not exist"
                end
            end
            nil
        end

        # Get a random sample of the records
        #
        # The sampling is random, so the returned set might be bigger or smaller
        # than expected. Do not use on small sets.
        #
        # @param [Float] the ratio of selected samples vs. total samples (0.1
        #   will select approximately 10% of the samples)
        def sample(ratio)
            result = Hash.new
            each_record do |record|
                if rand <= ratio
                    result[record['address']] = record
                end
            end
            MemoryDump.new(result)
        end

        # @api private
        #
        # Return the set of record addresses that are the addresses of roots in
        # the live graph
        #
        # @return [Set<String>]
        def root_addresses
            roots = self.addresses.to_set.dup
            each_record do |r|
                roots.subtract(r['references'])
            end
            roots
        end

        # Returns the set of roots
        def roots(with_keepalive_count: false)
            result = Hash.new
            self.root_addresses.each do |addr|
                record = find_by_address(addr)
                if with_keepalive_count
                    record = record.dup
                    count = 0
                    depth_first_visit(addr) { count += 1 }
                    record['keepalive_count'] = count
                end
                result[addr] = record
            end
            MemoryDump.new(result)
        end

        def add_children(roots, with_keepalive_count: false)
            result = Hash.new
            roots.each_record do |root_record|
                result[root_record['address']] = root_record

                root_record['references'].each do |addr|
                    ref_record = find_by_address(addr)
                    next if !ref_record

                    if with_keepalive_count
                        ref_record = ref_record.dup
                        count = 0
                        depth_first_visit(addr) { count += 1 }
                        ref_record['keepalive_count'] = count
                    end
                    result[addr] = ref_record
                end
            end
            MemoryDump.new(result)
        end

        def dup
            find_all { true }
        end

        # Simply remove the given objects
        def remove(objects)
            removed_addresses = objects.addresses.to_set
            return dup if removed_addresses.empty?

            find_and_map do |r|
                if !removed_addresses.include?(r['address'])
                    references = r['references'].dup
                    references.delete_if { |a| removed_addresses.include?(a) }
                    r['references'] = references
                    r
                end
            end
        end

        # Remove all components that are smaller than the given number of nodes
        #
        # It really looks only at the number of nodes reachable from a root
        # (i.e. won't notice if two smaller-than-threshold roots have nodes in
        # common)
        def remove_small_components(max_size: 1)
            roots = self.addresses.to_set.dup
            leaves  = Set.new
            each_record do |r|
                refs = r['references']
                if refs.empty?
                    leaves << r['address']
                else
                    roots.subtract(r['references'])
                end
            end

            to_remove = Set.new
            roots.each do |root_address|
                component = Set[]
                queue = Set[root_address]
                while !queue.empty? && (component.size <= max_size)
                    address = queue.first
                    queue.delete(address)
                    next if component.include?(address)
                    component << address
                    queue.merge(address_to_record[address]['references'])
                end

                if component.size <= max_size
                    to_remove.merge(component)
                end
            end

            without(find_all { |r| to_remove.include?(r['address']) })
        end

        def stats
            unknown_class = 0
            by_class = Hash.new(0)
            each_record do |r|
                if klass = (r['class'] || r['type'] || r['root'])
                    by_class[klass] += 1
                else
                    unknown_class += 1
                end
            end
            return unknown_class, by_class
        end

        # Compute the set of records that are not in self but are in to
        #
        # @param [MemoryDump]
        # @return [MemoryDump]
        def diff(to)
            diff = Hash.new
            to.each_record do |r|
                address = r['address']
                if !@address_to_record.include?(address)
                    diff[address] = r
                end
            end
            MemoryDump.new(diff)
        end

        # Compute the interface between self and the other dump, that is the
        # elements of self that have a child in dump, and the elements of dump
        # that have a parent in self
        def interface_with(dump)
            self_border = Hash.new
            dump_border = Hash.new
            each_record do |r|
                next if dump.find_by_address(r['address'])

                refs_in_dump = r['references'].map do |addr|
                    dump.find_by_address(addr)
                end.compact

                if !refs_in_dump.empty?
                    self_border[r['address']] = r
                    refs_in_dump.each do |child|
                        dump_border[child['address']] = child.dup
                    end
                end
            end

            self_border = MemoryDump.new(self_border)
            dump_border = MemoryDump.new(dump_border)

            dump.update_keepalive_count(dump_border)
            return self_border, dump_border
        end

        # Replace all objects in dump by a single "group" object
        def group(name, dump, attributes = Hash.new)
            group_addresses   = Set.new
            group_references  = Set.new
            dump.each_record do |r|
                group_addresses << r['address']
                group_references.merge(r['references'])
            end
            group_record = attributes.dup
            group_record['address']    = name
            group_record['references'] = group_references - group_addresses

            updated = Hash[name => group_record]
            each_record do |record|
                next if group_addresses.include?(record['address'])

                updated_record = record.dup
                updated_record['references'] -= group_addresses
                if updated_record['references'].size != record['references'].size
                    updated_record['references'] << name
                end

                if group_addresses.include?(updated_record['class_address'])
                    updated_record['class_address'] = name
                end
                if group_addresses.include?(updated_record['class'])
                    updated_record['class'] = name
                end

                updated[updated_record['address']] = updated_record
            end

            MemoryDump.new(updated)
        end

        def update_keepalive_count(dump)
            ensure_graphs_computed
            dump.each_record do |record|
                count = 0
                dump.depth_first_visit(record['address']) { |obj| count += 1 }
                record['keepalive_count'] = count
                record
            end
        end

        def replace_class_id_by_class_name(add_reference_to_class: false)
            MemDump.replace_class_address_by_name(self, add_reference_to_class: add_reference_to_class)
        end

        def to_s
            "#<MemoryDump size=#{size}>"
        end
    end
end