# frozen_string_literal: true

module SyntaxTree
  module YARV
    # This class represents a control flow graph of a YARV instruction sequence.
    # It constructs a graph of basic blocks that hold subsets of the list of
    # instructions from the instruction sequence.
    #
    # You can use this class by calling the ::compile method and passing it a
    # YARV instruction sequence. It will return a control flow graph object.
    #
    #     iseq = RubyVM::InstructionSequence.compile("1 + 2")
    #     iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a)
    #     cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
    #
    class ControlFlowGraph
      # This class is responsible for creating a control flow graph from the
      # given instruction sequence.
      class Compiler
        # This is the instruction sequence that is being compiled.
        attr_reader :iseq

        # This is a hash of indices in the YARV instruction sequence that point
        # to their corresponding instruction.
        attr_reader :insns

        # This is a hash of labels that point to their corresponding index into
        # the YARV instruction sequence. Note that this is not the same as the
        # index into the list of instructions on the instruction sequence
        # object. Instead, this is the index into the C array, so it includes
        # operands.
        attr_reader :labels

        def initialize(iseq)
          @iseq = iseq

          @insns = {}
          @labels = {}

          length = 0
          iseq.insns.each do |insn|
            case insn
            when Instruction
              @insns[length] = insn
              length += insn.length
            when InstructionSequence::Label
              @labels[insn] = length
            end
          end
        end

        # This method is used to compile the instruction sequence into a control
        # flow graph. It returns an instance of ControlFlowGraph.
        def compile
          blocks = build_basic_blocks

          connect_basic_blocks(blocks)
          prune_basic_blocks(blocks)

          ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify)
        end

        private

        # Finds the indices of the instructions that start a basic block because
        # they're either:
        #
        # * the start of an instruction sequence
        # * the target of a branch
        # * fallen through to from a branch
        #
        def find_basic_block_starts
          block_starts = Set.new([0])

          insns.each do |index, insn|
            branch_targets = insn.branch_targets

            if branch_targets.any?
              branch_targets.each do |branch_target|
                block_starts.add(labels[branch_target])
              end

              block_starts.add(index + insn.length) if insn.falls_through?
            end
          end

          block_starts.to_a.sort
        end

        # Builds up a set of basic blocks by iterating over the starts of each
        # block. They are keyed by the index of their first instruction.
        def build_basic_blocks
          block_starts = find_basic_block_starts

          length = 0
          blocks =
            iseq
              .insns
              .grep(Instruction)
              .slice_after do |insn|
                length += insn.length
                block_starts.include?(length)
              end

          block_starts
            .zip(blocks)
            .to_h do |block_start, insns|
              # It's possible that we have not detected a block start but still
              # have branching instructions inside of a basic block. This can
              # happen if you have an unconditional jump which is followed by
              # instructions that are unreachable. As of Ruby 3.2, this is
              # possible with something as simple as "1 => a". In this case we
              # can discard all instructions that follow branching instructions.
              block_insns =
                insns.slice_after { |insn| insn.branch_targets.any? }.first

              [block_start, BasicBlock.new(block_start, block_insns)]
            end
        end

        # Connect the blocks by letting them know which blocks are incoming and
        # outgoing from each block.
        def connect_basic_blocks(blocks)
          blocks.each do |block_start, block|
            insn = block.insns.last

            insn.branch_targets.each do |branch_target|
              block.outgoing_blocks << blocks.fetch(labels[branch_target])
            end

            if (insn.branch_targets.empty? && !insn.leaves?) ||
                 insn.falls_through?
              fall_through_start = block_start + block.insns.sum(&:length)
              block.outgoing_blocks << blocks.fetch(fall_through_start)
            end

            block.outgoing_blocks.each do |outgoing_block|
              outgoing_block.incoming_blocks << block
            end
          end
        end

        # If there are blocks that are unreachable, we can remove them from the
        # graph entirely at this point.
        def prune_basic_blocks(blocks)
          visited = Set.new
          queue = [blocks.fetch(0)]

          until queue.empty?
            current_block = queue.shift
            next if visited.include?(current_block)

            visited << current_block
            queue.concat(current_block.outgoing_blocks)
          end

          blocks.select! { |_, block| visited.include?(block) }
        end
      end

      # This is the instruction sequence that this control flow graph
      # corresponds to.
      attr_reader :iseq

      # This is the list of instructions that this control flow graph contains.
      # It is effectively the same as the list of instructions in the
      # instruction sequence but with line numbers and events filtered out.
      attr_reader :insns

      # This is the set of basic blocks that this control-flow graph contains.
      attr_reader :blocks

      def initialize(iseq, insns, blocks)
        @iseq = iseq
        @insns = insns
        @blocks = blocks
      end

      def disasm
        fmt = Disassembler.new(iseq)
        fmt.puts("== cfg: #{iseq.inspect}")

        blocks.each do |block|
          fmt.puts(block.id)
          fmt.with_prefix("    ") do |prefix|
            unless block.incoming_blocks.empty?
              from = block.incoming_blocks.map(&:id)
              fmt.puts("#{prefix}== from: #{from.join(", ")}")
            end

            fmt.format_insns!(block.insns, block.block_start)

            to = block.outgoing_blocks.map(&:id)
            to << "leaves" if block.insns.last.leaves?
            fmt.puts("#{prefix}== to: #{to.join(", ")}")
          end
        end

        fmt.string
      end

      def to_dfg
        DataFlowGraph.compile(self)
      end

      def to_son
        to_dfg.to_son
      end

      def to_mermaid
        Mermaid.flowchart do |flowchart|
          disasm = Disassembler::Squished.new

          blocks.each do |block|
            flowchart.subgraph(block.id) do
              previous = nil

              block.each_with_length do |insn, length|
                node =
                  flowchart.node(
                    "node_#{length}",
                    "%04d %s" % [length, insn.disasm(disasm)]
                  )

                flowchart.link(previous, node) if previous
                previous = node
              end
            end
          end

          blocks.each do |block|
            block.outgoing_blocks.each do |outgoing|
              offset =
                block.block_start + block.insns.sum(&:length) -
                  block.insns.last.length

              from = flowchart.fetch("node_#{offset}")
              to = flowchart.fetch("node_#{outgoing.block_start}")
              flowchart.link(from, to)
            end
          end
        end
      end

      # This method is used to verify that the control flow graph is well
      # formed. It does this by checking that each basic block is itself well
      # formed.
      def verify
        blocks.each(&:verify)
      end

      def self.compile(iseq)
        Compiler.new(iseq).compile
      end
    end
  end
end