/*
 * This file was initially derived from the files
 * `js/src/jit/BacktrackingAllocator.h` and
 * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was
 * originally licensed under the Mozilla Public License 2.0. We
 * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see
 * https://github.com/bytecodealliance/regalloc2/issues/7).
 *
 * Since the initial port, the design has been substantially evolved
 * and optimized.
 */

//! Data structures for backtracking allocator.

use super::liveranges::SpillWeight;
use crate::cfg::{CFGInfo, CFGInfoCtx};
use crate::index::ContainerComparator;
use crate::indexset::IndexSet;
use crate::Vec2;
use crate::{
    define_index, Allocation, Block, Bump, Edit, Function, FxHashMap, FxHashSet, MachineEnv,
    Operand, Output, PReg, ProgPoint, RegClass, VReg,
};
use alloc::collections::BTreeMap;
use alloc::collections::VecDeque;
use alloc::string::String;
use alloc::vec::Vec;
use core::cmp::Ordering;
use core::fmt::Debug;
use core::ops::{Deref, DerefMut};
use smallvec::SmallVec;

/// A range from `from` (inclusive) to `to` (exclusive).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CodeRange {
    pub from: ProgPoint,
    pub to: ProgPoint,
}

impl CodeRange {
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.from >= self.to
    }
    #[inline(always)]
    pub fn contains(&self, other: &Self) -> bool {
        other.from >= self.from && other.to <= self.to
    }
    #[inline(always)]
    pub fn contains_point(&self, other: ProgPoint) -> bool {
        other >= self.from && other < self.to
    }
    #[inline(always)]
    pub fn overlaps(&self, other: &Self) -> bool {
        other.to > self.from && other.from < self.to
    }
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.to.inst().index() - self.from.inst().index()
    }
    /// Returns the range covering just one program point.
    #[inline(always)]
    pub fn singleton(pos: ProgPoint) -> CodeRange {
        CodeRange {
            from: pos,
            to: pos.next(),
        }
    }

    /// Join two [CodeRange] values together, producing a [CodeRange] that includes both.
    #[inline(always)]
    pub fn join(&self, other: CodeRange) -> Self {
        CodeRange {
            from: self.from.min(other.from),
            to: self.to.max(other.to),
        }
    }
}

impl core::cmp::PartialOrd for CodeRange {
    #[inline(always)]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl core::cmp::Ord for CodeRange {
    #[inline(always)]
    fn cmp(&self, other: &Self) -> Ordering {
        if self.to <= other.from {
            Ordering::Less
        } else if self.from >= other.to {
            Ordering::Greater
        } else {
            Ordering::Equal
        }
    }
}
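
// Note that this ordering deliberately treats any two overlapping ranges as
// `Equal`. As an illustrative sketch (not compiled here), a sorted list of
// disjoint ranges can then be binary-searched with a singleton probe to find
// the range containing a given point:
//
//     let r = CodeRange { from: ProgPoint::from_index(0), to: ProgPoint::from_index(4) };
//     let probe = CodeRange::singleton(ProgPoint::from_index(2));
//     assert_eq!(r.cmp(&probe), Ordering::Equal);
//
// This is only a consistent total order as long as the ranges stored in any
// one sorted container are mutually non-overlapping.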

define_index!(LiveBundleIndex, LiveBundles, LiveBundle);
define_index!(LiveRangeIndex, LiveRanges, LiveRange);
define_index!(SpillSetIndex, SpillSets, SpillSet);
define_index!(UseIndex);
define_index!(VRegIndex, VRegs, VRegData);
define_index!(PRegIndex);
define_index!(SpillSlotIndex);

/// Used to carry small sets of bundles, e.g. for conflict sets.
pub type LiveBundleVec = Vec<LiveBundleIndex>;

#[derive(Clone, Copy, Debug)]
pub struct LiveRangeListEntry {
    pub range: CodeRange,
    pub index: LiveRangeIndex,
}

pub type LiveRangeList = Vec2<LiveRangeListEntry, Bump>;
pub type UseList = Vec2<Use, Bump>;

#[derive(Clone, Debug)]
pub struct LiveRange {
    pub range: CodeRange,

    pub vreg: VRegIndex,
    pub bundle: LiveBundleIndex,
    pub uses_spill_weight_and_flags: u32,
    pub(crate) uses: UseList,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u32)]
pub enum LiveRangeFlag {
    StartsAtDef = 1,
}

impl LiveRange {
    #[inline(always)]
    pub fn set_flag(&mut self, flag: LiveRangeFlag) {
        self.uses_spill_weight_and_flags |= (flag as u32) << 29;
    }
    #[inline(always)]
    pub fn clear_flag(&mut self, flag: LiveRangeFlag) {
        self.uses_spill_weight_and_flags &= !((flag as u32) << 29);
    }
    #[inline(always)]
    pub fn assign_flag(&mut self, flag: LiveRangeFlag, val: bool) {
        let bit = if val { (flag as u32) << 29 } else { 0 };
        self.uses_spill_weight_and_flags &= !((flag as u32) << 29);
        self.uses_spill_weight_and_flags |= bit;
    }
    #[inline(always)]
    pub fn has_flag(&self, flag: LiveRangeFlag) -> bool {
        self.uses_spill_weight_and_flags & ((flag as u32) << 29) != 0
    }
    #[inline(always)]
    pub fn flag_word(&self) -> u32 {
        self.uses_spill_weight_and_flags & 0xe000_0000
    }
    #[inline(always)]
    pub fn merge_flags(&mut self, flag_word: u32) {
        self.uses_spill_weight_and_flags |= flag_word;
    }
    #[inline(always)]
    pub fn uses_spill_weight(&self) -> SpillWeight {
        // NOTE: the spill weight occupies the low 29 bits: the f32 bit pattern
        // of the weight shifted right by two, with the sign bit dropped
        // (weights are always non-negative) and the two lowest mantissa bits
        // lost. To reconstruct the f32, mask off the top three flag bits and
        // shift left by two.
        let bits = (self.uses_spill_weight_and_flags & 0x1fff_ffff) << 2;
        SpillWeight::from_f32(f32::from_bits(bits))
    }
    #[inline(always)]
    pub fn set_uses_spill_weight(&mut self, weight: SpillWeight) {
        let weight_bits = (weight.to_f32().to_bits() >> 2) & 0x1fff_ffff;
        self.uses_spill_weight_and_flags =
            (self.uses_spill_weight_and_flags & 0xe000_0000) | weight_bits;
    }
}
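
// Layout of `uses_spill_weight_and_flags`, as implied by the accessors above:
//
//     bits 31..29: `LiveRangeFlag` bits (each flag value shifted left by 29)
//     bits 28..0 : f32 bits of the spill weight, shifted right by two, with
//                  the sign bit dropped (weights are non-negative) and the two
//                  lowest mantissa bits lost
//
// Illustrative round trip (a sketch, not compiled here): a weight whose low
// mantissa bits are zero survives exactly:
//
//     lr.set_uses_spill_weight(SpillWeight::from_f32(2.0));
//     assert_eq!(lr.uses_spill_weight().to_f32(), 2.0);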

#[derive(Clone, Copy, Debug)]
pub struct Use {
    pub operand: Operand,
    pub pos: ProgPoint,
    pub slot: u8,
    pub weight: u16,
}

impl Use {
    #[inline(always)]
    pub fn new(operand: Operand, pos: ProgPoint, slot: u8) -> Self {
        Self {
            operand,
            pos,
            slot,
            // Weight is updated on insertion into LR.
            weight: 0,
        }
    }
}

#[derive(Clone, Debug)]
pub struct LiveBundle {
    pub(crate) ranges: LiveRangeList,
    pub spillset: SpillSetIndex,
    pub allocation: Allocation,
    pub prio: u32, // recomputed after every bulk update
    pub spill_weight_and_props: u32,
}

pub const BUNDLE_MAX_SPILL_WEIGHT: u32 = (1 << 29) - 1;
pub const MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT: u32 = BUNDLE_MAX_SPILL_WEIGHT;
pub const MINIMAL_BUNDLE_SPILL_WEIGHT: u32 = BUNDLE_MAX_SPILL_WEIGHT - 1;
pub const BUNDLE_MAX_NORMAL_SPILL_WEIGHT: u32 = BUNDLE_MAX_SPILL_WEIGHT - 2;
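
// The top two weight values act as sentinels for minimal bundles (fixed and
// non-fixed); normal bundles are expected to stay at or below
// `BUNDLE_MAX_NORMAL_SPILL_WEIGHT`, so a minimal bundle always compares as
// more expensive to spill.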

impl LiveBundle {
    #[inline(always)]
    pub fn set_cached_spill_weight_and_props(
        &mut self,
        spill_weight: u32,
        minimal: bool,
        fixed: bool,
        fixed_def: bool,
    ) {
        debug_assert!(spill_weight <= BUNDLE_MAX_SPILL_WEIGHT);
        self.spill_weight_and_props = spill_weight
            | (if minimal { 1 << 31 } else { 0 })
            | (if fixed { 1 << 30 } else { 0 })
            | (if fixed_def { 1 << 29 } else { 0 });
    }

    #[inline(always)]
    pub fn cached_minimal(&self) -> bool {
        self.spill_weight_and_props & (1 << 31) != 0
    }

    #[inline(always)]
    pub fn cached_fixed(&self) -> bool {
        self.spill_weight_and_props & (1 << 30) != 0
    }

    #[inline(always)]
    pub fn cached_fixed_def(&self) -> bool {
        self.spill_weight_and_props & (1 << 29) != 0
    }

    #[inline(always)]
    pub fn set_cached_fixed(&mut self) {
        self.spill_weight_and_props |= 1 << 30;
    }

    #[inline(always)]
    pub fn set_cached_fixed_def(&mut self) {
        self.spill_weight_and_props |= 1 << 29;
    }

    #[inline(always)]
    pub fn cached_spill_weight(&self) -> u32 {
        self.spill_weight_and_props & BUNDLE_MAX_SPILL_WEIGHT
    }
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BundleProperties {
    pub minimal: bool,
    pub fixed: bool,
}

/// Calculate the maximum `N` inline capacity for a `SmallVec<[T; N]>` we can
/// have without bloating its size to be larger than a `Vec<T>`.
const fn no_bloat_capacity<T>() -> usize {
    // `Vec<T>` is three words: `(pointer, capacity, length)`.
    //
    // A `SmallVec<[T; N]>` replaces the first two members with the following:
    //
    //     union {
    //         Inline([T; N]),
    //         Heap(pointer, capacity),
    //     }
    //
    // So if `size_of([T; N]) == size_of(pointer) + size_of(capacity)` then we
    // get the maximum inline capacity without bloat.
    core::mem::size_of::<usize>() * 2 / core::mem::size_of::<T>()
}
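
// For example, on a 64-bit target this is `16 / size_of::<T>()`: a 4-byte `T`
// yields an inline capacity of 4, and `no_bloat_capacity::<usize>()` is always
// 2 regardless of pointer width.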

#[derive(Clone, Debug)]
pub struct SpillSet {
    pub slot: SpillSlotIndex,
    pub reg_hint: PReg,
    pub class: RegClass,
    pub spill_bundle: LiveBundleIndex,
    pub required: bool,
    pub splits: u8,

    /// The aggregate [`CodeRange`] of all involved [`LiveRange`]s. The effect of this abstraction
    /// is that we attempt to allocate one spill slot for the extent of a bundle. For fragmented
    /// bundles with lots of open space this abstraction is pessimistic, but when bundles are small
    /// or dense this yields similar results to tracking individual live ranges.
    pub range: CodeRange,
}

pub(crate) const MAX_SPLITS_PER_SPILLSET: u8 = 2;

#[derive(Clone, Debug)]
pub struct VRegData {
    pub(crate) ranges: LiveRangeList,
    pub blockparam: Block,
    // We don't initially know the RegClass until we observe a use of the VReg.
    pub class: Option<RegClass>,
}

#[derive(Clone, Debug)]
pub struct PRegData {
    pub allocations: LiveRangeSet,
    pub is_stack: bool,
}

#[derive(Clone, Debug)]
pub struct MultiFixedRegFixup {
    pub pos: ProgPoint,
    pub from_slot: u8,
    pub to_slot: u8,
    pub level: FixedRegFixupLevel,
    pub to_preg: PRegIndex,
    pub vreg: VRegIndex,
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FixedRegFixupLevel {
    /// A fixup copy for the initial fixed reg; must come first.
    Initial,
    /// A fixup copy from the first fixed reg to other fixed regs for
    /// the same vreg; must come second.
    Secondary,
}

/// The field order is significant: entries are sorted so that a
/// scan over vregs, then over blocks within each range, can walk
/// this (sorted) list in order and add allocs to the half-move
/// list.
///
/// The fields in this struct are reversed in sort order so that the entire
/// struct can be treated as a u128 for sorting purposes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub struct BlockparamOut {
    pub to_vreg: VRegIndex,
    pub to_block: Block,
    pub from_block: Block,
    pub from_vreg: VRegIndex,
}
impl BlockparamOut {
    #[inline(always)]
    pub fn key(&self) -> u128 {
        u128_key(
            self.from_vreg.raw_u32(),
            self.from_block.raw_u32(),
            self.to_block.raw_u32(),
            self.to_vreg.raw_u32(),
        )
    }
}

/// As with `BlockparamOut` above, field order is significant.
///
/// The fields in this struct are stored in reverse order of their sort
/// significance; `key()` packs them into a u128 for sorting purposes.
#[derive(Clone, Debug)]
#[repr(C)]
pub struct BlockparamIn {
    pub from_block: Block,
    pub to_block: Block,
    pub to_vreg: VRegIndex,
}
impl BlockparamIn {
    #[inline(always)]
    pub fn key(&self) -> u128 {
        u128_key(
            self.to_vreg.raw_u32(),
            self.to_block.raw_u32(),
            self.from_block.raw_u32(),
            0,
        )
    }
}

impl LiveRanges {
    pub(crate) fn add(&mut self, range: CodeRange, bump: Bump) -> LiveRangeIndex {
        self.push(LiveRange {
            range,
            vreg: VRegIndex::invalid(),
            bundle: LiveBundleIndex::invalid(),
            uses_spill_weight_and_flags: 0,
            uses: UseList::new_in(bump),
        })
    }
}

impl LiveBundles {
    pub(crate) fn add(&mut self, bump: Bump) -> LiveBundleIndex {
        self.push(LiveBundle {
            allocation: Allocation::none(),
            ranges: LiveRangeList::new_in(bump),
            spillset: SpillSetIndex::invalid(),
            prio: 0,
            spill_weight_and_props: 0,
        })
    }
}

impl VRegs {
    pub fn add(&mut self, reg: VReg, data: VRegData) -> VRegIndex {
        let idx = self.push(data);
        debug_assert_eq!(reg.vreg(), idx.index());
        idx
    }
}

impl core::ops::Index<VReg> for VRegs {
    type Output = VRegData;

    #[inline(always)]
    fn index(&self, idx: VReg) -> &Self::Output {
        &self.storage[idx.vreg()]
    }
}

impl core::ops::IndexMut<VReg> for VRegs {
    #[inline(always)]
    fn index_mut(&mut self, idx: VReg) -> &mut Self::Output {
        &mut self.storage[idx.vreg()]
    }
}

#[derive(Default)]
pub struct Ctx {
    pub(crate) cfginfo: CFGInfo,
    pub(crate) cfginfo_ctx: CFGInfoCtx,
    pub(crate) liveins: Vec<IndexSet>,
    pub(crate) liveouts: Vec<IndexSet>,
    pub(crate) blockparam_outs: Vec<BlockparamOut>,
    pub(crate) blockparam_ins: Vec<BlockparamIn>,

    pub(crate) ranges: LiveRanges,
    pub(crate) bundles: LiveBundles,
    pub(crate) spillsets: SpillSets,
    pub(crate) vregs: VRegs,
    pub(crate) pregs: Vec<PRegData>,
    pub(crate) allocation_queue: PrioQueue,

    pub(crate) spilled_bundles: Vec<LiveBundleIndex>,
    pub(crate) spillslots: Vec<SpillSlotData>,
    pub(crate) slots_by_class: [SpillSlotList; 3],

    pub(crate) extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 3],
    pub(crate) preferred_victim_by_class: [PReg; 3],

    // When multiple fixed-register constraints are present on a
    // single VReg at a single program point (this can happen for,
    // e.g., call args that use the same value multiple times), we
    // remove all but one of the fixed-register constraints, make a
    // note here, and add a clobber with that PReg instead to keep
    // the register available. When we produce the final edit-list, we
    // will insert a copy from wherever the VReg's primary allocation
    // was to the appropriate PReg.
    pub(crate) multi_fixed_reg_fixups: Vec<MultiFixedRegFixup>,

    pub(crate) allocated_bundle_count: usize,

    // For debug output only: a list of textual annotations at every
    // ProgPoint to insert into the final allocated program listing.
    pub(crate) debug_annotations: FxHashMap<ProgPoint, Vec<String>>,
    pub(crate) annotations_enabled: bool,

    // Cached allocation for `try_to_allocate_bundle_to_reg` to avoid allocating
    // a new HashSet on every call.
    pub(crate) conflict_set: FxHashSet<LiveBundleIndex>,

    // Output:
    pub output: Output,

    pub(crate) scratch_conflicts: LiveBundleVec,
    pub(crate) scratch_bundle: LiveBundleVec,
    pub(crate) scratch_vreg_ranges: Vec<LiveRangeIndex>,
    pub(crate) scratch_spillset_pool: Vec<SpillSetRanges>,

    pub(crate) scratch_workqueue: VecDeque<Block>,

    pub(crate) scratch_operand_rewrites: FxHashMap<usize, Operand>,
    pub(crate) scratch_removed_lrs: FxHashSet<LiveRangeIndex>,
    pub(crate) scratch_removed_lrs_vregs: FxHashSet<VRegIndex>,
    pub(crate) scratch_workqueue_set: FxHashSet<Block>,

    pub(crate) scratch_bump: Bump,
}

impl Ctx {
    pub(crate) fn bump(&self) -> Bump {
        self.scratch_bump.clone()
    }
}

pub struct Env<'a, F: Function> {
    pub func: &'a F,
    pub env: &'a MachineEnv,
    pub ctx: &'a mut Ctx,
}

impl<'a, F: Function> Deref for Env<'a, F> {
    type Target = Ctx;

    fn deref(&self) -> &Self::Target {
        self.ctx
    }
}

impl<'a, F: Function> DerefMut for Env<'a, F> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.ctx
    }
}

impl<'a, F: Function> Env<'a, F> {
    /// Get the VReg (with bundled RegClass) from a vreg index.
    #[inline]
    pub fn vreg(&self, index: VRegIndex) -> VReg {
        let class = self.vregs[index]
            .class
            .expect("trying to get a VReg before observing its class");
        VReg::new(index.index(), class)
    }

    /// Record the class of a VReg. We learn this only when we observe
    /// the VReg in use.
    pub fn observe_vreg_class(&mut self, vreg: VReg) {
        let old_class = self.vregs[vreg].class.replace(vreg.class());
        // We should never observe two different classes for two
        // mentions of a VReg in the source program.
        debug_assert!(old_class == None || old_class == Some(vreg.class()));
    }

    /// Is this vreg actually used in the source program?
    pub fn is_vreg_used(&self, index: VRegIndex) -> bool {
        self.vregs[index].class.is_some()
    }
}

#[derive(Clone, Debug, Default)]
pub struct SpillSetRanges {
    pub btree: BTreeMap<LiveRangeKey, SpillSetIndex>,
}

#[derive(Clone, Debug)]
pub struct SpillSlotData {
    pub ranges: SpillSetRanges,
    pub slots: u32,
    pub alloc: Allocation,
}

#[derive(Clone, Debug, Default)]
pub struct SpillSlotList {
    pub slots: SmallVec<[SpillSlotIndex; 32]>,
    pub probe_start: usize,
}

impl SpillSlotList {
    /// Get the next spillslot index in probing order, wrapping around
    /// at the end of the slots list.
    pub(crate) fn next_index(&self, index: usize) -> usize {
        debug_assert!(index < self.slots.len());
        if index == self.slots.len() - 1 {
            0
        } else {
            index + 1
        }
    }
}

#[derive(Clone, Debug, Default)]
pub struct PrioQueue {
    pub heap: alloc::collections::BinaryHeap<PrioQueueEntry>,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct PrioQueueEntry {
    pub prio: u32,
    pub bundle: LiveBundleIndex,
    pub reg_hint: PReg,
}

#[derive(Clone, Debug)]
pub struct LiveRangeSet {
    pub btree: BTreeMap<LiveRangeKey, LiveRangeIndex>,
}

#[derive(Clone, Copy, Debug)]
pub struct LiveRangeKey {
    pub from: u32,
    pub to: u32,
}

impl LiveRangeKey {
    #[inline(always)]
    pub fn from_range(range: &CodeRange) -> Self {
        Self {
            from: range.from.to_index(),
            to: range.to.to_index(),
        }
    }

    #[inline(always)]
    pub fn to_range(&self) -> CodeRange {
        CodeRange {
            from: ProgPoint::from_index(self.from),
            to: ProgPoint::from_index(self.to),
        }
    }
}

impl core::cmp::PartialEq for LiveRangeKey {
    #[inline(always)]
    fn eq(&self, other: &Self) -> bool {
        self.to > other.from && self.from < other.to
    }
}
impl core::cmp::Eq for LiveRangeKey {}
impl core::cmp::PartialOrd for LiveRangeKey {
    #[inline(always)]
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl core::cmp::Ord for LiveRangeKey {
    #[inline(always)]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        if self.to <= other.from {
            core::cmp::Ordering::Less
        } else if self.from >= other.to {
            core::cmp::Ordering::Greater
        } else {
            core::cmp::Ordering::Equal
        }
    }
}
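
// Like `CodeRange`'s ordering above, `LiveRangeKey` treats any two overlapping
// keys as equal. This is what lets a `BTreeMap<LiveRangeKey, _>` holding
// non-overlapping ranges be probed for a conflict with an arbitrary query
// range. An illustrative sketch (not compiled here):
//
//     let existing = LiveRangeKey { from: 0, to: 4 };
//     let probe = LiveRangeKey { from: 2, to: 6 };
//     assert_eq!(existing, probe); // overlap counts as equality
//     assert!(LiveRangeKey { from: 6, to: 8 } > probe); // disjoint: ordered by position
//
// Because equality means "overlaps", the relation is not transitive in
// general; it is only sound when the keys stored in a given map are mutually
// non-overlapping.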

pub struct PrioQueueComparator<'a> {
    pub prios: &'a [usize],
}
impl<'a> ContainerComparator for PrioQueueComparator<'a> {
    type Ix = LiveBundleIndex;
    fn compare(&self, a: Self::Ix, b: Self::Ix) -> core::cmp::Ordering {
        self.prios[a.index()].cmp(&self.prios[b.index()])
    }
}

impl PrioQueue {
    #[inline(always)]
    pub fn insert(&mut self, bundle: LiveBundleIndex, prio: usize, reg_hint: PReg) {
        self.heap.push(PrioQueueEntry {
            prio: prio as u32,
            bundle,
            reg_hint,
        });
    }

    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.heap.is_empty()
    }

    #[inline(always)]
    pub fn pop(&mut self) -> Option<(LiveBundleIndex, PReg)> {
        self.heap.pop().map(|entry| (entry.bundle, entry.reg_hint))
    }
}

impl LiveRangeSet {
    pub(crate) fn new() -> Self {
        Self {
            btree: BTreeMap::default(),
        }
    }
}

#[derive(Clone, Debug)]
pub struct InsertedMove {
    pub pos_prio: PosWithPrio,
    pub from_alloc: Allocation,
    pub to_alloc: Allocation,
    pub to_vreg: VReg,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum InsertMovePrio {
    InEdgeMoves,
    Regular,
    MultiFixedRegInitial,
    MultiFixedRegSecondary,
    ReusedInput,
    OutEdgeMoves,
}
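
// The derived `Ord` on `InsertMovePrio` orders variants by declaration order,
// so `InEdgeMoves < Regular < ... < OutEdgeMoves`. Combined with
// `PosWithPrio::key` below, moves at the same program point sort in the order
// the variants are declared here.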

#[derive(Debug, Default)]
pub struct InsertedMoves {
    pub moves: Vec<InsertedMove>,
}

impl InsertedMoves {
    pub fn push(
        &mut self,
        pos: ProgPoint,
        prio: InsertMovePrio,
        from_alloc: Allocation,
        to_alloc: Allocation,
        to_vreg: VReg,
    ) {
        trace!(
            "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?} to_vreg {:?}",
            pos,
            prio,
            from_alloc,
            to_alloc,
            to_vreg
        );
        if from_alloc == to_alloc {
            trace!(" -> skipping move with same source and  dest");
            return;
        }
        if let Some(from) = from_alloc.as_reg() {
            debug_assert_eq!(from.class(), to_vreg.class());
        }
        if let Some(to) = to_alloc.as_reg() {
            debug_assert_eq!(to.class(), to_vreg.class());
        }
        self.moves.push(InsertedMove {
            pos_prio: PosWithPrio {
                pos,
                prio: prio as u32,
            },
            from_alloc,
            to_alloc,
            to_vreg,
        });
    }
}

#[derive(Clone, Debug, Default)]
pub struct Edits {
    edits: Vec<(PosWithPrio, Edit)>,
}

impl Edits {
    #[inline(always)]
    pub fn with_capacity(n: usize) -> Self {
        Self {
            edits: Vec::with_capacity(n),
        }
    }

    #[inline(always)]
    pub fn len(&self) -> usize {
        self.edits.len()
    }

    #[inline(always)]
    pub fn iter(&self) -> impl Iterator<Item = &(PosWithPrio, Edit)> {
        self.edits.iter()
    }

    #[inline(always)]
    pub fn drain_edits(&mut self) -> impl Iterator<Item = (ProgPoint, Edit)> + '_ {
        self.edits.drain(..).map(|(pos, edit)| (pos.pos, edit))
    }

    /// Sort edits by the combination of their program position and priority. This is a stable sort
    /// to preserve the order of the moves the parallel move resolver inserts.
    #[inline(always)]
    pub fn sort(&mut self) {
        self.edits.sort_by_key(|&(pos_prio, _)| pos_prio.key());
    }

    pub fn add(&mut self, pos_prio: PosWithPrio, from: Allocation, to: Allocation) {
        if from != to {
            if from.is_reg() && to.is_reg() {
                debug_assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class());
            }
            self.edits.push((pos_prio, Edit::Move { from, to }));
        }
    }
}

/// The fields in this struct are reversed in sort order so that the entire
/// struct can be treated as a u64 for sorting purposes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub struct PosWithPrio {
    pub prio: u32,
    pub pos: ProgPoint,
}

impl PosWithPrio {
    #[inline]
    pub fn key(self) -> u64 {
        u64_key(self.pos.to_index(), self.prio)
    }
}

#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(feature = "enable-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Stats {
    pub livein_blocks: usize,
    pub livein_iterations: usize,
    pub initial_liverange_count: usize,
    pub merged_bundle_count: usize,
    pub process_bundle_count: usize,
    pub process_bundle_reg_probes_fixed: usize,
    pub process_bundle_reg_success_fixed: usize,
    pub process_bundle_bounding_range_probe_start_any: usize,
    pub process_bundle_bounding_range_probes_any: usize,
    pub process_bundle_bounding_range_success_any: usize,
    pub process_bundle_reg_probe_start_any: usize,
    pub process_bundle_reg_probes_any: usize,
    pub process_bundle_reg_success_any: usize,
    pub evict_bundle_event: usize,
    pub evict_bundle_count: usize,
    pub splits: usize,
    pub splits_clobbers: usize,
    pub splits_hot: usize,
    pub splits_conflicts: usize,
    pub splits_defs: usize,
    pub splits_all: usize,
    pub final_liverange_count: usize,
    pub final_bundle_count: usize,
    pub spill_bundle_count: usize,
    pub spill_bundle_reg_probes: usize,
    pub spill_bundle_reg_success: usize,
    pub blockparam_ins_count: usize,
    pub blockparam_outs_count: usize,
    pub halfmoves_count: usize,
    pub edits_count: usize,
}

// Helper functions for generating sorting keys. The order of arguments is from
// the most significant field to the least significant one.
//
// These work best when the fields are stored in reverse order in memory so that
// they can be loaded with a single u64 load on little-endian machines.
#[inline(always)]
pub fn u64_key(b: u32, a: u32) -> u64 {
    a as u64 | (b as u64) << 32
}
#[inline(always)]
pub fn u128_key(d: u32, c: u32, b: u32, a: u32) -> u128 {
    a as u128 | (b as u128) << 32 | (c as u128) << 64 | (d as u128) << 96
}
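
// Worked example: `u64_key(b, a)` places `b` in the high 32 bits, so sorting
// by the key sorts by `b` first, then `a`. For instance (not compiled here):
//
//     assert_eq!(u64_key(1, 2), 0x0000_0001_0000_0002);
//     assert_eq!(u128_key(1, 2, 3, 4),
//                (1u128 << 96) | (2u128 << 64) | (3u128 << 32) | 4);
//
// `PosWithPrio::key`, `BlockparamOut::key`, and `BlockparamIn::key` above are
// built this way, with the most significant field passed first.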