//! This is the core implementation that doesn't depend on the hasher at all.
//!
//! The methods of `IndexMapCore` don't use any `Hash` properties of `K`.
//!
//! It's cleaner to separate them out, so the compiler checks that we are not
//! using `Hash` at all in these methods.
//!
//! However, we should probably not let this show in the public API or docs.

mod entry;

pub mod raw_entry_v1;

use hashbrown::hash_table;

use crate::vec::{self, Vec};
use crate::TryReserveError;
use core::mem;
use core::ops::RangeBounds;

use crate::util::simplify_range;
use crate::{Bucket, Equivalent, HashValue};

type Indices = hash_table::HashTable<usize>;
type Entries<K, V> = Vec<Bucket<K, V>>;

pub use entry::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};

/// Core of the map that does not depend on `S`
#[derive(Debug)]
pub(crate) struct IndexMapCore<K, V> {
    /// indices mapping from the entry hash to its index.
    indices: Indices,
    /// entries is a dense vec maintaining entry order.
    entries: Entries<K, V>,
}

/// Mutable references to the parts of an `IndexMapCore`.
///
/// When using `HashTable::find_entry`, that takes hold of `&mut indices`, so we have to borrow our
/// `&mut entries` separately, and there's no way to go back to a `&mut IndexMapCore`. So this type
/// is used to implement methods on the split references, and `IndexMapCore` can also call those to
/// avoid duplication.
struct RefMut<'a, K, V> {
    indices: &'a mut Indices,
    entries: &'a mut Entries<K, V>,
}

#[inline(always)]
fn get_hash<K, V>(entries: &[Bucket<K, V>]) -> impl Fn(&usize) -> u64 + '_ {
    move |&i| entries[i].hash.get()
}

#[inline]
fn equivalent<'a, K, V, Q: ?Sized + Equivalent<K>>(
    key: &'a Q,
    entries: &'a [Bucket<K, V>],
) -> impl Fn(&usize) -> bool + 'a {
    move |&i| Q::equivalent(key, &entries[i].key)
}

#[inline]
fn erase_index(table: &mut Indices, hash: HashValue, index: usize) {
    if let Ok(entry) = table.find_entry(hash.get(), move |&i| i == index) {
        entry.remove();
    } else if cfg!(debug_assertions) {
        panic!("index not found");
    }
}

#[inline]
fn update_index(table: &mut Indices, hash: HashValue, old: usize, new: usize) {
    let index = table
        .find_mut(hash.get(), move |&i| i == old)
        .expect("index not found");
    *index = new;
}

/// Inserts many entries into the indices table without reallocating,
/// and without regard for duplication.
///
/// ***Panics*** if there is not sufficient capacity already.
fn insert_bulk_no_grow<K, V>(indices: &mut Indices, entries: &[Bucket<K, V>]) {
    assert!(indices.capacity() - indices.len() >= entries.len());
    for entry in entries {
        indices.insert_unique(entry.hash.get(), indices.len(), |_| unreachable!());
    }
}

impl<K, V> Clone for IndexMapCore<K, V>
where
    K: Clone,
    V: Clone,
{
    fn clone(&self) -> Self {
        let mut new = Self::new();
        new.clone_from(self);
        new
    }

    fn clone_from(&mut self, other: &Self) {
        self.indices.clone_from(&other.indices);
        if self.entries.capacity() < other.entries.len() {
            // If we must resize, match the indices capacity.
            let additional = other.entries.len() - self.entries.len();
            self.borrow_mut().reserve_entries(additional);
        }
        self.entries.clone_from(&other.entries);
    }
}

impl<K, V> crate::Entries for IndexMapCore<K, V> {
    type Entry = Bucket<K, V>;

    #[inline]
    fn into_entries(self) -> Vec<Self::Entry> {
        self.entries
    }

    #[inline]
    fn as_entries(&self) -> &[Self::Entry] {
        &self.entries
    }

    #[inline]
    fn as_entries_mut(&mut self) -> &mut [Self::Entry] {
        &mut self.entries
    }

    fn with_entries<F>(&mut self, f: F)
    where
        F: FnOnce(&mut [Self::Entry]),
    {
        f(&mut self.entries);
        self.rebuild_hash_table();
    }
}

impl<K, V> IndexMapCore<K, V> {
    /// The maximum capacity before the `entries` allocation would exceed `isize::MAX`.
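    /// (For instance, with `Bucket<u64, u64>` on a typical 64-bit target, each
    /// bucket is 24 bytes: one `usize` hash plus the two 8-byte words, so the
    /// limit would be `isize::MAX / 24`. The exact figure depends on `K` and `V`.)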
    const MAX_ENTRIES_CAPACITY: usize = (isize::MAX as usize) / mem::size_of::<Bucket<K, V>>();

    #[inline]
    pub(crate) const fn new() -> Self {
        IndexMapCore {
            indices: Indices::new(),
            entries: Vec::new(),
        }
    }

    #[inline]
    fn borrow_mut(&mut self) -> RefMut<'_, K, V> {
        RefMut::new(&mut self.indices, &mut self.entries)
    }

    #[inline]
    pub(crate) fn with_capacity(n: usize) -> Self {
        IndexMapCore {
            indices: Indices::with_capacity(n),
            entries: Vec::with_capacity(n),
        }
    }

    #[inline]
    pub(crate) fn len(&self) -> usize {
        self.indices.len()
    }

    #[inline]
    pub(crate) fn capacity(&self) -> usize {
        Ord::min(self.indices.capacity(), self.entries.capacity())
    }

    pub(crate) fn clear(&mut self) {
        self.indices.clear();
        self.entries.clear();
    }

    pub(crate) fn truncate(&mut self, len: usize) {
        if len < self.len() {
            self.erase_indices(len, self.entries.len());
            self.entries.truncate(len);
        }
    }

    #[track_caller]
    pub(crate) fn drain<R>(&mut self, range: R) -> vec::Drain<'_, Bucket<K, V>>
    where
        R: RangeBounds<usize>,
    {
        let range = simplify_range(range, self.entries.len());
        self.erase_indices(range.start, range.end);
        self.entries.drain(range)
    }

    #[cfg(feature = "rayon")]
    pub(crate) fn par_drain<R>(&mut self, range: R) -> rayon::vec::Drain<'_, Bucket<K, V>>
    where
        K: Send,
        V: Send,
        R: RangeBounds<usize>,
    {
        use rayon::iter::ParallelDrainRange;
        let range = simplify_range(range, self.entries.len());
        self.erase_indices(range.start, range.end);
        self.entries.par_drain(range)
    }

    #[track_caller]
    pub(crate) fn split_off(&mut self, at: usize) -> Self {
        let len = self.entries.len();
        assert!(
            at <= len,
            "index out of bounds: the len is {len} but the index is {at}. Expected index <= len"
        );

        self.erase_indices(at, self.entries.len());
        let entries = self.entries.split_off(at);

        let mut indices = Indices::with_capacity(entries.len());
        insert_bulk_no_grow(&mut indices, &entries);
        Self { indices, entries }
    }

    #[track_caller]
    pub(crate) fn split_splice<R>(&mut self, range: R) -> (Self, vec::IntoIter<Bucket<K, V>>)
    where
        R: RangeBounds<usize>,
    {
        let range = simplify_range(range, self.len());
        self.erase_indices(range.start, self.entries.len());
        let entries = self.entries.split_off(range.end);
        let drained = self.entries.split_off(range.start);

        let mut indices = Indices::with_capacity(entries.len());
        insert_bulk_no_grow(&mut indices, &entries);
        (Self { indices, entries }, drained.into_iter())
    }

    /// Append from another map without checking whether items already exist.
    pub(crate) fn append_unchecked(&mut self, other: &mut Self) {
        self.reserve(other.len());
        insert_bulk_no_grow(&mut self.indices, &other.entries);
        self.entries.append(&mut other.entries);
        other.indices.clear();
    }

    /// Reserve capacity for `additional` more key-value pairs.
    pub(crate) fn reserve(&mut self, additional: usize) {
        self.indices.reserve(additional, get_hash(&self.entries));
        // Only grow entries if necessary, since we also round up capacity.
        if additional > self.entries.capacity() - self.entries.len() {
            self.borrow_mut().reserve_entries(additional);
        }
    }

    /// Reserve capacity for `additional` more key-value pairs, without over-allocating.
    pub(crate) fn reserve_exact(&mut self, additional: usize) {
        self.indices.reserve(additional, get_hash(&self.entries));
        self.entries.reserve_exact(additional);
    }

    /// Try to reserve capacity for `additional` more key-value pairs.
    pub(crate) fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
        self.indices
            .try_reserve(additional, get_hash(&self.entries))
            .map_err(TryReserveError::from_hashbrown)?;
        // Only grow entries if necessary, since we also round up capacity.
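        // (`try_reserve_entries` rounds the `Vec` growth up toward the capacity
        // that `indices` just reserved, so the two allocations tend to grow in
        // lockstep rather than on independent doubling schedules.)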
        if additional > self.entries.capacity() - self.entries.len() {
            self.try_reserve_entries(additional)
        } else {
            Ok(())
        }
    }

    /// Try to reserve entries capacity, rounded up to match the indices
    fn try_reserve_entries(&mut self, additional: usize) -> Result<(), TryReserveError> {
        // Use a soft-limit on the maximum capacity, but if the caller explicitly
        // requested more, do it and let them have the resulting error.
        let new_capacity = Ord::min(self.indices.capacity(), Self::MAX_ENTRIES_CAPACITY);
        let try_add = new_capacity - self.entries.len();
        if try_add > additional && self.entries.try_reserve_exact(try_add).is_ok() {
            return Ok(());
        }
        self.entries
            .try_reserve_exact(additional)
            .map_err(TryReserveError::from_alloc)
    }

    /// Try to reserve capacity for `additional` more key-value pairs, without over-allocating.
    pub(crate) fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
        self.indices
            .try_reserve(additional, get_hash(&self.entries))
            .map_err(TryReserveError::from_hashbrown)?;
        self.entries
            .try_reserve_exact(additional)
            .map_err(TryReserveError::from_alloc)
    }

    /// Shrink the capacity of the map with a lower bound
    pub(crate) fn shrink_to(&mut self, min_capacity: usize) {
        self.indices
            .shrink_to(min_capacity, get_hash(&self.entries));
        self.entries.shrink_to(min_capacity);
    }

    /// Remove the last key-value pair
    pub(crate) fn pop(&mut self) -> Option<(K, V)> {
        if let Some(entry) = self.entries.pop() {
            let last = self.entries.len();
            erase_index(&mut self.indices, entry.hash, last);
            Some((entry.key, entry.value))
        } else {
            None
        }
    }

    /// Return the index in `entries` where an equivalent key can be found
    pub(crate) fn get_index_of<Q>(&self, hash: HashValue, key: &Q) -> Option<usize>
    where
        Q: ?Sized + Equivalent<K>,
    {
        let eq = equivalent(key, &self.entries);
        self.indices.find(hash.get(), eq).copied()
    }

    /// Append a key-value pair to `entries`,
    /// *without* checking whether it already exists.
    fn push_entry(&mut self, hash: HashValue, key: K, value: V) {
        if self.entries.len() == self.entries.capacity() {
            // Reserve our own capacity synced to the indices,
            // rather than letting `Vec::push` just double it.
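            // (`RefMut::reserve_entries` passes `indices.capacity()` as the
            // rounding target, so this one-element request actually grows
            // `entries` to match the table's rounded-up capacity.)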
            self.borrow_mut().reserve_entries(1);
        }
        self.entries.push(Bucket { hash, key, value });
    }

    pub(crate) fn insert_full(&mut self, hash: HashValue, key: K, value: V) -> (usize, Option<V>)
    where
        K: Eq,
    {
        let eq = equivalent(&key, &self.entries);
        let hasher = get_hash(&self.entries);
        match self.indices.entry(hash.get(), eq, hasher) {
            hash_table::Entry::Occupied(entry) => {
                let i = *entry.get();
                (i, Some(mem::replace(&mut self.entries[i].value, value)))
            }
            hash_table::Entry::Vacant(entry) => {
                let i = self.entries.len();
                entry.insert(i);
                self.push_entry(hash, key, value);
                debug_assert_eq!(self.indices.len(), self.entries.len());
                (i, None)
            }
        }
    }

    /// Same as `insert_full`, except it also replaces the key
    pub(crate) fn replace_full(
        &mut self,
        hash: HashValue,
        key: K,
        value: V,
    ) -> (usize, Option<(K, V)>)
    where
        K: Eq,
    {
        let eq = equivalent(&key, &self.entries);
        let hasher = get_hash(&self.entries);
        match self.indices.entry(hash.get(), eq, hasher) {
            hash_table::Entry::Occupied(entry) => {
                let i = *entry.get();
                let entry = &mut self.entries[i];
                let kv = (
                    mem::replace(&mut entry.key, key),
                    mem::replace(&mut entry.value, value),
                );
                (i, Some(kv))
            }
            hash_table::Entry::Vacant(entry) => {
                let i = self.entries.len();
                entry.insert(i);
                self.push_entry(hash, key, value);
                debug_assert_eq!(self.indices.len(), self.entries.len());
                (i, None)
            }
        }
    }

    /// Remove an entry by shifting all entries that follow it
    pub(crate) fn shift_remove_full<Q>(&mut self, hash: HashValue, key: &Q) -> Option<(usize, K, V)>
    where
        Q: ?Sized + Equivalent<K>,
    {
        let eq = equivalent(key, &self.entries);
        match self.indices.find_entry(hash.get(), eq) {
            Ok(entry) => {
                let (index, _) = entry.remove();
                let (key, value) = self.borrow_mut().shift_remove_finish(index);
                Some((index, key, value))
            }
            Err(_) => None,
        }
    }

    /// Remove an entry by shifting all entries that follow it
    #[inline]
    pub(crate) fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> {
        self.borrow_mut().shift_remove_index(index)
    }

    #[inline]
    #[track_caller]
    pub(super) fn move_index(&mut self, from: usize, to: usize) {
        self.borrow_mut().move_index(from, to);
    }

    #[inline]
    #[track_caller]
    pub(crate) fn swap_indices(&mut self, a: usize, b: usize) {
        self.borrow_mut().swap_indices(a, b);
    }

    /// Remove an entry by swapping it with the last
    pub(crate) fn swap_remove_full<Q>(&mut self, hash: HashValue, key: &Q) -> Option<(usize, K, V)>
    where
        Q: ?Sized + Equivalent<K>,
    {
        let eq = equivalent(key, &self.entries);
        match self.indices.find_entry(hash.get(), eq) {
            Ok(entry) => {
                let (index, _) = entry.remove();
                let (key, value) = self.borrow_mut().swap_remove_finish(index);
                Some((index, key, value))
            }
            Err(_) => None,
        }
    }

    /// Remove an entry by swapping it with the last
    #[inline]
    pub(crate) fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> {
        self.borrow_mut().swap_remove_index(index)
    }

    /// Erase `start..end` from `indices`, and shift `end..` indices down to `start..`
    ///
    /// All of these items should still be at their original location in `entries`.
    /// This is used by `drain`, which will let `Vec::drain` do the work on `entries`.
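    ///
    /// For example, erasing `2..5` from a 10-entry map removes the table slots
    /// for indices `2..5` and rewrites indices `5..10` as `2..7`. Whether that
    /// happens by a full rebuild, per-entry `find()` updates, or a whole-table
    /// sweep is chosen by the size heuristics in the body.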
    fn erase_indices(&mut self, start: usize, end: usize) {
        let (init, shifted_entries) = self.entries.split_at(end);
        let (start_entries, erased_entries) = init.split_at(start);

        let erased = erased_entries.len();
        let shifted = shifted_entries.len();
        let half_capacity = self.indices.capacity() / 2;

        // Use a heuristic between different strategies
        if erased == 0 {
            // Degenerate case, nothing to do
        } else if start + shifted < half_capacity && start < erased {
            // Reinsert everything, as there are few kept indices
            self.indices.clear();

            // Reinsert stable indices, then shifted indices
            insert_bulk_no_grow(&mut self.indices, start_entries);
            insert_bulk_no_grow(&mut self.indices, shifted_entries);
        } else if erased + shifted < half_capacity {
            // Find each affected index, as there are few to adjust

            // Find erased indices
            for (i, entry) in (start..).zip(erased_entries) {
                erase_index(&mut self.indices, entry.hash, i);
            }

            // Find shifted indices
            for ((new, old), entry) in (start..).zip(end..).zip(shifted_entries) {
                update_index(&mut self.indices, entry.hash, old, new);
            }
        } else {
            // Sweep the whole table for adjustments
            let offset = end - start;
            self.indices.retain(move |i| {
                if *i >= end {
                    *i -= offset;
                    true
                } else {
                    *i < start
                }
            });
        }

        debug_assert_eq!(self.indices.len(), start + shifted);
    }

    pub(crate) fn retain_in_order<F>(&mut self, mut keep: F)
    where
        F: FnMut(&mut K, &mut V) -> bool,
    {
        self.entries
            .retain_mut(|entry| keep(&mut entry.key, &mut entry.value));
        if self.entries.len() < self.indices.len() {
            self.rebuild_hash_table();
        }
    }

    fn rebuild_hash_table(&mut self) {
        self.indices.clear();
        insert_bulk_no_grow(&mut self.indices, &self.entries);
    }

    pub(crate) fn reverse(&mut self) {
        self.entries.reverse();

        // No need to save hash indices, can easily calculate what they should
        // be, given that this is an in-place reversal.
        let len = self.entries.len();
        for i in &mut self.indices {
            *i = len - *i - 1;
        }
    }
}

/// Reserve entries capacity, rounded up to match the indices (via `try_capacity`).
fn reserve_entries<K, V>(entries: &mut Entries<K, V>, additional: usize, try_capacity: usize) {
    // Use a soft-limit on the maximum capacity, but if the caller explicitly
    // requested more, do it and let them have the resulting panic.
    let try_capacity = try_capacity.min(IndexMapCore::<K, V>::MAX_ENTRIES_CAPACITY);
    let try_add = try_capacity - entries.len();
    if try_add > additional && entries.try_reserve_exact(try_add).is_ok() {
        return;
    }
    entries.reserve_exact(additional);
}

impl<'a, K, V> RefMut<'a, K, V> {
    #[inline]
    fn new(indices: &'a mut Indices, entries: &'a mut Entries<K, V>) -> Self {
        Self { indices, entries }
    }

    /// Reserve entries capacity, rounded up to match the indices
    #[inline]
    fn reserve_entries(&mut self, additional: usize) {
        reserve_entries(self.entries, additional, self.indices.capacity());
    }

    /// Insert a key-value pair in `entries`,
    /// *without* checking whether it already exists.
    fn insert_unique(self, hash: HashValue, key: K, value: V) -> OccupiedEntry<'a, K, V> {
        let i = self.indices.len();
        debug_assert_eq!(i, self.entries.len());
        let entry = self
            .indices
            .insert_unique(hash.get(), i, get_hash(self.entries));
        if self.entries.len() == self.entries.capacity() {
            // We can't call `indices.capacity()` while this `entry` has borrowed it, so we'll have
            // to amortize growth on our own. It's still an improvement over the basic `Vec::push`
            // doubling though, since we also consider `MAX_ENTRIES_CAPACITY`.
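            // (The doubled length is only a rounding target: `reserve_entries`
            // clamps it to `MAX_ENTRIES_CAPACITY` and falls back to an exact
            // one-element reservation if the larger allocation fails.)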
            reserve_entries(self.entries, 1, 2 * self.entries.capacity());
        }
        self.entries.push(Bucket { hash, key, value });
        OccupiedEntry::new(self.entries, entry)
    }

    /// Insert a key-value pair in `entries` at a particular index,
    /// *without* checking whether it already exists.
    fn shift_insert_unique(&mut self, index: usize, hash: HashValue, key: K, value: V) {
        let end = self.indices.len();
        assert!(index <= end);
        // Increment others first so we don't have duplicate indices.
        self.increment_indices(index, end);
        let entries = &*self.entries;
        self.indices.insert_unique(hash.get(), index, move |&i| {
            // Adjust for the incremented indices to find hashes.
            debug_assert_ne!(i, index);
            let i = if i < index { i } else { i - 1 };
            entries[i].hash.get()
        });
        if self.entries.len() == self.entries.capacity() {
            // Reserve our own capacity synced to the indices,
            // rather than letting `Vec::insert` just double it.
            self.reserve_entries(1);
        }
        self.entries.insert(index, Bucket { hash, key, value });
    }

    /// Remove an entry by shifting all entries that follow it
    fn shift_remove_index(&mut self, index: usize) -> Option<(K, V)> {
        match self.entries.get(index) {
            Some(entry) => {
                erase_index(self.indices, entry.hash, index);
                Some(self.shift_remove_finish(index))
            }
            None => None,
        }
    }

    /// Remove an entry by shifting all entries that follow it
    ///
    /// The index should already be removed from `self.indices`.
    fn shift_remove_finish(&mut self, index: usize) -> (K, V) {
        // Correct indices that point to the entries that followed the removed entry.
        self.decrement_indices(index + 1, self.entries.len());

        // Use Vec::remove to actually remove the entry.
        let entry = self.entries.remove(index);
        (entry.key, entry.value)
    }

    /// Remove an entry by swapping it with the last
    fn swap_remove_index(&mut self, index: usize) -> Option<(K, V)> {
        match self.entries.get(index) {
            Some(entry) => {
                erase_index(self.indices, entry.hash, index);
                Some(self.swap_remove_finish(index))
            }
            None => None,
        }
    }

    /// Finish removing an entry by swapping it with the last
    ///
    /// The index should already be removed from `self.indices`.
    fn swap_remove_finish(&mut self, index: usize) -> (K, V) {
        // use swap_remove, but then we need to update the index that points
        // to the other entry that has to move
        let entry = self.entries.swap_remove(index);

        // correct index that points to the entry that had to swap places
        if let Some(entry) = self.entries.get(index) {
            // was not last element
            // examine new element in `index` and find it in indices
            let last = self.entries.len();
            update_index(self.indices, entry.hash, last, index);
        }

        (entry.key, entry.value)
    }

    /// Decrement all indices in the range `start..end`.
    ///
    /// The index `start - 1` should not exist in `self.indices`.
    /// All entries should still be in their original positions.
    fn decrement_indices(&mut self, start: usize, end: usize) {
        // Use a heuristic between a full sweep vs. a `find()` for every shifted item.
        let shifted_entries = &self.entries[start..end];
        if shifted_entries.len() > self.indices.capacity() / 2 {
            // Shift all indices in range.
            for i in &mut *self.indices {
                if start <= *i && *i < end {
                    *i -= 1;
                }
            }
        } else {
            // Find each entry in range to shift its index.
            for (i, entry) in (start..end).zip(shifted_entries) {
                update_index(self.indices, entry.hash, i, i - 1);
            }
        }
    }

    /// Increment all indices in the range `start..end`.
    ///
    /// The index `end` should not exist in `self.indices`.
    /// All entries should still be in their original positions.
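    ///
    /// Unlike `decrement_indices`, the `find()` path walks the range in
    /// reverse: the entry at `end - 1` moves into the vacant `end` slot first,
    /// so no two live indices are ever momentarily equal.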
    fn increment_indices(&mut self, start: usize, end: usize) {
        // Use a heuristic between a full sweep vs. a `find()` for every shifted item.
        let shifted_entries = &self.entries[start..end];
        if shifted_entries.len() > self.indices.capacity() / 2 {
            // Shift all indices in range.
            for i in &mut *self.indices {
                if start <= *i && *i < end {
                    *i += 1;
                }
            }
        } else {
            // Find each entry in range to shift its index, updated in reverse so
            // we never have duplicated indices that might have a hash collision.
            for (i, entry) in (start..end).zip(shifted_entries).rev() {
                update_index(self.indices, entry.hash, i, i + 1);
            }
        }
    }

    #[track_caller]
    fn move_index(&mut self, from: usize, to: usize) {
        let from_hash = self.entries[from].hash;
        let _ = self.entries[to]; // explicit bounds check
        if from != to {
            // Use a sentinel index so other indices don't collide.
            update_index(self.indices, from_hash, from, usize::MAX);

            // Update all other indices and rotate the entry positions.
            if from < to {
                self.decrement_indices(from + 1, to + 1);
                self.entries[from..=to].rotate_left(1);
            } else if to < from {
                self.increment_indices(to, from);
                self.entries[to..=from].rotate_right(1);
            }

            // Change the sentinel index to its final position.
            update_index(self.indices, from_hash, usize::MAX, to);
        }
    }

    #[track_caller]
    fn swap_indices(&mut self, a: usize, b: usize) {
        // If they're equal and in-bounds, there's nothing to do.
        if a == b && a < self.entries.len() {
            return;
        }

        // We'll get a "nice" bounds-check from indexing `entries`,
        // and then we expect to find it in the table as well.
        match self.indices.get_many_mut(
            [self.entries[a].hash.get(), self.entries[b].hash.get()],
            move |i, &x| if i == 0 { x == a } else { x == b },
        ) {
            [Some(ref_a), Some(ref_b)] => {
                mem::swap(ref_a, ref_b);
                self.entries.swap(a, b);
            }
            _ => panic!("indices not found"),
        }
    }
}

#[test]
fn assert_send_sync() {
    fn assert_send_sync<T: Send + Sync>() {}
    assert_send_sync::<IndexMapCore<i32, i32>>();
    assert_send_sync::<Entry<'_, i32, i32>>();
    assert_send_sync::<IndexedEntry<'_, i32, i32>>();
    assert_send_sync::<raw_entry_v1::RawEntryMut<'_, i32, i32, ()>>();
}
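
// A small behavioral sketch of `insert_full` and `shift_remove_full`, in the
// same spirit as `assert_send_sync` above. It assumes `HashValue`'s inner
// `usize` is constructible from this module (as `Bucket`'s fields are); the
// hash values below are arbitrary stand-ins, and any collisions are resolved
// by the `eq` closures rather than by the hash itself.
#[test]
fn insert_and_shift_remove() {
    let mut map = IndexMapCore::<u32, u32>::new();
    let hash = |k: u32| HashValue(k as usize);

    // New keys are appended in order; there is no old value to return.
    assert_eq!(map.insert_full(hash(1), 1, 10), (0, None));
    assert_eq!(map.insert_full(hash(2), 2, 20), (1, None));
    // Reinserting a key keeps its index and returns the replaced value.
    assert_eq!(map.insert_full(hash(1), 1, 11), (0, Some(10)));

    // Shift-removal preserves order: key 2 shifts down from index 1 to 0.
    assert_eq!(map.shift_remove_full(hash(1), &1), Some((0, 1, 11)));
    assert_eq!(map.get_index_of(hash(2), &2), Some(0));
    assert_eq!(map.len(), 1);
}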