//! A hashmap with "external hashing": nodes are hashed or compared for //! equality only with some external context provided on lookup/insert. //! This allows very memory-efficient data structures where //! node-internal data references some other storage (e.g., offsets into //! an array or pool of shared data). use hashbrown::raw::RawTable; use std::hash::{Hash, Hasher}; /// Trait that allows for equality comparison given some external /// context. /// /// Note that this trait is implemented by the *context*, rather than /// the item type, for somewhat complex lifetime reasons (lack of GATs /// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on /// the value type). pub trait CtxEq { /// Determine whether `a` and `b` are equal, given the context in /// `self` and the union-find data structure `uf`. fn ctx_eq(&self, a: &V1, b: &V2) -> bool; } /// Trait that allows for hashing given some external context. pub trait CtxHash: CtxEq { /// Compute the hash of `value`, given the context in `self` and /// the union-find data structure `uf`. fn ctx_hash(&self, state: &mut H, value: &Value); } /// A null-comparator context type for underlying value types that /// already have `Eq` and `Hash`. #[derive(Default)] pub struct NullCtx; impl CtxEq for NullCtx { fn ctx_eq(&self, a: &V, b: &V) -> bool { a.eq(b) } } impl CtxHash for NullCtx { fn ctx_hash(&self, state: &mut H, value: &V) { value.hash(state); } } /// A bucket in the hash table. /// /// Some performance-related design notes: we cache the hashcode for /// speed, as this often buys a few percent speed in /// interning-table-heavy workloads. We only keep the low 32 bits of /// the hashcode, for memory efficiency: in common use, `K` and `V` /// are often 32 bits also, and a 12-byte bucket is measurably better /// than a 16-byte bucket. struct BucketData { hash: u32, k: K, v: V, } /// A HashMap that takes external context for all operations. pub struct CtxHashMap { raw: RawTable>, } impl CtxHashMap { /// Create an empty hashmap with pre-allocated space for the given /// capacity. pub fn with_capacity(capacity: usize) -> Self { Self { raw: RawTable::with_capacity(capacity), } } } fn compute_hash(ctx: &Ctx, k: &K) -> u32 where Ctx: CtxHash, { let mut hasher = rustc_hash::FxHasher::default(); ctx.ctx_hash(&mut hasher, k); hasher.finish() as u32 } impl CtxHashMap { /// Insert a new key-value pair, returning the old value associated /// with this key (if any). pub fn insert(&mut self, k: K, v: V, ctx: &Ctx) -> Option where Ctx: CtxEq + CtxHash, { let hash = compute_hash(ctx, &k); match self.raw.find(hash as u64, |bucket| { hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k) }) { Some(bucket) => { let data = unsafe { bucket.as_mut() }; Some(std::mem::replace(&mut data.v, v)) } None => { let data = BucketData { hash, k, v }; self.raw .insert_entry(hash as u64, data, |bucket| bucket.hash as u64); None } } } /// Look up a key, returning a borrow of the value if present. pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V> where Ctx: CtxEq + CtxHash + CtxHash, { let hash = compute_hash(ctx, k); self.raw .find(hash as u64, |bucket| { hash == bucket.hash && ctx.ctx_eq(&bucket.k, k) }) .map(|bucket| { let data = unsafe { bucket.as_ref() }; &data.v }) } } #[cfg(test)] mod test { use super::*; #[derive(Clone, Copy, Debug)] struct Key { index: u32, } struct Ctx { vals: &'static [&'static str], } impl CtxEq for Ctx { fn ctx_eq(&self, a: &Key, b: &Key) -> bool { self.vals[a.index as usize].eq(self.vals[b.index as usize]) } } impl CtxHash for Ctx { fn ctx_hash(&self, state: &mut H, value: &Key) { self.vals[value.index as usize].hash(state); } } #[test] fn test_basic() { let ctx = Ctx { vals: &["a", "b", "a"], }; let k0 = Key { index: 0 }; let k1 = Key { index: 1 }; let k2 = Key { index: 2 }; assert!(ctx.ctx_eq(&k0, &k2)); assert!(!ctx.ctx_eq(&k0, &k1)); assert!(!ctx.ctx_eq(&k2, &k1)); let mut map: CtxHashMap = CtxHashMap::with_capacity(4); assert_eq!(map.insert(k0, 42, &ctx), None); assert_eq!(map.insert(k2, 84, &ctx), Some(42)); assert_eq!(map.get(&k1, &ctx), None); assert_eq!(*map.get(&k0, &ctx).unwrap(), 84); } }