Replace the raw module with just hash_map

[anymap] / src / lib.rs
diff --git a/src/lib.rs b/src/lib.rs

index d41c335d1b73fe822f442ac71bff0317856b6b51..beef1bd3b67055a788fe7e2b6a286b8577fe0631 100644 (file)
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,16 +1,52 @@
-//! This crate provides the `AnyMap` type, a safe and convenient store for one value of each type.
+//! This crate provides a safe and convenient store for one value of each type.
+//!
+//! Your starting point is [`Map`]. It has an example.
  
-#![cfg_attr(all(feature = "bench", test), feature(test))]
  #![warn(missing_docs, unused_results)]
  
-#[cfg(all(feature = "bench", test))]
-extern crate test;
+#![cfg_attr(not(feature = "std"), no_std)]
  
-use std::any::TypeId;
-use std::marker::PhantomData;
+use core::any::{Any, TypeId};
+use core::convert::TryInto;
+use core::hash::{Hasher, BuildHasherDefault};
+use core::marker::PhantomData;
  
-use raw::RawMap;
-use any::{UncheckedAnyExt, IntoBox, Any};
+#[cfg(not(any(feature = "std", feature = "hashbrown")))]
+compile_error!("anymap: you must enable the 'std' feature or the 'hashbrown' feature");
+
+#[cfg(not(feature = "std"))]
+extern crate alloc;
+
+#[cfg(not(feature = "std"))]
+use alloc::boxed::Box;
+
+use any::{UncheckedAnyExt, IntoBox};
+pub use any::CloneAny;
+
+#[cfg(all(feature = "std", not(feature = "hashbrown")))]
+/// A re-export of [`std::collections::hash_map`] for raw access.
+///
+/// If the `hashbrown` feature gets enabled, this will become an export of `hashbrown::hash_map`.
+///
+/// As with [`RawMap`][crate::RawMap], this is exposed for compatibility reasons, since features
+/// are supposed to be additive. This *is* imperfect, since the two modules are incompatible in a
+/// few places (e.g. hashbrown’s entry types have an extra generic parameter), but it’s close, and
+/// much too useful to give up the whole concept.
+pub use std::collections::hash_map as raw_hash_map;
+
+#[cfg(feature = "hashbrown")]
+/// A re-export of [`hashbrown::hash_map`] for raw access.
+///
+/// If the `hashbrown` feature was disabled, this would become an export of
+/// `std::collections::hash_map`.
+///
+/// As with [`RawMap`][crate::RawMap], this is exposed for compatibility reasons, since features
+/// are supposed to be additive. This *is* imperfect, since the two modules are incompatible in a
+/// few places (e.g. hashbrown’s entry types have an extra generic parameter), but it’s close, and
+/// much too useful to give up the whole concept.
+pub use hashbrown::hash_map as raw_hash_map;
+
+use self::raw_hash_map::HashMap;
  
  macro_rules! impl_common_methods {
      (
@@ -61,6 +97,10 @@ macro_rules! impl_common_methods {
                  self.$field.shrink_to_fit()
              }
  
+            // Additional stable methods (as of 1.60.0-nightly) that could be added:
+            // try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError>    (1.57.0)
+            // shrink_to(&mut self, min_capacity: usize)                                   (1.56.0)
+
              /// Returns the number of items in the collection.
              #[inline]
              pub fn len(&self) -> usize {
@@ -79,24 +119,56 @@ macro_rules! impl_common_methods {
                  self.$field.clear()
              }
          }
+
+        impl<A: ?Sized + UncheckedAnyExt> Default for $t<A> {
+            #[inline]
+            fn default() -> $t<A> {
+                $t::new()
+            }
+        }
      }
  }
  
-pub mod any;
-pub mod raw;
+mod any;
+
+/// Raw access to the underlying `HashMap`.
+///
+/// This is a public type alias because the underlying `HashMap` could be
+/// `std::collections::HashMap` or `hashbrown::HashMap`, depending on the crate features enabled.
+/// For that reason, you should refer to this type as `anymap::RawMap` rather than
+/// `std::collections::HashMap` to avoid breakage if something else in your crate tree enables
+/// hashbrown.
+///
+/// See also [`raw_hash_map`], an export of the corresponding `hash_map` module.
+pub type RawMap<A> = HashMap<TypeId, Box<A>, BuildHasherDefault<TypeIdHasher>>;
  
  /// A collection containing zero or one values for any given type and allowing convenient,
  /// type-safe access to those values.
  ///
  /// The type parameter `A` allows you to use a different value type; normally you will want it to
-/// be `anymap::any::Any`, but there are other choices:
+/// be `core::any::Any` (also known as `std::any::Any`), but there are other choices:
+///
+/// - If you want the entire map to be cloneable, use `CloneAny` instead of `Any`; with that, you
+///   can only add types that implement `Clone` to the map.
+/// - You can add on `+ Send` or `+ Send + Sync` (e.g. `Map<dyn Any + Send>`) to add those auto
+///   traits.
+///
+/// Cumulatively, there are thus six forms of map:
+///
+/// - <code>[Map]&lt;dyn [core::any::Any]&gt;</code>, also spelled [`AnyMap`] for convenience.
+/// - <code>[Map]&lt;dyn [core::any::Any] + Send&gt;</code>
+/// - <code>[Map]&lt;dyn [core::any::Any] + Send + Sync&gt;</code>
+/// - <code>[Map]&lt;dyn [CloneAny]&gt;</code>
+/// - <code>[Map]&lt;dyn [CloneAny] + Send&gt;</code>
+/// - <code>[Map]&lt;dyn [CloneAny] + Send + Sync&gt;</code>
+///
+/// ## Example
  ///
-/// - If you want the entire map to be cloneable, use `CloneAny` instead of `Any`.
-/// - You can add on `+ Send` and/or `+ Sync` (e.g. `Map<Any + Send>`) to add those bounds.
+/// (Here using the [`AnyMap`] convenience alias; the first line could use
+/// <code>[anymap::Map][Map]::&lt;dyn [core::any::Any]&gt;::new()</code> instead if desired.)
  ///
  /// ```rust
-/// # use anymap::AnyMap;
-/// let mut data = AnyMap::new();
+/// let mut data = anymap::AnyMap::new();
  /// assert_eq!(data.get(), None::<&i32>);
  /// data.insert(42i32);
  /// assert_eq!(data.get(), Some(&42i32));
@@ -117,7 +189,7 @@ pub mod raw;
  ///
  /// Values containing non-static references are not permitted.
  #[derive(Debug)]
-pub struct Map<A: ?Sized + UncheckedAnyExt = Any> {
+pub struct Map<A: ?Sized + UncheckedAnyExt = dyn Any> {
      raw: RawMap<A>,
  }
  
@@ -131,17 +203,17 @@ impl<A: ?Sized + UncheckedAnyExt> Clone for Map<A> where Box<A>: Clone {
      }
  }
  
-/// The most common type of `Map`: just using `Any`.
+/// The most common type of `Map`: just using `Any`; <code>[Map]&lt;dyn [Any]&gt;</code>.
  ///
  /// Why is this a separate type alias rather than a default value for `Map<A>`? `Map::new()`
  /// doesn’t seem to be happy to infer that it should go with the default value.
  /// It’s a bit sad, really. Ah well, I guess this approach will do.
-pub type AnyMap = Map<Any>;
+pub type AnyMap = Map<dyn Any>;
  
  impl_common_methods! {
      field: Map.raw;
-    new() => RawMap::new();
-    with_capacity(capacity) => RawMap::with_capacity(capacity);
+    new() => RawMap::with_hasher(Default::default());
+    with_capacity(capacity) => RawMap::with_capacity_and_hasher(capacity, Default::default());
  }
  
  impl<A: ?Sized + UncheckedAnyExt> Map<A> {
@@ -171,6 +243,8 @@ impl<A: ?Sized + UncheckedAnyExt> Map<A> {
          }
      }
  
+    // rustc 1.60.0-nightly has another method try_insert that would be nice to add when stable.
+
      /// Removes the `T` value from the collection,
      /// returning it if there was one or `None` if there was not.
      #[inline]
@@ -189,48 +263,121 @@ impl<A: ?Sized + UncheckedAnyExt> Map<A> {
      #[inline]
      pub fn entry<T: IntoBox<A>>(&mut self) -> Entry<A, T> {
          match self.raw.entry(TypeId::of::<T>()) {
-            raw::Entry::Occupied(e) => Entry::Occupied(OccupiedEntry {
+            raw_hash_map::Entry::Occupied(e) => Entry::Occupied(OccupiedEntry {
                  inner: e,
                  type_: PhantomData,
              }),
-            raw::Entry::Vacant(e) => Entry::Vacant(VacantEntry {
+            raw_hash_map::Entry::Vacant(e) => Entry::Vacant(VacantEntry {
                  inner: e,
                  type_: PhantomData,
              }),
          }
      }
-}
  
-impl<A: ?Sized + UncheckedAnyExt> AsRef<RawMap<A>> for Map<A> {
+    /// Get access to the raw hash map that backs this.
+    ///
+    /// This will seldom be useful, but it’s conceivable that you could wish to iterate over all
+    /// the items in the collection, and this lets you do that.
+    ///
+    /// To improve compatibility with Cargo features, interact with this map through the names
+    /// [`anymap::RawMap`][RawMap] and [`anymap::raw_hash_map`][raw_hash_map], rather than through
+    /// `std::collections::{HashMap, hash_map}` or `hashbrown::{HashMap, hash_map}`, for anything
+    /// beyond self methods. Otherwise, if you use std and another crate in the tree enables
+    /// hashbrown, your code will break.
      #[inline]
-    fn as_ref(&self) -> &RawMap<A> {
+    pub fn as_raw(&self) -> &RawMap<A> {
          &self.raw
      }
-}
  
-impl<A: ?Sized + UncheckedAnyExt> AsMut<RawMap<A>> for Map<A> {
+    /// Get mutable access to the raw hash map that backs this.
+    ///
+    /// This will seldom be useful, but it’s conceivable that you could wish to iterate over all
+    /// the items in the collection mutably, or drain or something, or *possibly* even batch
+    /// insert, and this lets you do that.
+    ///
+    /// To improve compatibility with Cargo features, interact with this map through the names
+    /// [`anymap::RawMap`][RawMap] and [`anymap::raw_hash_map`][raw_hash_map], rather than through
+    /// `std::collections::{HashMap, hash_map}` or `hashbrown::{HashMap, hash_map}`, for anything
+    /// beyond self methods. Otherwise, if you use std and another crate in the tree enables
+    /// hashbrown, your code will break.
+    ///
+    /// # Safety
+    ///
+    /// If you insert any values into the raw map, each key (a `TypeId`) must match its value’s
+    /// type, or *undefined behaviour* will occur when you access those values.
+    ///
+    /// (*Removing* entries is perfectly safe.)
      #[inline]
-    fn as_mut(&mut self) -> &mut RawMap<A> {
+    pub unsafe fn as_raw_mut(&mut self) -> &mut RawMap<A> {
          &mut self.raw
      }
-}
  
-impl<A: ?Sized + UncheckedAnyExt> Into<RawMap<A>> for Map<A> {
+    /// Convert this into the raw hash map that backs this.
+    ///
+    /// This will seldom be useful, but it’s conceivable that you could wish to consume all the
+    /// items in the collection and do *something* with some or all of them, and this lets you do
+    /// that, without the `unsafe` that `.as_raw_mut().drain()` would require.
+    ///
+    /// To improve compatibility with Cargo features, interact with this map through the names
+    /// [`anymap::RawMap`][RawMap] and [`anymap::raw_hash_map`][raw_hash_map], rather than through
+    /// `std::collections::{HashMap, hash_map}` or `hashbrown::{HashMap, hash_map}`, for anything
+    /// beyond self methods. Otherwise, if you use std and another crate in the tree enables
+    /// hashbrown, your code will break.
      #[inline]
-    fn into(self) -> RawMap<A> {
+    pub fn into_raw(self) -> RawMap<A> {
          self.raw
      }
+
+    /// Construct a map from a collection of raw values.
+    ///
+    /// You know what? I can’t immediately think of any legitimate use for this, especially because
+    /// of the requirement of the `BuildHasherDefault<TypeIdHasher>` generic in the map.
+    ///
+    /// Perhaps this will be most practical as `unsafe { Map::from_raw(iter.collect()) }`, iter
+    /// being an iterator over `(TypeId, Box<A>)` pairs. Eh, this method provides symmetry with
+    /// `into_raw`, so I don’t care if literally no one ever uses it. I’m not even going to write a
+    /// test for it, it’s so trivial.
+    ///
+    /// To improve compatibility with Cargo features, interact with this map through the names
+    /// [`anymap::RawMap`][RawMap] and [`anymap::raw_hash_map`][raw_hash_map], rather than through
+    /// `std::collections::{HashMap, hash_map}` or `hashbrown::{HashMap, hash_map}`, for anything
+    /// beyond self methods. Otherwise, if you use std and another crate in the tree enables
+    /// hashbrown, your code will break.
+    ///
+    /// # Safety
+    ///
+    /// For all entries in the raw map, the key (a `TypeId`) must match the value’s type,
+    /// or *undefined behaviour* will occur when you access that entry.
+    #[inline]
+    pub unsafe fn from_raw(raw: RawMap<A>) -> Map<A> {
+        Self { raw }
+    }
+}
+
+impl<A: ?Sized + UncheckedAnyExt> Extend<Box<A>> for Map<A> {
+    // Keys on `(*item).type_id()`, not `item.type_id()`: the latter can resolve to `Any` for
+    // `Box<A>` itself, filing every value under the box’s `TypeId` instead of its own.
+    #[inline]
+    fn extend<T: IntoIterator<Item = Box<A>>>(&mut self, iter: T) {
+        self.raw.extend(iter.into_iter().map(|item| ((*item).type_id(), item)));
+    }
+}
  
  /// A view into a single occupied location in an `Map`.
  pub struct OccupiedEntry<'a, A: ?Sized + UncheckedAnyExt, V: 'a> {
-    inner: raw::OccupiedEntry<'a, A>,
+    #[cfg(all(feature = "std", not(feature = "hashbrown")))]
+    inner: raw_hash_map::OccupiedEntry<'a, TypeId, Box<A>>,
+    #[cfg(feature = "hashbrown")]
+    inner: raw_hash_map::OccupiedEntry<'a, TypeId, Box<A>, BuildHasherDefault<TypeIdHasher>>,
      type_: PhantomData<V>,
  }
  
  /// A view into a single empty location in an `Map`.
  pub struct VacantEntry<'a, A: ?Sized + UncheckedAnyExt, V: 'a> {
-    inner: raw::VacantEntry<'a, A>,
+    #[cfg(all(feature = "std", not(feature = "hashbrown")))]
+    inner: raw_hash_map::VacantEntry<'a, TypeId, Box<A>>,
+    #[cfg(feature = "hashbrown")]
+    inner: raw_hash_map::VacantEntry<'a, TypeId, Box<A>, BuildHasherDefault<TypeIdHasher>>,
      type_: PhantomData<V>,
  }
  
@@ -306,92 +453,38 @@ impl<'a, A: ?Sized + UncheckedAnyExt, V: IntoBox<A>> VacantEntry<'a, A, V> {
      }
  }
  
-#[cfg(all(feature = "bench", test))]
-mod bench {
-    use AnyMap;
-    use test::Bencher;
-    use test::black_box;
-
-    #[bench]
-    fn insertion(b: &mut Bencher) {
-        b.iter(|| {
-            let mut data = AnyMap::new();
-            for _ in 0..100 {
-                let _ = data.insert(42);
-            }
-        })
-    }
-
-    #[bench]
-    fn get_missing(b: &mut Bencher) {
-        b.iter(|| {
-            let data = AnyMap::new();
-            for _ in 0..100 {
-                assert_eq!(data.get(), None::<&i32>);
-            }
-        })
-    }
-
-    #[bench]
-    fn get_present(b: &mut Bencher) {
-        b.iter(|| {
-            let mut data = AnyMap::new();
-            let _ = data.insert(42);
-            // These inner loops are a feeble attempt to drown the other factors.
-            for _ in 0..100 {
-                assert_eq!(data.get(), Some(&42));
-            }
-        })
-    }
-
-    macro_rules! big_benchmarks {
-        ($name:ident, $($T:ident)*) => (
-            #[bench]
-            fn $name(b: &mut Bencher) {
-                $(
-                    struct $T(&'static str);
-                )*
-
-                b.iter(|| {
-                    let mut data = AnyMap::new();
-                    $(
-                        let _ = black_box(data.insert($T(stringify!($T))));
-                    )*
-                    $(
-                        let _ = black_box(data.get::<$T>());
-                    )*
-                })
-            }
-        );
-    }
+/// A hasher designed to eke a little more speed out, given `TypeId`’s known characteristics.
+///
+/// Specifically, this is a no-op hasher that expects to be fed a u64’s worth of
+/// randomly-distributed bits. It works well for `TypeId` (eliminating start-up time, so that my
+/// get_missing benchmark is ~30ns rather than ~900ns, and being a good deal faster after that, so
+/// that my insert_and_get_on_260_types benchmark is ~12μs instead of ~21.5μs), but any write that
+/// is not exactly eight bytes panics in debug mode and is ignored in release mode (leaving a fresh
+/// hasher emitting zeros), so yeah, don’t use it for anything else! 😀
+#[derive(Clone, Debug, Default)]
+pub struct TypeIdHasher {
+    value: u64,
+}
  
-    // Caution: if the macro does too much (e.g. assertions) this goes from being slow to being
-    // *really* slow (like add a minute for each assertion on it) and memory-hungry (like, adding
-    // several hundred megabytes to the peak for each assertion).
-    big_benchmarks! {
-        insert_and_get_on_260_types,
-        A0 B0 C0 D0 E0 F0 G0 H0 I0 J0 K0 L0 M0 N0 O0 P0 Q0 R0 S0 T0 U0 V0 W0 X0 Y0 Z0
-        A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 O1 P1 Q1 R1 S1 T1 U1 V1 W1 X1 Y1 Z1
-        A2 B2 C2 D2 E2 F2 G2 H2 I2 J2 K2 L2 M2 N2 O2 P2 Q2 R2 S2 T2 U2 V2 W2 X2 Y2 Z2
-        A3 B3 C3 D3 E3 F3 G3 H3 I3 J3 K3 L3 M3 N3 O3 P3 Q3 R3 S3 T3 U3 V3 W3 X3 Y3 Z3
-        A4 B4 C4 D4 E4 F4 G4 H4 I4 J4 K4 L4 M4 N4 O4 P4 Q4 R4 S4 T4 U4 V4 W4 X4 Y4 Z4
-        A5 B5 C5 D5 E5 F5 G5 H5 I5 J5 K5 L5 M5 N5 O5 P5 Q5 R5 S5 T5 U5 V5 W5 X5 Y5 Z5
-        A6 B6 C6 D6 E6 F6 G6 H6 I6 J6 K6 L6 M6 N6 O6 P6 Q6 R6 S6 T6 U6 V6 W6 X6 Y6 Z6
-        A7 B7 C7 D7 E7 F7 G7 H7 I7 J7 K7 L7 M7 N7 O7 P7 Q7 R7 S7 T7 U7 V7 W7 X7 Y7 Z7
-        A8 B8 C8 D8 E8 F8 G8 H8 I8 J8 K8 L8 M8 N8 O8 P8 Q8 R8 S8 T8 U8 V8 W8 X8 Y8 Z8
-        A9 B9 C9 D9 E9 F9 G9 H9 I9 J9 K9 L9 M9 N9 O9 P9 Q9 R9 S9 T9 U9 V9 W9 X9 Y9 Z9
+impl Hasher for TypeIdHasher {
+    #[inline]
+    fn write(&mut self, bytes: &[u8]) {
+        // `TypeId` is expected to hash as exactly one 64-bit value. That is not a documented
+        // guarantee, so safety never depends on it: any other length panics in debug builds and
+        // is ignored in release builds (where the worst case is everything sharing one bucket).
+        debug_assert_eq!(bytes.len(), 8);
+        if let Ok(array) = TryInto::<[u8; 8]>::try_into(bytes) {
+            self.value = u64::from_ne_bytes(array);
+        }
      }
  
-    big_benchmarks! {
-        insert_and_get_on_26_types,
-        A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
-    }
+    #[inline]
+    fn finish(&self) -> u64 { self.value }
  }
  
  #[cfg(test)]
  mod tests {
-    use {Map, AnyMap, Entry};
-    use any::{Any, CloneAny};
+    use super::*;
  
      #[derive(Clone, Debug, PartialEq)] struct A(i32);
      #[derive(Clone, Debug, PartialEq)] struct B(i32);
@@ -473,11 +566,17 @@ mod tests {
      }
  
      test_entry!(test_entry_any, AnyMap);
-    test_entry!(test_entry_cloneany, Map<CloneAny>);
+    test_entry!(test_entry_cloneany, Map<dyn CloneAny>);
+
+    #[test]
+    fn test_default() {
+        // A default-constructed map must start out empty.
+        assert_eq!(AnyMap::default().len(), 0);
+    }
  
      #[test]
      fn test_clone() {
-        let mut map: Map<CloneAny> = Map::new();
+        let mut map: Map<dyn CloneAny> = Map::new();
          let _ = map.insert(A(1));
          let _ = map.insert(B(2));
          let _ = map.insert(D(3));
@@ -500,26 +599,40 @@ mod tests {
          fn assert_send<T: Send>() { }
          fn assert_sync<T: Sync>() { }
          fn assert_clone<T: Clone>() { }
-        fn assert_debug<T: ::std::fmt::Debug>() { }
-        assert_send::<Map<Any + Send>>();
-        assert_send::<Map<Any + Send + Sync>>();
-        assert_sync::<Map<Any + Sync>>();
-        assert_sync::<Map<Any + Send + Sync>>();
-        assert_debug::<Map<Any>>();
-        assert_debug::<Map<Any + Send>>();
-        assert_debug::<Map<Any + Sync>>();
-        assert_debug::<Map<Any + Send + Sync>>();
-        assert_send::<Map<CloneAny + Send>>();
-        assert_send::<Map<CloneAny + Send + Sync>>();
-        assert_sync::<Map<CloneAny + Sync>>();
-        assert_sync::<Map<CloneAny + Send + Sync>>();
-        assert_clone::<Map<CloneAny + Send>>();
-        assert_clone::<Map<CloneAny + Send + Sync>>();
-        assert_clone::<Map<CloneAny + Sync>>();
-        assert_clone::<Map<CloneAny + Send + Sync>>();
-        assert_debug::<Map<CloneAny>>();
-        assert_debug::<Map<CloneAny + Send>>();
-        assert_debug::<Map<CloneAny + Sync>>();
-        assert_debug::<Map<CloneAny + Send + Sync>>();
+        fn assert_debug<T: ::core::fmt::Debug>() { }
+        assert_send::<Map<dyn Any + Send>>();
+        assert_send::<Map<dyn Any + Send + Sync>>();
+        assert_sync::<Map<dyn Any + Send + Sync>>();
+        assert_debug::<Map<dyn Any>>();
+        assert_debug::<Map<dyn Any + Send>>();
+        assert_debug::<Map<dyn Any + Send + Sync>>();
+        assert_send::<Map<dyn CloneAny + Send>>();
+        assert_send::<Map<dyn CloneAny + Send + Sync>>();
+        assert_sync::<Map<dyn CloneAny + Send + Sync>>();
+        assert_clone::<Map<dyn CloneAny + Send>>();
+        assert_clone::<Map<dyn CloneAny + Send + Sync>>();
+        assert_clone::<Map<dyn CloneAny + Send + Sync>>();
+        assert_debug::<Map<dyn CloneAny>>();
+        assert_debug::<Map<dyn CloneAny + Send>>();
+        assert_debug::<Map<dyn CloneAny + Send + Sync>>();
+    }
+
+    #[test]
+    fn type_id_hasher() {
+        #[cfg(not(feature = "std"))]
+        use alloc::vec::Vec;
+        use core::hash::Hash;
+        // Hashing a `TypeId` with `TypeIdHasher` must hand back that `TypeId`’s own bits.
+        fn verify_hashing_with(type_id: TypeId) {
+            let mut hasher = TypeIdHasher::default();
+            type_id.hash(&mut hasher);
+            // SAFETY: u64 is valid for all bit patterns.
+            let expected = unsafe { core::mem::transmute::<TypeId, u64>(type_id) };
+            assert_eq!(hasher.finish(), expected);
+        }
+        // Pick a variety of types, just to demonstrate it’s all sane. Normal, zero-sized, unsized, &c.
+        let ids = [TypeId::of::<usize>(), TypeId::of::<()>(), TypeId::of::<str>(),
+                   TypeId::of::<&str>(), TypeId::of::<Vec<u8>>()];
+        ids.iter().copied().for_each(verify_hashing_with);
      }
  }