// Copyright © 2019 The Rust Fuzz Project Developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! The `Arbitrary` trait crate. //! //! This trait provides an [`Arbitrary`] trait to //! produce well-typed, structured values, from raw, byte buffers. It is //! generally intended to be used with fuzzers like AFL or libFuzzer. See the //! [`Arbitrary`] trait's documentation for details on //! automatically deriving, implementing, and/or using the trait. #![deny(bad_style)] #![deny(missing_docs)] #![deny(future_incompatible)] #![deny(nonstandard_style)] #![deny(rust_2018_compatibility)] #![deny(rust_2018_idioms)] #![deny(unused)] mod error; mod foreign; pub mod size_hint; pub mod unstructured; #[cfg(test)] mod tests; pub use error::*; #[cfg(feature = "derive_arbitrary")] pub use derive_arbitrary::*; #[doc(inline)] pub use unstructured::Unstructured; /// Error indicating that the maximum recursion depth has been reached while calculating [`Arbitrary::size_hint`]() #[derive(Debug, Clone)] #[non_exhaustive] pub struct MaxRecursionReached {} impl core::fmt::Display for MaxRecursionReached { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("Maximum recursion depth has been reached") } } impl std::error::Error for MaxRecursionReached {} /// Generate arbitrary structured values from raw, unstructured data. /// /// The `Arbitrary` trait allows you to generate valid structured values, like /// `HashMap`s, or ASTs, or `MyTomlConfig`, or any other data structure from /// raw, unstructured bytes provided by a fuzzer. /// /// # Deriving `Arbitrary` /// /// Automatically deriving the `Arbitrary` trait is the recommended way to /// implement `Arbitrary` for your types. /// /// Using the custom derive requires that you enable the `"derive"` cargo /// feature in your `Cargo.toml`: /// /// ```toml /// [dependencies] /// arbitrary = { version = "1", features = ["derive"] } /// ``` /// /// Then, you add the `#[derive(Arbitrary)]` annotation to your `struct` or /// `enum` type definition: /// /// ``` /// # #[cfg(feature = "derive")] mod foo { /// use arbitrary::Arbitrary; /// use std::collections::HashSet; /// /// #[derive(Arbitrary)] /// pub struct AddressBook { /// friends: HashSet, /// } /// /// #[derive(Arbitrary, Hash, Eq, PartialEq)] /// pub enum Friend { /// Buddy { name: String }, /// Pal { age: usize }, /// } /// # } /// ``` /// /// Every member of the `struct` or `enum` must also implement `Arbitrary`. /// /// It is also possible to change the default bounds added by the derive: /// /// ``` /// # #[cfg(feature = "derive")] mod foo { /// use arbitrary::Arbitrary; /// /// trait Trait { /// type Assoc: for<'a> Arbitrary<'a>; /// } /// /// #[derive(Arbitrary)] /// // The bounds are used verbatim, so any existing trait bounds will need to be repeated. /// #[arbitrary(bound = "T: Trait")] /// struct Point { /// x: T::Assoc, /// } /// # } /// ``` /// /// # Implementing `Arbitrary` By Hand /// /// Implementing `Arbitrary` mostly involves nested calls to other `Arbitrary` /// arbitrary implementations for each of your `struct` or `enum`'s members. But /// sometimes you need some amount of raw data, or you need to generate a /// variably-sized collection type, or something of that sort. The /// [`Unstructured`] type helps you with these tasks. /// /// ``` /// # #[cfg(feature = "derive")] mod foo { /// # pub struct MyCollection { _t: std::marker::PhantomData } /// # impl MyCollection { /// # pub fn new() -> Self { MyCollection { _t: std::marker::PhantomData } } /// # pub fn insert(&mut self, element: T) {} /// # } /// use arbitrary::{Arbitrary, Result, Unstructured}; /// /// impl<'a, T> Arbitrary<'a> for MyCollection /// where /// T: Arbitrary<'a>, /// { /// fn arbitrary(u: &mut Unstructured<'a>) -> Result { /// // Get an iterator of arbitrary `T`s. /// let iter = u.arbitrary_iter::()?; /// /// // And then create a collection! /// let mut my_collection = MyCollection::new(); /// for elem_result in iter { /// let elem = elem_result?; /// my_collection.insert(elem); /// } /// /// Ok(my_collection) /// } /// } /// # } /// ``` /// /// # A Note On Output Distributions /// /// There is no requirement for a particular distribution of the values. For /// example, it is not required that every value appears with the same /// probability. That being said, the main use for `Arbitrary` is for fuzzing, /// so in many cases a uniform distribution will make the most sense in order to /// provide the best coverage of the domain. In other cases this is not /// desirable or even possible, for example when sampling from a uniform /// distribution is computationally expensive or in the case of collections that /// may grow indefinitely. pub trait Arbitrary<'a>: Sized { /// Generate an arbitrary value of `Self` from the given unstructured data. /// /// Calling `Arbitrary::arbitrary` requires that you have some raw data, /// perhaps given to you by a fuzzer like AFL or libFuzzer. You wrap this /// raw data in an `Unstructured`, and then you can call `::arbitrary` to construct an arbitrary instance of `MyType` /// from that unstructured data. /// /// Implementations may return an error if there is not enough data to /// construct a full instance of `Self`, or they may fill out the rest of /// `Self` with dummy values. Using dummy values when the underlying data is /// exhausted can help avoid accidentally "defeating" some of the fuzzer's /// mutations to the underlying byte stream that might otherwise lead to /// interesting runtime behavior or new code coverage if only we had just a /// few more bytes. However, it also requires that implementations for /// recursive types (e.g. `struct Foo(Option>)`) avoid infinite /// recursion when the underlying data is exhausted. /// /// ``` /// # #[cfg(feature = "derive")] fn foo() { /// use arbitrary::{Arbitrary, Unstructured}; /// /// #[derive(Arbitrary)] /// pub struct MyType { /// // ... /// } /// /// // Get the raw data from the fuzzer or wherever else. /// # let get_raw_data_from_fuzzer = || &[]; /// let raw_data: &[u8] = get_raw_data_from_fuzzer(); /// /// // Wrap that raw data in an `Unstructured`. /// let mut unstructured = Unstructured::new(raw_data); /// /// // Generate an arbitrary instance of `MyType` and do stuff with it. /// if let Ok(value) = MyType::arbitrary(&mut unstructured) { /// # let do_stuff = |_| {}; /// do_stuff(value); /// } /// # } /// ``` /// /// See also the documentation for [`Unstructured`]. fn arbitrary(u: &mut Unstructured<'a>) -> Result; /// Generate an arbitrary value of `Self` from the entirety of the given /// unstructured data. /// /// This is similar to Arbitrary::arbitrary, however it assumes that it is /// the last consumer of the given data, and is thus able to consume it all /// if it needs. See also the documentation for /// [`Unstructured`]. fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result { Self::arbitrary(&mut u) } /// Get a size hint for how many bytes out of an `Unstructured` this type /// needs to construct itself. /// /// This is useful for determining how many elements we should insert when /// creating an arbitrary collection. /// /// The return value is similar to [`Iterator::size_hint`]: it returns a /// tuple where the first element is a lower bound on the number of bytes /// required, and the second element is an optional upper bound. /// /// The default implementation return `(0, None)` which is correct for any /// type, but not ultimately that useful. Using `#[derive(Arbitrary)]` will /// create a better implementation. If you are writing an `Arbitrary` /// implementation by hand, and your type can be part of a dynamically sized /// collection (such as `Vec`), you are strongly encouraged to override this /// default with a better implementation, and also override /// [`try_size_hint`]. /// /// ## How to implement this /// /// If the size hint calculation is a trivial constant and does not recurse /// into any other `size_hint` call, you should implement it in `size_hint`: /// /// ``` /// use arbitrary::{size_hint, Arbitrary, Result, Unstructured}; /// /// struct SomeStruct(u8); /// /// impl<'a> Arbitrary<'a> for SomeStruct { /// fn arbitrary(u: &mut Unstructured<'a>) -> Result { /// let buf = &mut [0]; /// u.fill_buffer(buf)?; /// Ok(SomeStruct(buf[0])) /// } /// /// #[inline] /// fn size_hint(depth: usize) -> (usize, Option) { /// let _ = depth; /// (1, Some(1)) /// } /// } /// ``` /// /// Otherwise, it should instead be implemented in [`try_size_hint`], /// and the `size_hint` implementation should forward to it: /// /// ``` /// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Result, Unstructured}; /// /// struct SomeStruct { /// a: A, /// b: B, /// } /// /// impl<'a, A: Arbitrary<'a>, B: Arbitrary<'a>> Arbitrary<'a> for SomeStruct { /// fn arbitrary(u: &mut Unstructured<'a>) -> Result { /// // ... /// # todo!() /// } /// /// fn size_hint(depth: usize) -> (usize, Option) { /// // Return the value of try_size_hint /// // /// // If the recursion fails, return the default, always valid `(0, None)` /// Self::try_size_hint(depth).unwrap_or_default() /// } /// /// fn try_size_hint(depth: usize) -> Result<(usize, Option), MaxRecursionReached> { /// // Protect against potential infinite recursion with /// // `try_recursion_guard`. /// size_hint::try_recursion_guard(depth, |depth| { /// // If we aren't too deep, then `recursion_guard` calls /// // this closure, which implements the natural size hint. /// // Don't forget to use the new `depth` in all nested /// // `try_size_hint` calls! We recommend shadowing the /// // parameter, like what is done here, so that you can't /// // accidentally use the wrong depth. /// Ok(size_hint::and( /// ::try_size_hint(depth)?, /// ::try_size_hint(depth)?, /// )) /// }) /// } /// } /// ``` /// /// ## Invariant /// /// It must be possible to construct every possible output using only inputs /// of lengths bounded by these parameters. This applies to both /// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`]. /// /// This is trivially true for `(0, None)`. To restrict this further, it /// must be proven that all inputs that are now excluded produced redundant /// outputs which are still possible to produce using the reduced input /// space. /// /// [iterator-size-hint]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.size_hint /// [`try_size_hint`]: Arbitrary::try_size_hint #[inline] fn size_hint(depth: usize) -> (usize, Option) { let _ = depth; (0, None) } /// Get a size hint for how many bytes out of an `Unstructured` this type /// needs to construct itself. /// /// Unlike [`size_hint`], this function keeps the information that the /// recursion limit was reached. This is required to "short circuit" the /// calculation and avoid exponential blowup with recursive structures. /// /// If you are implementing [`size_hint`] for a struct that could be /// recursive, you should implement `try_size_hint` and call the /// `try_size_hint` when recursing /// /// /// The return value is similar to [`core::iter::Iterator::size_hint`]: it /// returns a tuple where the first element is a lower bound on the number /// of bytes required, and the second element is an optional upper bound. /// /// The default implementation returns the value of [`size_hint`] which is /// correct for any type, but might lead to exponential blowup when dealing /// with recursive types. /// /// ## Invariant /// /// It must be possible to construct every possible output using only inputs /// of lengths bounded by these parameters. This applies to both /// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`]. /// /// This is trivially true for `(0, None)`. To restrict this further, it /// must be proven that all inputs that are now excluded produced redundant /// outputs which are still possible to produce using the reduced input /// space. /// /// ## When to implement `try_size_hint` /// /// If you 100% know that the type you are implementing `Arbitrary` for is /// not a recursive type, or your implementation is not transitively calling /// any other `size_hint` methods, you may implement [`size_hint`], and the /// default `try_size_hint` implementation will use it. /// /// Note that if you are implementing `Arbitrary` for a generic type, you /// cannot guarantee the lack of type recursion! /// /// Otherwise, when there is possible type recursion, you should implement /// `try_size_hint` instead. /// /// ## The `depth` parameter /// /// When implementing `try_size_hint`, you need to use /// [`arbitrary::size_hint::try_recursion_guard(depth)`][crate::size_hint::try_recursion_guard] /// to prevent potential infinite recursion when calculating size hints for /// potentially recursive types: /// /// ``` /// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Unstructured}; /// /// // This can potentially be a recursive type if `L` or `R` contain /// // something like `Box>>`! /// enum MyEither { /// Left(L), /// Right(R), /// } /// /// impl<'a, L, R> Arbitrary<'a> for MyEither /// where /// L: Arbitrary<'a>, /// R: Arbitrary<'a>, /// { /// fn arbitrary(u: &mut Unstructured) -> arbitrary::Result { /// // ... /// # unimplemented!() /// } /// /// fn size_hint(depth: usize) -> (usize, Option) { /// // Return the value of `try_size_hint` /// // /// // If the recursion fails, return the default `(0, None)` range, /// // which is always valid. /// Self::try_size_hint(depth).unwrap_or_default() /// } /// /// fn try_size_hint(depth: usize) -> Result<(usize, Option), MaxRecursionReached> { /// // Protect against potential infinite recursion with /// // `try_recursion_guard`. /// size_hint::try_recursion_guard(depth, |depth| { /// // If we aren't too deep, then `recursion_guard` calls /// // this closure, which implements the natural size hint. /// // Don't forget to use the new `depth` in all nested /// // `try_size_hint` calls! We recommend shadowing the /// // parameter, like what is done here, so that you can't /// // accidentally use the wrong depth. /// Ok(size_hint::or( /// ::try_size_hint(depth)?, /// ::try_size_hint(depth)?, /// )) /// }) /// } /// } /// ``` #[inline] fn try_size_hint(depth: usize) -> Result<(usize, Option), MaxRecursionReached> { Ok(Self::size_hint(depth)) } } /// Multiple conflicting arbitrary attributes are used on the same field: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// struct Point { /// #[arbitrary(value = 2)] /// #[arbitrary(value = 2)] /// x: i32, /// } /// ``` /// /// An unknown attribute: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// struct Point { /// #[arbitrary(unknown_attr)] /// x: i32, /// } /// ``` /// /// An unknown attribute with a value: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// struct Point { /// #[arbitrary(unknown_attr = 13)] /// x: i32, /// } /// ``` /// /// `value` without RHS: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// struct Point { /// #[arbitrary(value)] /// x: i32, /// } /// ``` /// /// `with` without RHS: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// struct Point { /// #[arbitrary(with)] /// x: i32, /// } /// ``` /// /// Multiple conflicting bounds at the container-level: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// #[arbitrary(bound = "T: Default")] /// #[arbitrary(bound = "T: Default")] /// struct Point { /// #[arbitrary(default)] /// x: T, /// } /// ``` /// /// Multiple conflicting bounds in a single bound attribute: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// #[arbitrary(bound = "T: Default, T: Default")] /// struct Point { /// #[arbitrary(default)] /// x: T, /// } /// ``` /// /// Multiple conflicting bounds in multiple bound attributes: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// #[arbitrary(bound = "T: Default", bound = "T: Default")] /// struct Point { /// #[arbitrary(default)] /// x: T, /// } /// ``` /// /// Too many bounds supplied: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// #[arbitrary(bound = "T: Default")] /// struct Point { /// x: i32, /// } /// ``` /// /// Too many bounds supplied across multiple attributes: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// #[arbitrary(bound = "T: Default")] /// #[arbitrary(bound = "U: Default")] /// struct Point { /// #[arbitrary(default)] /// x: T, /// } /// ``` /// /// Attempt to use the derive attribute on an enum variant: /// ```compile_fail /// #[derive(::arbitrary::Arbitrary)] /// enum Enum { /// #[arbitrary(default)] /// Variant(T), /// } /// ``` #[cfg(all(doctest, feature = "derive"))] pub struct CompileFailTests;