use alloc::{ string::{String, ToString}, vec, vec::Vec, }; use crate::ext::Byte; pub(crate) mod naive; #[macro_use] pub(crate) mod prop; const SEEDS: &'static [Seed] = &[ Seed { haystack: "a", needles: &[b'a'], positions: &[0] }, Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] }, Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] }, Seed { haystack: "", needles: &[b'a'], positions: &[] }, Seed { haystack: "z", needles: &[b'a'], positions: &[] }, Seed { haystack: "zz", needles: &[b'a'], positions: &[] }, Seed { haystack: "zza", needles: &[b'a'], positions: &[2] }, Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] }, Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] }, Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] }, Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] }, Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] }, Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] }, Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] }, Seed { haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[32], }, // two needles (applied to memchr2 + memchr3) Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] }, Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] }, Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] }, Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] }, Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] }, // three needles (applied to memchr3) Seed { haystack: "xyz", needles: &[b'x', b'y', b'z'], positions: &[0, 1, 2], }, Seed { haystack: "zxy", needles: &[b'x', b'y', b'z'], positions: &[0, 1, 2], }, Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] }, Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] }, Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] }, ]; /// Runs a host of substring search tests. /// /// This has support for "partial" substring search implementations only work /// for a subset of needles/haystacks. For example, the "packed pair" substring /// search implementation only works for haystacks of some minimum length based /// of the pair of bytes selected and the size of the vector used. pub(crate) struct Runner { needle_len: usize, } impl Runner { /// Create a new test runner for forward and reverse byte search /// implementations. /// /// The `needle_len` given must be at most `3` and at least `1`. It /// corresponds to the number of needle bytes to search for. pub(crate) fn new(needle_len: usize) -> Runner { assert!(needle_len >= 1, "needle_len must be at least 1"); assert!(needle_len <= 3, "needle_len must be at most 3"); Runner { needle_len } } /// Run all tests. This panics on the first failure. /// /// If the implementation being tested returns `None` for a particular /// haystack/needle combination, then that test is skipped. pub(crate) fn forward_iter(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option> + 'static, { for seed in SEEDS.iter() { if seed.needles.len() > self.needle_len { continue; } for t in seed.generate() { let results = match test(t.haystack.as_bytes(), &t.needles) { None => continue, Some(results) => results, }; assert_eq!( t.expected, results, "needles: {:?}, haystack: {:?}", t.needles .iter() .map(|&b| b.to_char()) .collect::>(), t.haystack, ); } } } /// Run all tests in the reverse direction. This panics on the first /// failure. /// /// If the implementation being tested returns `None` for a particular /// haystack/needle combination, then that test is skipped. pub(crate) fn reverse_iter(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option> + 'static, { for seed in SEEDS.iter() { if seed.needles.len() > self.needle_len { continue; } for t in seed.generate() { let mut results = match test(t.haystack.as_bytes(), &t.needles) { None => continue, Some(results) => results, }; results.reverse(); assert_eq!( t.expected, results, "needles: {:?}, haystack: {:?}", t.needles .iter() .map(|&b| b.to_char()) .collect::>(), t.haystack, ); } } } /// Run all tests as counting tests. This panics on the first failure. /// /// That is, this only checks that the number of matches is correct and /// not whether the offsets of each match are. pub(crate) fn count_iter(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option + 'static, { for seed in SEEDS.iter() { if seed.needles.len() > self.needle_len { continue; } for t in seed.generate() { let got = match test(t.haystack.as_bytes(), &t.needles) { None => continue, Some(got) => got, }; assert_eq!( t.expected.len(), got, "needles: {:?}, haystack: {:?}", t.needles .iter() .map(|&b| b.to_char()) .collect::>(), t.haystack, ); } } } /// Like `Runner::forward`, but for a function that returns only the next /// match and not all matches. /// /// If the function returns `None`, then it is skipped. pub(crate) fn forward_oneshot(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option> + 'static, { self.forward_iter(move |haystack, needles| { let mut start = 0; let mut results = vec![]; while let Some(i) = test(&haystack[start..], needles)? { results.push(start + i); start += i + 1; } Some(results) }) } /// Like `Runner::reverse`, but for a function that returns only the last /// match and not all matches. /// /// If the function returns `None`, then it is skipped. pub(crate) fn reverse_oneshot(self, mut test: F) where F: FnMut(&[u8], &[u8]) -> Option> + 'static, { self.reverse_iter(move |haystack, needles| { let mut end = haystack.len(); let mut results = vec![]; while let Some(i) = test(&haystack[..end], needles)? { results.push(i); end = i; } Some(results) }) } } /// A single test for memr?chr{,2,3}. #[derive(Clone, Debug)] struct Test { /// The string to search in. haystack: String, /// The needles to look for. needles: Vec, /// The offsets that are expected to be found for all needles in the /// forward direction. expected: Vec, } impl Test { fn new(seed: &Seed) -> Test { Test { haystack: seed.haystack.to_string(), needles: seed.needles.to_vec(), expected: seed.positions.to_vec(), } } } /// Data that can be expanded into many memchr tests by padding out the corpus. #[derive(Clone, Debug)] struct Seed { /// The thing to search. We use `&str` instead of `&[u8]` because they /// are nicer to write in tests, and we don't miss much since memchr /// doesn't care about UTF-8. /// /// Corpora cannot contain either '%' or '#'. We use these bytes when /// expanding test cases into many test cases, and we assume they are not /// used. If they are used, `memchr_tests` will panic. haystack: &'static str, /// The needles to search for. This is intended to be an alternation of /// needles. The number of needles may cause this test to be skipped for /// some memchr variants. For example, a test with 2 needles cannot be used /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. /// However, a test with only 1 needle can be used to test all of `memchr`, /// `memchr2` and `memchr3`. We achieve this by filling in the needles with /// bytes that we never used in the corpus (such as '#'). needles: &'static [u8], /// The positions expected to match for all of the needles. positions: &'static [usize], } impl Seed { /// Controls how much we expand the haystack on either side for each test. /// We lower this on Miri because otherwise running the tests would take /// forever. const EXPAND_LEN: usize = { #[cfg(not(miri))] { 515 } #[cfg(miri)] { 6 } }; /// Expand this test into many variations of the same test. /// /// In particular, this will generate more tests with larger corpus sizes. /// The expected positions are updated to maintain the integrity of the /// test. /// /// This is important in testing a memchr implementation, because there are /// often different cases depending on the length of the corpus. /// /// Note that we extend the corpus by adding `%` bytes, which we /// don't otherwise use as a needle. fn generate(&self) -> impl Iterator { let mut more = vec![]; // Add bytes to the start of the corpus. for i in 0..Seed::EXPAND_LEN { let mut t = Test::new(self); let mut new: String = core::iter::repeat('%').take(i).collect(); new.push_str(&t.haystack); t.haystack = new; t.expected = t.expected.into_iter().map(|p| p + i).collect(); more.push(t); } // Add bytes to the end of the corpus. for i in 1..Seed::EXPAND_LEN { let mut t = Test::new(self); let padding: String = core::iter::repeat('%').take(i).collect(); t.haystack.push_str(&padding); more.push(t); } more.into_iter() } }