| use crate::arch::all::{ |
| packedpair::{HeuristicFrequencyRank, Pair}, |
| rabinkarp, twoway, |
| }; |
| |
| #[cfg(target_arch = "aarch64")] |
| use crate::arch::aarch64::neon::packedpair as neon; |
| #[cfg(target_arch = "wasm32")] |
| use crate::arch::wasm32::simd128::packedpair as simd128; |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| use crate::arch::x86_64::{ |
| avx2::packedpair as avx2, sse2::packedpair as sse2, |
| }; |
| |
/// A "meta" substring searcher.
///
/// To a first approximation, this chooses what it believes to be the "best"
/// substring search implementation based on the needle at construction time.
/// Then, every call to `find` will execute that particular implementation. To
/// a second approximation, multiple substring search algorithms may be used,
/// depending on the haystack. For example, for supremely short haystacks,
/// Rabin-Karp is typically used.
///
/// See the documentation on `Prefilter` for an explanation of the dispatching
/// mechanism. The quick summary is that an enum has too much overhead and
/// we can't use dynamic dispatch via traits because we need to work in a
/// core-only environment. (Dynamic dispatch works in core-only, but you
/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter
/// requires `alloc`.) So instead, we use a union and an appropriately paired
/// free function to read from the correct field on the union and execute the
/// chosen substring search implementation.
#[derive(Clone)]
pub(crate) struct Searcher {
    /// The free function to call to execute a search. It must always be
    /// paired with the union field set in `kind` below.
    call: SearcherKindFn,
    /// The state of whichever substring search implementation was selected
    /// at construction time.
    kind: SearcherKind,
    /// A Rabin-Karp finder, kept around as a fallback for very short
    /// haystacks regardless of which primary implementation was chosen.
    rabinkarp: rabinkarp::Finder,
}
| |
impl Searcher {
    /// Creates a new "meta" substring searcher that attempts to choose the
    /// best algorithm based on the needle, heuristics and what the current
    /// target supports.
    #[inline]
    pub(crate) fn new<R: HeuristicFrequencyRank>(
        prefilter: PrefilterConfig,
        ranker: R,
        needle: &[u8],
    ) -> Searcher {
        // Every searcher keeps a Rabin-Karp finder around, since it's used
        // as a fallback for very short haystacks no matter which primary
        // implementation is chosen below.
        let rabinkarp = rabinkarp::Finder::new(needle);
        // Needles of length 0 and 1 get trivial dedicated implementations.
        if needle.len() <= 1 {
            return if needle.is_empty() {
                trace!("building empty substring searcher");
                Searcher {
                    call: searcher_kind_empty,
                    kind: SearcherKind { empty: () },
                    rabinkarp,
                }
            } else {
                trace!("building one-byte substring searcher");
                debug_assert_eq!(1, needle.len());
                Searcher {
                    call: searcher_kind_one_byte,
                    kind: SearcherKind { one_byte: needle[0] },
                    rabinkarp,
                }
            };
        }
        // Without a heuristically chosen byte pair, neither the vector
        // searchers nor the prefilters below can be built, so fall back to
        // plain Two-Way.
        let pair = match Pair::with_ranker(needle, &ranker) {
            Some(pair) => pair,
            None => return Searcher::twoway(needle, rabinkarp, None),
        };
        debug_assert_ne!(
            pair.index1(),
            pair.index2(),
            "pair offsets should not be equivalent"
        );
        // Exactly one of the cfg-gated blocks below is compiled for any
        // given target, and that block forms this function's tail
        // expression. Each follows the same shape: when `do_packed_search`
        // approves the needle, the vector searcher handles the entire
        // search; otherwise it is only used as a prefilter for Two-Way
        // (unless prefilters are disabled by `prefilter`).
        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
        {
            if let Some(pp) = avx2::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building x86_64 AVX2 substring searcher");
                    let kind = SearcherKind { avx2: pp };
                    Searcher { call: searcher_kind_avx2, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::avx2(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building x86_64 SSE2 substring searcher");
                    let kind = SearcherKind { sse2: pp };
                    Searcher { call: searcher_kind_sse2, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::sse2(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                // We're pretty unlikely to get to this point, but it is
                // possible to be running on x86_64 without SSE2. Namely, it's
                // really up to the OS whether it wants to support vector
                // registers or not.
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(target_arch = "wasm32")]
        {
            if let Some(pp) = simd128::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building wasm32 simd128 substring searcher");
                    let kind = SearcherKind { simd128: pp };
                    Searcher { call: searcher_kind_simd128, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::simd128(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                // Note that `fallback` may itself decline (return `None`)
                // when the rarest needle byte is too common.
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            if let Some(pp) = neon::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building aarch64 neon substring searcher");
                    let kind = SearcherKind { neon: pp };
                    Searcher { call: searcher_kind_neon, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::neon(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(not(any(
            all(target_arch = "x86_64", target_feature = "sse2"),
            target_arch = "wasm32",
            target_arch = "aarch64"
        )))]
        {
            if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
    }

    /// Creates a new searcher that always uses the Two-Way algorithm. This is
    /// typically used when vector algorithms are unavailable or inappropriate.
    /// (For example, when the needle is "too long.")
    ///
    /// If a prefilter is given, then the searcher returned will be accelerated
    /// by the prefilter.
    #[inline]
    fn twoway(
        needle: &[u8],
        rabinkarp: rabinkarp::Finder,
        prestrat: Option<Prefilter>,
    ) -> Searcher {
        let finder = twoway::Finder::new(needle);
        match prestrat {
            None => {
                trace!("building scalar two-way substring searcher");
                let kind = SearcherKind { two_way: finder };
                Searcher { call: searcher_kind_two_way, kind, rabinkarp }
            }
            Some(prestrat) => {
                trace!(
                    "building scalar two-way \
                     substring searcher with a prefilter"
                );
                let two_way_with_prefilter =
                    TwoWayWithPrefilter { finder, prestrat };
                let kind = SearcherKind { two_way_with_prefilter };
                Searcher {
                    call: searcher_kind_two_way_with_prefilter,
                    kind,
                    rabinkarp,
                }
            }
        }
    }

    /// Searches the given haystack for the given needle. The needle given
    /// should be the same as the needle that this finder was initialized
    /// with.
    ///
    /// Inlining this can lead to big wins for latency, and #[inline] doesn't
    /// seem to be enough in some cases.
    #[inline(always)]
    pub(crate) fn find(
        &self,
        prestate: &mut PrefilterState,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        if haystack.len() < needle.len() {
            None
        } else {
            // SAFETY: By construction, we've ensured that the function
            // in `self.call` is properly paired with the union used in
            // `self.kind`.
            unsafe { (self.call)(self, prestate, haystack, needle) }
        }
    }
}
| |
| impl core::fmt::Debug for Searcher { |
| fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| f.debug_struct("Searcher") |
| .field("call", &"<searcher function>") |
| .field("kind", &"<searcher kind union>") |
| .field("rabinkarp", &self.rabinkarp) |
| .finish() |
| } |
| } |
| |
/// A union indicating one of several possible substring search implementations
/// that are in active use.
///
/// This union should only be read by one of the functions prefixed with
/// `searcher_kind_`. Namely, the correct function is meant to be paired with
/// the union by the caller, such that the function always reads from the
/// designated union field.
#[derive(Clone, Copy)]
union SearcherKind {
    /// Set when the needle is empty. Carries no state.
    empty: (),
    /// The needle's single byte, for needles of length 1.
    one_byte: u8,
    /// A scalar Two-Way searcher without prefilter acceleration.
    two_way: twoway::Finder,
    /// A scalar Two-Way searcher accelerated by a prefilter.
    two_way_with_prefilter: TwoWayWithPrefilter,
    /// A vectorized "packed pair" searcher for x86_64 SSE2.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
    /// A vectorized "packed pair" searcher for x86_64 AVX2.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
    /// A vectorized "packed pair" searcher for wasm32 simd128.
    #[cfg(target_arch = "wasm32")]
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
    /// A vectorized "packed pair" searcher for aarch64 neon.
    #[cfg(target_arch = "aarch64")]
    neon: crate::arch::aarch64::neon::packedpair::Finder,
}
| |
/// A two-way substring searcher with a prefilter.
#[derive(Copy, Clone, Debug)]
struct TwoWayWithPrefilter {
    /// The Two-Way searcher used to confirm candidate positions.
    finder: twoway::Finder,
    /// The prefilter used to quickly produce candidate positions.
    prestrat: Prefilter,
}
| |
/// The type of a substring search function.
///
/// The `needle` given to a function of this type must be identical to the
/// needle provided when the paired `SearcherKind` value was constructed.
///
/// # Safety
///
/// When using a function of this type, callers must ensure that the correct
/// function is paired with the value populated in `SearcherKind` union.
type SearcherKindFn = unsafe fn(
    searcher: &Searcher,
    prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize>;
| |
| /// Reads from the `empty` field of `SearcherKind` to handle the case of |
| /// searching for the empty needle. Works on all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.empty` union field is set. |
| unsafe fn searcher_kind_empty( |
| _searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| _haystack: &[u8], |
| _needle: &[u8], |
| ) -> Option<usize> { |
| Some(0) |
| } |
| |
| /// Reads from the `one_byte` field of `SearcherKind` to handle the case of |
| /// searching for a single byte needle. Works on all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.one_byte` union field is set. |
| unsafe fn searcher_kind_one_byte( |
| searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| haystack: &[u8], |
| _needle: &[u8], |
| ) -> Option<usize> { |
| let needle = searcher.kind.one_byte; |
| crate::memchr(needle, haystack) |
| } |
| |
| /// Reads from the `two_way` field of `SearcherKind` to handle the case of |
| /// searching for an arbitrary needle without prefilter acceleration. Works on |
| /// all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.two_way` union field is set. |
| unsafe fn searcher_kind_two_way( |
| searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| if rabinkarp::is_fast(haystack, needle) { |
| searcher.rabinkarp.find(haystack, needle) |
| } else { |
| searcher.kind.two_way.find(haystack, needle) |
| } |
| } |
| |
| /// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle |
| /// the case of searching for an arbitrary needle with prefilter acceleration. |
| /// Works on all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union |
| /// field is set. |
| unsafe fn searcher_kind_two_way_with_prefilter( |
| searcher: &Searcher, |
| prestate: &mut PrefilterState, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| if rabinkarp::is_fast(haystack, needle) { |
| searcher.rabinkarp.find(haystack, needle) |
| } else { |
| let TwoWayWithPrefilter { ref finder, ref prestrat } = |
| searcher.kind.two_way_with_prefilter; |
| let pre = Pre { prestate, prestrat }; |
| finder.find_with_prefilter(Some(pre), haystack, needle) |
| } |
| } |
| |
| /// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2 |
| /// vectorized substring search implementation. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.sse2` union field is set. |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| unsafe fn searcher_kind_sse2( |
| searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| let finder = &searcher.kind.sse2; |
| if haystack.len() < finder.min_haystack_len() { |
| searcher.rabinkarp.find(haystack, needle) |
| } else { |
| finder.find(haystack, needle) |
| } |
| } |
| |
| /// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2 |
| /// vectorized substring search implementation. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.avx2` union field is set. |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| unsafe fn searcher_kind_avx2( |
| searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| let finder = &searcher.kind.avx2; |
| if haystack.len() < finder.min_haystack_len() { |
| searcher.rabinkarp.find(haystack, needle) |
| } else { |
| finder.find(haystack, needle) |
| } |
| } |
| |
/// Runs the wasm32 simd128 vectorized searcher stored in the `simd128` field
/// of `SearcherKind`.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.simd128` union field is set.
#[cfg(target_arch = "wasm32")]
unsafe fn searcher_kind_simd128(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let pp = &searcher.kind.simd128;
    if haystack.len() >= pp.min_haystack_len() {
        pp.find(haystack, needle)
    } else {
        // The haystack is too short for the vector routine.
        searcher.rabinkarp.find(haystack, needle)
    }
}
| |
/// Runs the aarch64 neon vectorized searcher stored in the `neon` field of
/// `SearcherKind`.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.neon` union field is set.
#[cfg(target_arch = "aarch64")]
unsafe fn searcher_kind_neon(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let pp = &searcher.kind.neon;
    if haystack.len() >= pp.min_haystack_len() {
        pp.find(haystack, needle)
    } else {
        // The haystack is too short for the vector routine.
        searcher.rabinkarp.find(haystack, needle)
    }
}
| |
/// A reverse substring searcher.
#[derive(Clone, Debug)]
pub(crate) struct SearcherRev {
    /// The implementation chosen at construction time based on the needle.
    kind: SearcherRevKind,
    /// A reverse Rabin-Karp finder, used as a fallback for very short
    /// haystacks.
    rabinkarp: rabinkarp::FinderRev,
}
| |
/// The kind of the reverse searcher.
///
/// For the reverse case, we don't do any SIMD acceleration or prefilters.
/// There is no specific technical reason why we don't, but rather don't do it
/// because it's not clear it's worth the extra code to do so. If you have a
/// use case for it, please file an issue.
///
/// We also don't do the union trick as we do with the forward case and
/// prefilters. Basically for the same reason we don't have prefilters or
/// vector algorithms for reverse searching: it's not clear it's worth doing.
/// Please file an issue if you have a compelling use case for fast reverse
/// substring search.
#[derive(Clone, Debug)]
enum SearcherRevKind {
    /// The needle is empty; every haystack matches at its end.
    Empty,
    /// The needle is a single byte; search with `memrchr`.
    OneByte { needle: u8 },
    /// The needle has length greater than 1; search with reverse Two-Way.
    TwoWay { finder: twoway::FinderRev },
}
| |
| impl SearcherRev { |
| /// Creates a new searcher for finding occurrences of the given needle in |
| /// reverse. That is, it reports the last (instead of the first) occurrence |
| /// of a needle in a haystack. |
| #[inline] |
| pub(crate) fn new(needle: &[u8]) -> SearcherRev { |
| let kind = if needle.len() <= 1 { |
| if needle.is_empty() { |
| trace!("building empty reverse substring searcher"); |
| SearcherRevKind::Empty |
| } else { |
| trace!("building one-byte reverse substring searcher"); |
| debug_assert_eq!(1, needle.len()); |
| SearcherRevKind::OneByte { needle: needle[0] } |
| } |
| } else { |
| trace!("building scalar two-way reverse substring searcher"); |
| let finder = twoway::FinderRev::new(needle); |
| SearcherRevKind::TwoWay { finder } |
| }; |
| let rabinkarp = rabinkarp::FinderRev::new(needle); |
| SearcherRev { kind, rabinkarp } |
| } |
| |
| /// Searches the given haystack for the last occurrence of the given |
| /// needle. The needle given should be the same as the needle that this |
| /// finder was initialized with. |
| #[inline] |
| pub(crate) fn rfind( |
| &self, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| if haystack.len() < needle.len() { |
| return None; |
| } |
| match self.kind { |
| SearcherRevKind::Empty => Some(haystack.len()), |
| SearcherRevKind::OneByte { needle } => { |
| crate::memrchr(needle, haystack) |
| } |
| SearcherRevKind::TwoWay { ref finder } => { |
| if rabinkarp::is_fast(haystack, needle) { |
| self.rabinkarp.rfind(haystack, needle) |
| } else { |
| finder.rfind(haystack, needle) |
| } |
| } |
| } |
| } |
| } |
| |
/// Prefilter controls whether heuristics are used to accelerate searching.
///
/// A prefilter refers to the idea of detecting candidate matches very quickly,
/// and then confirming whether those candidates are full matches. This
/// idea can be quite effective since it's often the case that looking for
/// candidates can be a lot faster than running a complete substring search
/// over the entire input. Namely, looking for candidates can be done with
/// extremely fast vectorized code.
///
/// The downside of a prefilter is that it assumes false positives (which are
/// candidates generated by a prefilter that aren't matches) are somewhat rare
/// relative to the frequency of full matches. That is, if a lot of false
/// positives are generated, then it's possible for search time to be worse
/// than if the prefilter wasn't enabled in the first place.
///
/// Another downside of a prefilter is that it can result in highly variable
/// performance, where some cases are extraordinarily fast and others aren't.
/// Typically, variable performance isn't a problem, but it may be for your use
/// case.
///
/// The use of prefilters in this implementation does use a heuristic to detect
/// when a prefilter might not be carrying its weight, and will dynamically
/// disable its use. Nevertheless, this configuration option gives callers
/// the ability to disable prefilters if you have knowledge that they won't be
/// useful.
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum PrefilterConfig {
    /// Never use a prefilter in substring search.
    None,
    /// Automatically detect whether a heuristic prefilter should be used. If
    /// it is used, then heuristics will be used to dynamically disable the
    /// prefilter if it is believed to not be carrying its weight.
    Auto,
}
| |
| impl Default for PrefilterConfig { |
| fn default() -> PrefilterConfig { |
| PrefilterConfig::Auto |
| } |
| } |
| |
| impl PrefilterConfig { |
| /// Returns true when this prefilter is set to the `None` variant. |
| fn is_none(&self) -> bool { |
| matches!(*self, PrefilterConfig::None) |
| } |
| } |
| |
/// The implementation of a prefilter.
///
/// This type encapsulates dispatch to one of several possible choices for a
/// prefilter. Generally speaking, all prefilters have the same approximate
/// algorithm: they choose a couple of bytes from the needle that are believed
/// to be rare, use a fast vector algorithm to look for those bytes and return
/// positions as candidates for some substring search algorithm (currently only
/// Two-Way) to confirm as a match or not.
///
/// The differences between the algorithms are actually at the vector
/// implementation level. Namely, we need different routines based on both
/// which target architecture we're on and what CPU features are supported.
///
/// The straight-forwardly obvious approach here is to use an enum, and make
/// `Prefilter::find` do case analysis to determine which algorithm was
/// selected and invoke it. However, I've observed that this leads to poor
/// codegen in some cases, especially in latency sensitive benchmarks. That is,
/// this approach comes with overhead that I wasn't able to eliminate.
///
/// The second obvious approach is to use dynamic dispatch with traits. Doing
/// that in this context where `Prefilter` owns the selection generally
/// requires heap allocation, and this code is designed to run in core-only
/// environments.
///
/// So we settle on using a union (that's `PrefilterKind`) and a function
/// pointer (that's `PrefilterKindFn`). We select the right function pointer
/// based on which field in the union we set, and that function in turn
/// knows which field of the union to access. The downside of this approach
/// is that it forces us to think about safety, but the upside is that
/// there are some nice latency improvements to benchmarks. (Especially the
/// `memmem/sliceslice/short` benchmark.)
///
/// In cases where we've selected a vector algorithm and the haystack given
/// is too short, we fallback to the scalar version of `memchr` on the
/// `rarest_byte`. (The scalar version of `memchr` is still better than a naive
/// byte-at-a-time loop because it will read in `usize`-sized chunks at a
/// time.)
#[derive(Clone, Copy)]
struct Prefilter {
    /// The free function to call to run this prefilter. It must always be
    /// paired with the union field set in `kind` below.
    call: PrefilterKindFn,
    /// The state of whichever prefilter implementation was selected.
    kind: PrefilterKind,
    /// The byte believed to be rarest in the needle. Used by `find_simple`
    /// when the haystack is too short for the vector routine.
    rarest_byte: u8,
    /// The offset of `rarest_byte` within the needle. Candidate positions
    /// found via the rarest byte are shifted back by this amount.
    rarest_offset: u8,
}
| |
impl Prefilter {
    /// Return a "fallback" prefilter, but only if it is believed to be
    /// effective.
    #[inline]
    fn fallback<R: HeuristicFrequencyRank>(
        ranker: R,
        pair: Pair,
        needle: &[u8],
    ) -> Option<Prefilter> {
        /// The maximum frequency rank permitted for the fallback prefilter.
        /// If the rarest byte in the needle has a frequency rank above this
        /// value, then no prefilter is used if the fallback prefilter would
        /// otherwise be selected.
        const MAX_FALLBACK_RANK: u8 = 250;

        trace!("building fallback prefilter");
        // Throughout this module, the pair's first index is treated as the
        // offset of the byte believed to be rarest in the needle.
        let rarest_offset = pair.index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        let rarest_rank = ranker.rank(rarest_byte);
        if rarest_rank > MAX_FALLBACK_RANK {
            // The "rarest" byte is still quite common, so a prefilter is
            // likely to generate too many false positives to pay off.
            None
        } else {
            let finder = crate::arch::all::packedpair::Finder::with_pair(
                needle,
                pair.clone(),
            )?;
            let call = prefilter_kind_fallback;
            let kind = PrefilterKind { fallback: finder };
            Some(Prefilter { call, kind, rarest_byte, rarest_offset })
        }
    }

    /// Return a prefilter using a x86_64 SSE2 vector algorithm.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    #[inline]
    fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter {
        trace!("building x86_64 SSE2 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_sse2,
            kind: PrefilterKind { sse2: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using a x86_64 AVX2 vector algorithm.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    #[inline]
    fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter {
        trace!("building x86_64 AVX2 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_avx2,
            kind: PrefilterKind { avx2: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using a wasm32 simd128 vector algorithm.
    #[cfg(target_arch = "wasm32")]
    #[inline]
    fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter {
        trace!("building wasm32 simd128 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_simd128,
            kind: PrefilterKind { simd128: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using a aarch64 neon vector algorithm.
    #[cfg(target_arch = "aarch64")]
    #[inline]
    fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter {
        trace!("building aarch64 neon prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_neon,
            kind: PrefilterKind { neon: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a *candidate* position for a match.
    ///
    /// When this returns an offset, it implies that a match could begin at
    /// that offset, but it may not. That is, it is possible for a false
    /// positive to be returned.
    ///
    /// When `None` is returned, then it is guaranteed that there are no
    /// matches for the needle in the given haystack. That is, it is impossible
    /// for a false negative to be returned.
    ///
    /// The purpose of this routine is to look for candidate matching positions
    /// as quickly as possible before running a (likely) slower confirmation
    /// step.
    #[inline]
    fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: By construction, we've ensured that the function in
        // `self.call` is properly paired with the union used in `self.kind`.
        unsafe { (self.call)(self, haystack) }
    }

    /// A "simple" prefilter that just looks for the occurrence of the rarest
    /// byte from the needle. This is generally only used for very small
    /// haystacks.
    #[inline]
    fn find_simple(&self, haystack: &[u8]) -> Option<usize> {
        // We don't use crate::memchr here because the haystack should be small
        // enough that memchr won't be able to use vector routines anyway. So
        // we just skip straight to the fallback implementation which is likely
        // faster. (A byte-at-a-time loop is only used when the haystack is
        // smaller than `size_of::<usize>()`.)
        crate::arch::all::memchr::One::new(self.rarest_byte)
            .find(haystack)
            .map(|i| i.saturating_sub(usize::from(self.rarest_offset)))
    }
}
| |
| impl core::fmt::Debug for Prefilter { |
| fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| f.debug_struct("Prefilter") |
| .field("call", &"<prefilter function>") |
| .field("kind", &"<prefilter kind union>") |
| .field("rarest_byte", &self.rarest_byte) |
| .field("rarest_offset", &self.rarest_offset) |
| .finish() |
| } |
| } |
| |
/// A union indicating one of several possible prefilters that are in active
/// use.
///
/// This union should only be read by one of the functions prefixed with
/// `prefilter_kind_`. Namely, the correct function is meant to be paired with
/// the union by the caller, such that the function always reads from the
/// designated union field.
#[derive(Clone, Copy)]
union PrefilterKind {
    /// The scalar "packed pair" prefilter, available on all platforms.
    fallback: crate::arch::all::packedpair::Finder,
    /// A vectorized "packed pair" prefilter for x86_64 SSE2.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
    /// A vectorized "packed pair" prefilter for x86_64 AVX2.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
    /// A vectorized "packed pair" prefilter for wasm32 simd128.
    #[cfg(target_arch = "wasm32")]
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
    /// A vectorized "packed pair" prefilter for aarch64 neon.
    #[cfg(target_arch = "aarch64")]
    neon: crate::arch::aarch64::neon::packedpair::Finder,
}
| |
/// The type of a prefilter function.
///
/// A function of this type returns a *candidate* position (which may be a
/// false positive), or `None` when the needle is guaranteed to be absent
/// from the haystack.
///
/// # Safety
///
/// When using a function of this type, callers must ensure that the correct
/// function is paired with the value populated in `PrefilterKind` union.
type PrefilterKindFn =
    unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>;
| /// Reads from the `fallback` field of `PrefilterKind` to execute the fallback |
| /// prefilter. Works on all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `strat.kind.fallback` union field is set. |
| unsafe fn prefilter_kind_fallback( |
| strat: &Prefilter, |
| haystack: &[u8], |
| ) -> Option<usize> { |
| strat.kind.fallback.find_prefilter(haystack) |
| } |
| |
| /// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2 |
| /// prefilter. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `strat.kind.sse2` union field is set. |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| unsafe fn prefilter_kind_sse2( |
| strat: &Prefilter, |
| haystack: &[u8], |
| ) -> Option<usize> { |
| let finder = &strat.kind.sse2; |
| if haystack.len() < finder.min_haystack_len() { |
| strat.find_simple(haystack) |
| } else { |
| finder.find_prefilter(haystack) |
| } |
| } |
| |
| /// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2 |
| /// prefilter. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `strat.kind.avx2` union field is set. |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| unsafe fn prefilter_kind_avx2( |
| strat: &Prefilter, |
| haystack: &[u8], |
| ) -> Option<usize> { |
| let finder = &strat.kind.avx2; |
| if haystack.len() < finder.min_haystack_len() { |
| strat.find_simple(haystack) |
| } else { |
| finder.find_prefilter(haystack) |
| } |
| } |
| |
/// Runs the wasm32 simd128 prefilter stored in the `simd128` field of
/// `PrefilterKind`.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.simd128` union field is set.
#[cfg(target_arch = "wasm32")]
unsafe fn prefilter_kind_simd128(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let pp = &strat.kind.simd128;
    if haystack.len() >= pp.min_haystack_len() {
        pp.find_prefilter(haystack)
    } else {
        // The haystack is too short for the vector routine.
        strat.find_simple(haystack)
    }
}
| |
/// Runs the aarch64 neon prefilter stored in the `neon` field of
/// `PrefilterKind`.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.neon` union field is set.
#[cfg(target_arch = "aarch64")]
unsafe fn prefilter_kind_neon(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let pp = &strat.kind.neon;
    if haystack.len() >= pp.min_haystack_len() {
        pp.find_prefilter(haystack)
    } else {
        // The haystack is too short for the vector routine.
        strat.find_simple(haystack)
    }
}
| |
/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It is used to track how many bytes, on average, are skipped by
/// the prefilter. If this average dips below a certain threshold over time,
/// then the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator is treated as a single search.)
#[derive(Clone, Copy, Debug)]
pub(crate) struct PrefilterState {
    /// The number of skips that has been executed. This is always 1 greater
    /// than the actual number of skips. The special sentinel value of 0
    /// indicates that the prefilter is inert. This is useful to avoid
    /// additional checks to determine whether the prefilter is still
    /// "effective." Once a prefilter becomes inert, it should no longer be
    /// used (according to our heuristics).
    skips: u32,
    /// The total number of bytes that have been skipped.
    skipped: u32,
}
| |
| impl PrefilterState { |
| /// The minimum number of skip attempts to try before considering whether |
| /// a prefilter is effective or not. |
| const MIN_SKIPS: u32 = 50; |
| |
| /// The minimum amount of bytes that skipping must average. |
| /// |
| /// This value was chosen based on varying it and checking |
| /// the microbenchmarks. In particular, this can impact the |
| /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set |
| /// too low. |
| const MIN_SKIP_BYTES: u32 = 8; |
| |
| /// Create a fresh prefilter state. |
| #[inline] |
| pub(crate) fn new() -> PrefilterState { |
| PrefilterState { skips: 1, skipped: 0 } |
| } |
| |
| /// Update this state with the number of bytes skipped on the last |
| /// invocation of the prefilter. |
| #[inline] |
| fn update(&mut self, skipped: usize) { |
| self.skips = self.skips.saturating_add(1); |
| // We need to do this dance since it's technically possible for |
| // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the |
| // size of a prefilter state.) |
| self.skipped = match u32::try_from(skipped) { |
| Err(_) => core::u32::MAX, |
| Ok(skipped) => self.skipped.saturating_add(skipped), |
| }; |
| } |
| |
| /// Return true if and only if this state indicates that a prefilter is |
| /// still effective. |
| #[inline] |
| fn is_effective(&mut self) -> bool { |
| if self.is_inert() { |
| return false; |
| } |
| if self.skips() < PrefilterState::MIN_SKIPS { |
| return true; |
| } |
| if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { |
| return true; |
| } |
| |
| // We're inert. |
| self.skips = 0; |
| false |
| } |
| |
| /// Returns true if the prefilter this state represents should no longer |
| /// be used. |
| #[inline] |
| fn is_inert(&self) -> bool { |
| self.skips == 0 |
| } |
| |
| /// Returns the total number of times the prefilter has been used. |
| #[inline] |
| fn skips(&self) -> u32 { |
| // Remember, `0` is a sentinel value indicating inertness, so we |
| // always need to subtract `1` to get our actual number of skips. |
| self.skips.saturating_sub(1) |
| } |
| } |
| |
/// A combination of prefilter effectiveness state and the prefilter itself.
///
/// Bundling both borrows into one value makes it convenient to thread a
/// prefilter and its mutable bookkeeping through a search routine together.
#[derive(Debug)]
pub(crate) struct Pre<'a> {
    /// State that tracks the effectiveness of a prefilter.
    prestate: &'a mut PrefilterState,
    /// The actual prefilter.
    prestrat: &'a Prefilter,
}
| |
| impl<'a> Pre<'a> { |
| /// Call this prefilter on the given haystack with the given needle. |
| #[inline] |
| pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> { |
| let result = self.prestrat.find(haystack); |
| self.prestate.update(result.unwrap_or(haystack.len())); |
| result |
| } |
| |
| /// Return true if and only if this prefilter should be used. |
| #[inline] |
| pub(crate) fn is_effective(&mut self) -> bool { |
| self.prestate.is_effective() |
| } |
| } |
| |
/// Returns true if the needle has the right characteristics for a vector
/// algorithm to handle the entirety of substring search.
///
/// Vector algorithms can serve as prefilters for other substring search
/// algorithms (like Two-Way), but they can also perform substring search on
/// their own. In the latter role, a vector algorithm still finds candidate
/// positions quickly, but then confirms each candidate itself via `memcmp`
/// instead of handing it back. That confirmation is cheap for short needles
/// but can degrade badly when long needles generate many false candidates,
/// so vector algorithms only own substring search for needles within a
/// particular length range.
#[inline]
fn do_packed_search(needle: &[u8]) -> bool {
    /// The minimum length of a needle required for this algorithm. The minimum
    /// is 2 since a length of 1 should just use memchr and a length of 0 isn't
    /// a case handled by this searcher.
    const MIN_LEN: usize = 2;

    /// The maximum length of a needle required for this algorithm.
    ///
    /// In reality, there is no hard max here. The code below can handle any
    /// length needle. (Perhaps that suggests there are missing optimizations.)
    /// Instead, this is a heuristic and a bound guaranteeing our linear time
    /// complexity.
    ///
    /// It is a heuristic because when a candidate match is found, memcmp is
    /// run. For very large needles with lots of false positives, memcmp can
    /// make the code run quite slow.
    ///
    /// It is a bound because the worst case behavior with memcmp is
    /// multiplicative in the size of the needle and haystack, and we want
    /// to keep that additive. This bound ensures we still meet that bound
    /// theoretically, since it's just a constant. We aren't acting in bad
    /// faith here, memcmp on tiny needles is so fast that even in pathological
    /// cases (see pathological vector benchmarks), this is still just as fast
    /// or faster in practice.
    ///
    /// This specific number was chosen by tweaking a bit and running
    /// benchmarks. The rare-medium-needle, for example, gets about 5% faster
    /// by using this algorithm instead of a prefilter-accelerated Two-Way.
    /// There's also a theoretical desire to keep this number reasonably
    /// low, to mitigate the impact of pathological cases. I did try 64, and
    /// some benchmarks got a little better, and others (particularly the
    /// pathological ones), got a lot worse. So... 32 it is?
    const MAX_LEN: usize = 32;
    (MIN_LEN..=MAX_LEN).contains(&needle.len())
}