| use crate::arch::all::{ |
| packedpair::{HeuristicFrequencyRank, Pair}, |
| rabinkarp, twoway, |
| }; |
| |
| #[cfg(target_arch = "aarch64")] |
| use crate::arch::aarch64::neon::packedpair as neon; |
| #[cfg(target_arch = "wasm32")] |
| use crate::arch::wasm32::simd128::packedpair as simd128; |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| use crate::arch::x86_64::{ |
| avx2::packedpair as avx2, sse2::packedpair as sse2, |
| }; |
| |
/// A "meta" substring searcher.
///
/// To a first approximation, this chooses what it believes to be the "best"
/// substring search implementation based on the needle at construction time.
/// Then, every call to `find` will execute that particular implementation. To
/// a second approximation, multiple substring search algorithms may be used,
/// depending on the haystack. For example, for supremely short haystacks,
/// Rabin-Karp is typically used.
///
/// See the documentation on `Prefilter` for an explanation of the dispatching
/// mechanism. The quick summary is that an enum has too much overhead and
/// we can't use dynamic dispatch via traits because we need to work in a
/// core-only environment. (Dynamic dispatch works in core-only, but you
/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter
/// requires `alloc`.) So instead, we use a union and an appropriately paired
/// free function to read from the correct field on the union and execute the
/// chosen substring search implementation.
#[derive(Clone)]
pub(crate) struct Searcher {
    /// The free function to call to execute a search. It must always be
    /// paired with the union field set in `kind` below.
    call: SearcherKindFn,
    /// The state of whichever substring search implementation was selected
    /// at construction time.
    kind: SearcherKind,
    /// A Rabin-Karp finder, kept around as a fallback for very short
    /// haystacks regardless of which primary implementation was chosen.
    rabinkarp: rabinkarp::Finder,
}
| |
impl Searcher {
    /// Creates a new "meta" substring searcher that attempts to choose the
    /// best algorithm based on the needle, heuristics and what the current
    /// target supports.
    #[inline]
    pub(crate) fn new<R: HeuristicFrequencyRank>(
        prefilter: PrefilterConfig,
        ranker: R,
        needle: &[u8],
    ) -> Searcher {
        // Every searcher keeps a Rabin-Karp finder around, since it's used
        // as a fallback for very short haystacks no matter which primary
        // implementation is chosen below.
        let rabinkarp = rabinkarp::Finder::new(needle);
        // Needles of length 0 and 1 get trivial dedicated implementations.
        if needle.len() <= 1 {
            return if needle.is_empty() {
                trace!("building empty substring searcher");
                Searcher {
                    call: searcher_kind_empty,
                    kind: SearcherKind { empty: () },
                    rabinkarp,
                }
            } else {
                trace!("building one-byte substring searcher");
                debug_assert_eq!(1, needle.len());
                Searcher {
                    call: searcher_kind_one_byte,
                    kind: SearcherKind { one_byte: needle[0] },
                    rabinkarp,
                }
            };
        }
        // Without a heuristically chosen byte pair, neither the vector
        // searchers nor the prefilters below can be built, so fall back to
        // plain Two-Way.
        let pair = match Pair::with_ranker(needle, &ranker) {
            Some(pair) => pair,
            None => return Searcher::twoway(needle, rabinkarp, None),
        };
        debug_assert_ne!(
            pair.index1(),
            pair.index2(),
            "pair offsets should not be equivalent"
        );
        // Exactly one of the cfg-gated blocks below is compiled for any
        // given target, and that block forms this function's tail
        // expression. Each follows the same shape: when `do_packed_search`
        // approves the needle, the vector searcher handles the entire
        // search; otherwise it is only used as a prefilter for Two-Way
        // (unless prefilters are disabled by `prefilter`).
        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
        {
            if let Some(pp) = avx2::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building x86_64 AVX2 substring searcher");
                    let kind = SearcherKind { avx2: pp };
                    Searcher { call: searcher_kind_avx2, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::avx2(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building x86_64 SSE2 substring searcher");
                    let kind = SearcherKind { sse2: pp };
                    Searcher { call: searcher_kind_sse2, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::sse2(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                // We're pretty unlikely to get to this point, but it is
                // possible to be running on x86_64 without SSE2. Namely, it's
                // really up to the OS whether it wants to support vector
                // registers or not.
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(target_arch = "wasm32")]
        {
            if let Some(pp) = simd128::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building wasm32 simd128 substring searcher");
                    let kind = SearcherKind { simd128: pp };
                    Searcher { call: searcher_kind_simd128, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::simd128(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                // Note that `fallback` may itself decline (return `None`)
                // when the rarest needle byte is too common.
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            if let Some(pp) = neon::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building aarch64 neon substring searcher");
                    let kind = SearcherKind { neon: pp };
                    Searcher { call: searcher_kind_neon, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::neon(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(not(any(
            all(target_arch = "x86_64", target_feature = "sse2"),
            target_arch = "wasm32",
            target_arch = "aarch64"
        )))]
        {
            if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
    }

    /// Creates a new searcher that always uses the Two-Way algorithm. This is
    /// typically used when vector algorithms are unavailable or inappropriate.
    /// (For example, when the needle is "too long.")
    ///
    /// If a prefilter is given, then the searcher returned will be accelerated
    /// by the prefilter.
    #[inline]
    fn twoway(
        needle: &[u8],
        rabinkarp: rabinkarp::Finder,
        prestrat: Option<Prefilter>,
    ) -> Searcher {
        let finder = twoway::Finder::new(needle);
        match prestrat {
            None => {
                trace!("building scalar two-way substring searcher");
                let kind = SearcherKind { two_way: finder };
                Searcher { call: searcher_kind_two_way, kind, rabinkarp }
            }
            Some(prestrat) => {
                trace!(
                    "building scalar two-way \
                     substring searcher with a prefilter"
                );
                let two_way_with_prefilter =
                    TwoWayWithPrefilter { finder, prestrat };
                let kind = SearcherKind { two_way_with_prefilter };
                Searcher {
                    call: searcher_kind_two_way_with_prefilter,
                    kind,
                    rabinkarp,
                }
            }
        }
    }

    /// Searches the given haystack for the given needle. The needle given
    /// should be the same as the needle that this finder was initialized
    /// with.
    ///
    /// Inlining this can lead to big wins for latency, and #[inline] doesn't
    /// seem to be enough in some cases.
    #[inline(always)]
    pub(crate) fn find(
        &self,
        prestate: &mut PrefilterState,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        if haystack.len() < needle.len() {
            None
        } else {
            // SAFETY: By construction, we've ensured that the function
            // in `self.call` is properly paired with the union used in
            // `self.kind`.
            unsafe { (self.call)(self, prestate, haystack, needle) }
        }
    }
}
| |
| impl core::fmt::Debug for Searcher { |
| fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| f.debug_struct("Searcher") |
| .field("call", &"<searcher function>") |
| .field("kind", &"<searcher kind union>") |
| .field("rabinkarp", &self.rabinkarp) |
| .finish() |
| } |
| } |
| |
/// A union indicating one of several possible substring search implementations
/// that are in active use.
///
/// This union should only be read by one of the functions prefixed with
/// `searcher_kind_`. Namely, the correct function is meant to be paired with
/// the union by the caller, such that the function always reads from the
/// designated union field.
#[derive(Clone, Copy)]
union SearcherKind {
    /// Set when the needle is empty. Carries no state.
    empty: (),
    /// The needle's single byte, for needles of length 1.
    one_byte: u8,
    /// A scalar Two-Way searcher without prefilter acceleration.
    two_way: twoway::Finder,
    /// A scalar Two-Way searcher accelerated by a prefilter.
    two_way_with_prefilter: TwoWayWithPrefilter,
    /// A vectorized "packed pair" searcher for x86_64 SSE2.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
    /// A vectorized "packed pair" searcher for x86_64 AVX2.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
    /// A vectorized "packed pair" searcher for wasm32 simd128.
    #[cfg(target_arch = "wasm32")]
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
    /// A vectorized "packed pair" searcher for aarch64 neon.
    #[cfg(target_arch = "aarch64")]
    neon: crate::arch::aarch64::neon::packedpair::Finder,
}
| |
/// A two-way substring searcher with a prefilter.
#[derive(Copy, Clone, Debug)]
struct TwoWayWithPrefilter {
    /// The Two-Way searcher used to confirm candidate positions.
    finder: twoway::Finder,
    /// The prefilter used to quickly produce candidate positions.
    prestrat: Prefilter,
}
| |
/// The type of a substring search function.
///
/// The `needle` given to a function of this type must be identical to the
/// needle provided when the paired `SearcherKind` value was constructed.
///
/// # Safety
///
/// When using a function of this type, callers must ensure that the correct
/// function is paired with the value populated in `SearcherKind` union.
type SearcherKindFn = unsafe fn(
    searcher: &Searcher,
    prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize>;
| |
| /// Reads from the `empty` field of `SearcherKind` to handle the case of |
| /// searching for the empty needle. Works on all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.empty` union field is set. |
| unsafe fn searcher_kind_empty( |
| _searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| _haystack: &[u8], |
| _needle: &[u8], |
| ) -> Option<usize> { |
| Some(0) |
| } |
| |
| /// Reads from the `one_byte` field of `SearcherKind` to handle the case of |
| /// searching for a single byte needle. Works on all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.one_byte` union field is set. |
| unsafe fn searcher_kind_one_byte( |
| searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| haystack: &[u8], |
| _needle: &[u8], |
| ) -> Option<usize> { |
| let needle = searcher.kind.one_byte; |
| crate::memchr(needle, haystack) |
| } |
| |
| /// Reads from the `two_way` field of `SearcherKind` to handle the case of |
| /// searching for an arbitrary needle without prefilter acceleration. Works on |
| /// all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.two_way` union field is set. |
| unsafe fn searcher_kind_two_way( |
| searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| if rabinkarp::is_fast(haystack, needle) { |
| searcher.rabinkarp.find(haystack, needle) |
| } else { |
| searcher.kind.two_way.find(haystack, needle) |
| } |
| } |
| |
| /// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle |
| /// the case of searching for an arbitrary needle with prefilter acceleration. |
| /// Works on all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union |
| /// field is set. |
| unsafe fn searcher_kind_two_way_with_prefilter( |
| searcher: &Searcher, |
| prestate: &mut PrefilterState, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| if rabinkarp::is_fast(haystack, needle) { |
| searcher.rabinkarp.find(haystack, needle) |
| } else { |
| let TwoWayWithPrefilter { ref finder, ref prestrat } = |
| searcher.kind.two_way_with_prefilter; |
| let pre = Pre { prestate, prestrat }; |
| finder.find_with_prefilter(Some(pre), haystack, needle) |
| } |
| } |
| |
| /// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2 |
| /// vectorized substring search implementation. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.sse2` union field is set. |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| unsafe fn searcher_kind_sse2( |
| searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| let finder = &searcher.kind.sse2; |
| if haystack.len() < finder.min_haystack_len() { |
| searcher.rabinkarp.find(haystack, needle) |
| } else { |
| finder.find(haystack, needle) |
| } |
| } |
| |
| /// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2 |
| /// vectorized substring search implementation. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `searcher.kind.avx2` union field is set. |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| unsafe fn searcher_kind_avx2( |
| searcher: &Searcher, |
| _prestate: &mut PrefilterState, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| let finder = &searcher.kind.avx2; |
| if haystack.len() < finder.min_haystack_len() { |
| searcher.rabinkarp.find(haystack, needle) |
| } else { |
| finder.find(haystack, needle) |
| } |
| } |
| |
/// Runs the wasm32 simd128 vectorized searcher stored in the `simd128` field
/// of `SearcherKind`.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.simd128` union field is set.
#[cfg(target_arch = "wasm32")]
unsafe fn searcher_kind_simd128(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let pp = &searcher.kind.simd128;
    if haystack.len() >= pp.min_haystack_len() {
        pp.find(haystack, needle)
    } else {
        // The haystack is too short for the vector routine.
        searcher.rabinkarp.find(haystack, needle)
    }
}
| |
/// Runs the aarch64 neon vectorized searcher stored in the `neon` field of
/// `SearcherKind`.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.neon` union field is set.
#[cfg(target_arch = "aarch64")]
unsafe fn searcher_kind_neon(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let pp = &searcher.kind.neon;
    if haystack.len() >= pp.min_haystack_len() {
        pp.find(haystack, needle)
    } else {
        // The haystack is too short for the vector routine.
        searcher.rabinkarp.find(haystack, needle)
    }
}
| |
/// A reverse substring searcher.
#[derive(Clone, Debug)]
pub(crate) struct SearcherRev {
    /// The implementation chosen at construction time based on the needle.
    kind: SearcherRevKind,
    /// A reverse Rabin-Karp finder, used as a fallback for very short
    /// haystacks.
    rabinkarp: rabinkarp::FinderRev,
}
| |
/// The kind of the reverse searcher.
///
/// For the reverse case, we don't do any SIMD acceleration or prefilters.
/// There is no specific technical reason why we don't, but rather don't do it
/// because it's not clear it's worth the extra code to do so. If you have a
/// use case for it, please file an issue.
///
/// We also don't do the union trick as we do with the forward case and
/// prefilters. Basically for the same reason we don't have prefilters or
/// vector algorithms for reverse searching: it's not clear it's worth doing.
/// Please file an issue if you have a compelling use case for fast reverse
/// substring search.
#[derive(Clone, Debug)]
enum SearcherRevKind {
    /// The needle is empty; every haystack matches at its end.
    Empty,
    /// The needle is a single byte; search with `memrchr`.
    OneByte { needle: u8 },
    /// The needle has length greater than 1; search with reverse Two-Way.
    TwoWay { finder: twoway::FinderRev },
}
| |
| impl SearcherRev { |
| /// Creates a new searcher for finding occurrences of the given needle in |
| /// reverse. That is, it reports the last (instead of the first) occurrence |
| /// of a needle in a haystack. |
| #[inline] |
| pub(crate) fn new(needle: &[u8]) -> SearcherRev { |
| let kind = if needle.len() <= 1 { |
| if needle.is_empty() { |
| trace!("building empty reverse substring searcher"); |
| SearcherRevKind::Empty |
| } else { |
| trace!("building one-byte reverse substring searcher"); |
| debug_assert_eq!(1, needle.len()); |
| SearcherRevKind::OneByte { needle: needle[0] } |
| } |
| } else { |
| trace!("building scalar two-way reverse substring searcher"); |
| let finder = twoway::FinderRev::new(needle); |
| SearcherRevKind::TwoWay { finder } |
| }; |
| let rabinkarp = rabinkarp::FinderRev::new(needle); |
| SearcherRev { kind, rabinkarp } |
| } |
| |
| /// Searches the given haystack for the last occurrence of the given |
| /// needle. The needle given should be the same as the needle that this |
| /// finder was initialized with. |
| #[inline] |
| pub(crate) fn rfind( |
| &self, |
| haystack: &[u8], |
| needle: &[u8], |
| ) -> Option<usize> { |
| if haystack.len() < needle.len() { |
| return None; |
| } |
| match self.kind { |
| SearcherRevKind::Empty => Some(haystack.len()), |
| SearcherRevKind::OneByte { needle } => { |
| crate::memrchr(needle, haystack) |
| } |
| SearcherRevKind::TwoWay { ref finder } => { |
| if rabinkarp::is_fast(haystack, needle) { |
| self.rabinkarp.rfind(haystack, needle) |
| } else { |
| finder.rfind(haystack, needle) |
| } |
| } |
| } |
| } |
| } |
| |
/// Prefilter controls whether heuristics are used to accelerate searching.
///
/// A prefilter refers to the idea of detecting candidate matches very quickly,
/// and then confirming whether those candidates are full matches. This
/// idea can be quite effective since it's often the case that looking for
/// candidates can be a lot faster than running a complete substring search
/// over the entire input. Namely, looking for candidates can be done with
/// extremely fast vectorized code.
///
/// The downside of a prefilter is that it assumes false positives (which are
/// candidates generated by a prefilter that aren't matches) are somewhat rare
/// relative to the frequency of full matches. That is, if a lot of false
/// positives are generated, then it's possible for search time to be worse
/// than if the prefilter wasn't enabled in the first place.
///
/// Another downside of a prefilter is that it can result in highly variable
/// performance, where some cases are extraordinarily fast and others aren't.
/// Typically, variable performance isn't a problem, but it may be for your use
/// case.
///
/// The use of prefilters in this implementation does use a heuristic to detect
/// when a prefilter might not be carrying its weight, and will dynamically
/// disable its use. Nevertheless, this configuration option gives callers
/// the ability to disable prefilters if you have knowledge that they won't be
/// useful.
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum PrefilterConfig {
    /// Never use a prefilter in substring search.
    None,
    /// Automatically detect whether a heuristic prefilter should be used. If
    /// it is used, then heuristics will be used to dynamically disable the
    /// prefilter if it is believed to not be carrying its weight.
    Auto,
}
| |
| impl Default for PrefilterConfig { |
| fn default() -> PrefilterConfig { |
| PrefilterConfig::Auto |
| } |
| } |
| |
| impl PrefilterConfig { |
| /// Returns true when this prefilter is set to the `None` variant. |
| fn is_none(&self) -> bool { |
| matches!(*self, PrefilterConfig::None) |
| } |
| } |
| |
/// The implementation of a prefilter.
///
/// This type encapsulates dispatch to one of several possible choices for a
/// prefilter. Generally speaking, all prefilters have the same approximate
/// algorithm: they choose a couple of bytes from the needle that are believed
/// to be rare, use a fast vector algorithm to look for those bytes and return
/// positions as candidates for some substring search algorithm (currently only
/// Two-Way) to confirm as a match or not.
///
/// The differences between the algorithms are actually at the vector
/// implementation level. Namely, we need different routines based on both
/// which target architecture we're on and what CPU features are supported.
///
/// The straight-forwardly obvious approach here is to use an enum, and make
/// `Prefilter::find` do case analysis to determine which algorithm was
/// selected and invoke it. However, I've observed that this leads to poor
/// codegen in some cases, especially in latency sensitive benchmarks. That is,
/// this approach comes with overhead that I wasn't able to eliminate.
///
/// The second obvious approach is to use dynamic dispatch with traits. Doing
/// that in this context where `Prefilter` owns the selection generally
/// requires heap allocation, and this code is designed to run in core-only
/// environments.
///
/// So we settle on using a union (that's `PrefilterKind`) and a function
/// pointer (that's `PrefilterKindFn`). We select the right function pointer
/// based on which field in the union we set, and that function in turn
/// knows which field of the union to access. The downside of this approach
/// is that it forces us to think about safety, but the upside is that
/// there are some nice latency improvements to benchmarks. (Especially the
/// `memmem/sliceslice/short` benchmark.)
///
/// In cases where we've selected a vector algorithm and the haystack given
/// is too short, we fallback to the scalar version of `memchr` on the
/// `rarest_byte`. (The scalar version of `memchr` is still better than a naive
/// byte-at-a-time loop because it will read in `usize`-sized chunks at a
/// time.)
#[derive(Clone, Copy)]
struct Prefilter {
    /// The free function to call to run this prefilter. It must always be
    /// paired with the union field set in `kind` below.
    call: PrefilterKindFn,
    /// The state of whichever prefilter implementation was selected.
    kind: PrefilterKind,
    /// The byte believed to be rarest in the needle. Used by `find_simple`
    /// when the haystack is too short for the vector routine.
    rarest_byte: u8,
    /// The offset of `rarest_byte` within the needle. Candidate positions
    /// found via the rarest byte are shifted back by this amount.
    rarest_offset: u8,
}
| |
impl Prefilter {
    /// Return a "fallback" prefilter, but only if it is believed to be
    /// effective.
    #[inline]
    fn fallback<R: HeuristicFrequencyRank>(
        ranker: R,
        pair: Pair,
        needle: &[u8],
    ) -> Option<Prefilter> {
        /// The maximum frequency rank permitted for the fallback prefilter.
        /// If the rarest byte in the needle has a frequency rank above this
        /// value, then no prefilter is used if the fallback prefilter would
        /// otherwise be selected.
        const MAX_FALLBACK_RANK: u8 = 250;

        trace!("building fallback prefilter");
        // Throughout this module, the pair's first index is treated as the
        // offset of the byte believed to be rarest in the needle.
        let rarest_offset = pair.index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        let rarest_rank = ranker.rank(rarest_byte);
        if rarest_rank > MAX_FALLBACK_RANK {
            // The "rarest" byte is still quite common, so a prefilter is
            // likely to generate too many false positives to pay off.
            None
        } else {
            let finder = crate::arch::all::packedpair::Finder::with_pair(
                needle,
                pair.clone(),
            )?;
            let call = prefilter_kind_fallback;
            let kind = PrefilterKind { fallback: finder };
            Some(Prefilter { call, kind, rarest_byte, rarest_offset })
        }
    }

    /// Return a prefilter using a x86_64 SSE2 vector algorithm.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    #[inline]
    fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter {
        trace!("building x86_64 SSE2 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_sse2,
            kind: PrefilterKind { sse2: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using a x86_64 AVX2 vector algorithm.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    #[inline]
    fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter {
        trace!("building x86_64 AVX2 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_avx2,
            kind: PrefilterKind { avx2: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using a wasm32 simd128 vector algorithm.
    #[cfg(target_arch = "wasm32")]
    #[inline]
    fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter {
        trace!("building wasm32 simd128 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_simd128,
            kind: PrefilterKind { simd128: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using a aarch64 neon vector algorithm.
    #[cfg(target_arch = "aarch64")]
    #[inline]
    fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter {
        trace!("building aarch64 neon prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_neon,
            kind: PrefilterKind { neon: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a *candidate* position for a match.
    ///
    /// When this returns an offset, it implies that a match could begin at
    /// that offset, but it may not. That is, it is possible for a false
    /// positive to be returned.
    ///
    /// When `None` is returned, then it is guaranteed that there are no
    /// matches for the needle in the given haystack. That is, it is impossible
    /// for a false negative to be returned.
    ///
    /// The purpose of this routine is to look for candidate matching positions
    /// as quickly as possible before running a (likely) slower confirmation
    /// step.
    #[inline]
    fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: By construction, we've ensured that the function in
        // `self.call` is properly paired with the union used in `self.kind`.
        unsafe { (self.call)(self, haystack) }
    }

    /// A "simple" prefilter that just looks for the occurrence of the rarest
    /// byte from the needle. This is generally only used for very small
    /// haystacks.
    #[inline]
    fn find_simple(&self, haystack: &[u8]) -> Option<usize> {
        // We don't use crate::memchr here because the haystack should be small
        // enough that memchr won't be able to use vector routines anyway. So
        // we just skip straight to the fallback implementation which is likely
        // faster. (A byte-at-a-time loop is only used when the haystack is
        // smaller than `size_of::<usize>()`.)
        crate::arch::all::memchr::One::new(self.rarest_byte)
            .find(haystack)
            .map(|i| i.saturating_sub(usize::from(self.rarest_offset)))
    }
}
| |
| impl core::fmt::Debug for Prefilter { |
| fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| f.debug_struct("Prefilter") |
| .field("call", &"<prefilter function>") |
| .field("kind", &"<prefilter kind union>") |
| .field("rarest_byte", &self.rarest_byte) |
| .field("rarest_offset", &self.rarest_offset) |
| .finish() |
| } |
| } |
| |
/// A union indicating one of several possible prefilters that are in active
/// use.
///
/// This union should only be read by one of the functions prefixed with
/// `prefilter_kind_`. Namely, the correct function is meant to be paired with
/// the union by the caller, such that the function always reads from the
/// designated union field.
#[derive(Clone, Copy)]
union PrefilterKind {
    /// The scalar "packed pair" prefilter, available on all platforms.
    fallback: crate::arch::all::packedpair::Finder,
    /// A vectorized "packed pair" prefilter for x86_64 SSE2.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
    /// A vectorized "packed pair" prefilter for x86_64 AVX2.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
    /// A vectorized "packed pair" prefilter for wasm32 simd128.
    #[cfg(target_arch = "wasm32")]
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
    /// A vectorized "packed pair" prefilter for aarch64 neon.
    #[cfg(target_arch = "aarch64")]
    neon: crate::arch::aarch64::neon::packedpair::Finder,
}
| |
/// The type of a prefilter function.
///
/// A function of this type returns a *candidate* position (which may be a
/// false positive), or `None` when the needle is guaranteed to be absent
/// from the haystack.
///
/// # Safety
///
/// When using a function of this type, callers must ensure that the correct
/// function is paired with the value populated in `PrefilterKind` union.
type PrefilterKindFn =
    unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>;
| /// Reads from the `fallback` field of `PrefilterKind` to execute the fallback |
| /// prefilter. Works on all platforms. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `strat.kind.fallback` union field is set. |
| unsafe fn prefilter_kind_fallback( |
| strat: &Prefilter, |
| haystack: &[u8], |
| ) -> Option<usize> { |
| strat.kind.fallback.find_prefilter(haystack) |
| } |
| |
| /// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2 |
| /// prefilter. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `strat.kind.sse2` union field is set. |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| unsafe fn prefilter_kind_sse2( |
| strat: &Prefilter, |
| haystack: &[u8], |
| ) -> Option<usize> { |
| let finder = &strat.kind.sse2; |
| if haystack.len() < finder.min_haystack_len() { |
| strat.find_simple(haystack) |
| } else { |
| finder.find_prefilter(haystack) |
| } |
| } |
| |
| /// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2 |
| /// prefilter. |
| /// |
| /// # Safety |
| /// |
| /// Callers must ensure that the `strat.kind.avx2` union field is set. |
| #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] |
| unsafe fn prefilter_kind_avx2( |
| strat: &Prefilter, |
| haystack: &[u8], |
| ) -> Option<usize> { |
| let finder = &strat.kind.avx2; |
| if haystack.len() < finder.min_haystack_len() { |
| strat.find_simple(haystack) |
| } else { |
| finder.find_prefilter(haystack) |
| } |
| } |
| |
/// Runs the wasm32 simd128 prefilter stored in the `simd128` field of
/// `PrefilterKind`.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.simd128` union field is set.
#[cfg(target_arch = "wasm32")]
unsafe fn prefilter_kind_simd128(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let pp = &strat.kind.simd128;
    if haystack.len() >= pp.min_haystack_len() {
        pp.find_prefilter(haystack)
    } else {
        // The haystack is too short for the vector routine.
        strat.find_simple(haystack)
    }
}
| |
/// Runs the aarch64 neon prefilter stored in the `neon` field of
/// `PrefilterKind`.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.neon` union field is set.
#[cfg(target_arch = "aarch64")]
unsafe fn prefilter_kind_neon(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let pp = &strat.kind.neon;
    if haystack.len() >= pp.min_haystack_len() {
        pp.find_prefilter(haystack)
    } else {
        // The haystack is too short for the vector routine.
        strat.find_simple(haystack)
    }
}
| |
/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It is used to track how many bytes, on average, are skipped by
/// the prefilter. If this average dips below a certain threshold over time,
/// then the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator is treated as a single search.)
#[derive(Clone, Copy, Debug)]
pub(crate) struct PrefilterState {
    /// The number of skips that has been executed. This is always 1 greater
    /// than the actual number of skips. The special sentinel value of 0
    /// indicates that the prefilter is inert. This is useful to avoid
    /// additional checks to determine whether the prefilter is still
    /// "effective." Once a prefilter becomes inert, it should no longer be
    /// used (according to our heuristics).
    skips: u32,
    /// The total number of bytes that have been skipped.
    skipped: u32,
}
| |
| impl PrefilterState { |
| /// The minimum number of skip attempts to try before considering whether |
| /// a prefilter is effective or not. |
| const MIN_SKIPS: u32 = 50; |
| |
| /// The minimum amount of bytes that skipping must average. |
| /// |
| /// This value was chosen based on varying it and checking |
| /// the microbenchmarks. In particular, this can impact the |
| /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set |
| /// too low. |
| const MIN_SKIP_BYTES: u32 = 8; |
| |
| /// Create a fresh prefilter state. |
| #[inline] |
| pub(crate) fn new() -> PrefilterState { |
| PrefilterState { skips: 1, skipped: 0 } |
| } |
| |
| /// Update this state with the number of bytes skipped on the last |
| /// invocation of the prefilter. |
| #[inline] |
| fn update(&mut self, skipped: usize) { |
| self.skips = self.skips.saturating_add(1); |
| // We need to do this dance since it's technically possible for |
| // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the |
| // size of a prefilter state.) |
| self.skipped = match u32::try_from(skipped) { |
| Err(_) => core::u32::MAX, |
| Ok(skipped) => self.skipped.saturating_add(skipped), |
| }; |
| } |
| |
| /// Return true if and only if this state indicates that a prefilter is |
| /// still effective. |
| #[inline] |
| fn is_effective(&mut self) -> bool { |
| if self.is_inert() { |
| return false; |
| } |
| if self.skips() < PrefilterState::MIN_SKIPS { |
| return true; |
| } |
| if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { |
| return true; |
| } |
| |
| // We're inert. |
| self.skips = 0; |
| false |
| } |
| |
| /// Returns true if the prefilter this state represents should no longer |
| /// be used. |
| #[inline] |
| fn is_inert(&self) -> bool { |
| self.skips == 0 |
| } |
| |
| /// Returns the total number of times the prefilter has been used. |
| #[inline] |
| fn skips(&self) -> u32 { |
| // Remember, `0` is a sentinel value indicating inertness, so we |
| // always need to subtract `1` to get our actual number of skips. |
| self.skips.saturating_sub(1) |
| } |
| } |
| |
/// A combination of prefilter effectiveness state and the prefilter itself.
///
/// Bundling both borrows into one value makes it convenient to thread a
/// prefilter and its mutable bookkeeping through a search routine together.
#[derive(Debug)]
pub(crate) struct Pre<'a> {
    /// State that tracks the effectiveness of a prefilter.
    prestate: &'a mut PrefilterState,
    /// The actual prefilter.
    prestrat: &'a Prefilter,
}
| |
| impl<'a> Pre<'a> { |
| /// Call this prefilter on the given haystack with the given needle. |
| #[inline] |
| pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> { |
| let result = self.prestrat.find(haystack); |
| self.prestate.update(result.unwrap_or(haystack.len())); |
| result |
| } |
| |
| /// Return true if and only if this prefilter should be used. |
| #[inline] |
| pub(crate) fn is_effective(&mut self) -> bool { |
| self.prestate.is_effective() |
| } |
| } |
| |
/// Returns true if the needle has the right characteristics for a vector
/// algorithm to handle the entirety of substring search.
///
/// Vector algorithms can serve as prefilters for other substring search
/// algorithms (like Two-Way), but they can also perform substring search on
/// their own. In the latter role, a vector algorithm still finds candidate
/// positions quickly, but then confirms each candidate itself via `memcmp`
/// instead of handing it back. That confirmation is cheap for short needles
/// but can degrade badly when long needles generate many false candidates,
/// so vector algorithms only own substring search for needles within a
/// particular length range.
#[inline]
fn do_packed_search(needle: &[u8]) -> bool {
    /// The minimum length of a needle required for this algorithm. The minimum
    /// is 2 since a length of 1 should just use memchr and a length of 0 isn't
    /// a case handled by this searcher.
    const MIN_LEN: usize = 2;

    /// The maximum length of a needle required for this algorithm.
    ///
    /// In reality, there is no hard max here. The code below can handle any
    /// length needle. (Perhaps that suggests there are missing optimizations.)
    /// Instead, this is a heuristic and a bound guaranteeing our linear time
    /// complexity.
    ///
    /// It is a heuristic because when a candidate match is found, memcmp is
    /// run. For very large needles with lots of false positives, memcmp can
    /// make the code run quite slow.
    ///
    /// It is a bound because the worst case behavior with memcmp is
    /// multiplicative in the size of the needle and haystack, and we want
    /// to keep that additive. This bound ensures we still meet that bound
    /// theoretically, since it's just a constant. We aren't acting in bad
    /// faith here, memcmp on tiny needles is so fast that even in pathological
    /// cases (see pathological vector benchmarks), this is still just as fast
    /// or faster in practice.
    ///
    /// This specific number was chosen by tweaking a bit and running
    /// benchmarks. The rare-medium-needle, for example, gets about 5% faster
    /// by using this algorithm instead of a prefilter-accelerated Two-Way.
    /// There's also a theoretical desire to keep this number reasonably
    /// low, to mitigate the impact of pathological cases. I did try 64, and
    /// some benchmarks got a little better, and others (particularly the
    /// pathological ones), got a lot worse. So... 32 it is?
    const MAX_LEN: usize = 32;
    (MIN_LEN..=MAX_LEN).contains(&needle.len())
}