src/memchr/x86/mod.rs - platform/external/rust/crates/memchr - Git at Google

 use super::fallback;

 // We only use AVX when we can detect at runtime whether it's available, which
 // requires std.
 #[cfg(feature = "std")]
 mod avx;
 mod sse2;

 /// Calls the best available implementation of `$name`, caching the choice
 /// with a gcc-like "ifunc" trick.
 ///
 /// The first time an expansion of this macro runs (inside `memchr`, for
 /// example), a resolver performs CPU feature detection at runtime, picks the
 /// best implementation and overwrites the expansion's function pointer with
 /// that selection. Every subsequent invocation loads the pointer and calls
 /// the CPU-specific routine directly, which skips both the feature detection
 /// and the branch it would otherwise require.
 ///
 /// This typically doesn't matter for rare calls or for larger haystacks, but
 /// `memchr` can be called in tight loops where the overhead of that branch
 /// can actually add up *and is measurable*. This trick was necessary to bring
 /// this implementation up to glibc's speeds for the 'tiny' benchmarks, for
 /// example.
 ///
 /// At some point the Rust ecosystem will hopefully get a nice macro for doing
 /// exactly this, at which point this hand-rolled version can be replaced.
 ///
 /// N.B. The ifunc strategy does prevent function inlining of course, but
 /// on modern CPUs, you'll probably end up with the AVX2 implementation,
 /// which probably can't be inlined anyway---unless you've compiled your
 /// entire program with AVX2 enabled. However, even then, the various memchr
 /// implementations aren't exactly small, so inlining might not help anyway!
 ///
 /// # Safety
 ///
 /// Callers must ensure that `$fnty` is a function pointer type.
 #[cfg(feature = "std")]
 macro_rules! unsafe_ifunc {
     ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
         use std::{mem, sync::atomic::{AtomicPtr, Ordering}};

         // Type-erased function pointer, so it can live in an `AtomicPtr`.
         type RawFn = *mut ();

         // Initially points at the resolver below; the resolver replaces it
         // with the routine it selects.
         static SELECTED: AtomicPtr<()> = AtomicPtr::new(resolve as RawFn);

         // Picks an implementation, records it in `SELECTED` and then
         // services this first call with it.
         fn resolve($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
             let chosen: RawFn = if cfg!(memchr_runtime_avx)
                 && is_x86_feature_detected!("avx2")
             {
                 avx::$name as RawFn
             } else if cfg!(memchr_runtime_sse2) {
                 sse2::$name as RawFn
             } else {
                 fallback::$name as RawFn
             };
             SELECTED.store(chosen, Ordering::Relaxed);
             // SAFETY: By virtue of the caller contract, $fnty is a function
             // pointer, which is always safe to transmute with a *mut ().
             // Also, if `chosen` is the AVX routine, then it is guaranteed to
             // be supported since we checked the avx2 feature just above.
             unsafe {
                 let routine = mem::transmute::<RawFn, $fnty>(chosen);
                 routine($($needle),+, haystack)
             }
         }

         // SAFETY: By virtue of the caller contract, $fnty is a function
         // pointer, which is always safe to transmute with a *mut (). The
         // loaded pointer is either `resolve` or whatever `resolve` stored;
         // if that is the AVX routine, then it is guaranteed to be supported
         // since `resolve` checked the avx2 feature before storing it.
         unsafe {
             let raw = SELECTED.load(Ordering::Relaxed);
             let routine = mem::transmute::<RawFn, $fnty>(raw);
             routine($($needle),+, $haystack)
         }
     }}
 }

 /// Variant of `unsafe_ifunc!` used when std isn't available to provide
 /// runtime CPU feature detection, or when runtime CPU feature detection has
 /// been explicitly disabled.
 ///
 /// No function pointer is cached here: the optimized SSE2 routine is called
 /// directly when `memchr_runtime_sse2` is set, and the fallback routine is
 /// called otherwise. SSE2 is available on all x86_64 targets, so no CPU
 /// feature detection is necessary. The `$fnty` argument is accepted only so
 /// that both variants of the macro share one call syntax; it is not used.
 ///
 /// # Safety
 ///
 /// There are no safety requirements for this definition of the macro. It is
 /// safe for all inputs since it is restricted to either the fallback routine
 /// or the SSE routine, which is always safe to call on x86_64.
 #[cfg(not(feature = "std"))]
 macro_rules! unsafe_ifunc {
     ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
         if cfg!(memchr_runtime_sse2) {
             // SAFETY: As stated in this macro's docs, the SSE routine is
             // always safe to call on x86_64.
             unsafe {
                 sse2::$name($($needle),+, $haystack)
             }
         } else {
             // The fallback routine is called without `unsafe`.
             fallback::$name($($needle),+, $haystack)
         }
     }}
 }

 /// Runs the `memchr` routine for the needle byte `n1` over `haystack`.
 ///
 /// This wrapper contains no search logic of its own: it forwards to whichever
 /// `memchr` implementation `unsafe_ifunc!` selects and returns that
 /// routine's result unchanged (presumably the index of the first byte equal
 /// to `n1`, or `None` when there is no such byte -- confirm against the
 /// selected implementation).
 #[inline(always)]
 pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
     unsafe_ifunc!(
         // A function pointer type, as `unsafe_ifunc!` requires.
         fn(u8, &[u8]) -> Option<usize>,
         memchr,
         haystack,
         n1
     )
 }

 /// Runs the `memchr2` routine for the needle bytes `n1` and `n2` over
 /// `haystack`.
 ///
 /// This wrapper contains no search logic of its own: it forwards to whichever
 /// `memchr2` implementation `unsafe_ifunc!` selects and returns that
 /// routine's result unchanged (presumably the index of the first byte equal
 /// to either needle, or `None` when there is no such byte -- confirm against
 /// the selected implementation).
 #[inline(always)]
 pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
     unsafe_ifunc!(
         // A function pointer type, as `unsafe_ifunc!` requires.
         fn(u8, u8, &[u8]) -> Option<usize>,
         memchr2, haystack, n1, n2
     )
 }

 /// Runs the `memchr3` routine for the needle bytes `n1`, `n2` and `n3` over
 /// `haystack`.
 ///
 /// This wrapper contains no search logic of its own: it forwards to whichever
 /// `memchr3` implementation `unsafe_ifunc!` selects and returns that
 /// routine's result unchanged (presumably the index of the first byte equal
 /// to any of the three needles, or `None` when there is no such byte --
 /// confirm against the selected implementation).
 #[inline(always)]
 pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
     unsafe_ifunc!(
         // A function pointer type, as `unsafe_ifunc!` requires.
         fn(u8, u8, u8, &[u8]) -> Option<usize>,
         memchr3, haystack, n1, n2, n3
     )
 }

 /// Runs the `memrchr` routine for the needle byte `n1` over `haystack`.
 ///
 /// This wrapper contains no search logic of its own: it forwards to whichever
 /// `memrchr` implementation `unsafe_ifunc!` selects and returns that
 /// routine's result unchanged (presumably the index of the last byte equal
 /// to `n1`, or `None` when there is no such byte -- confirm against the
 /// selected implementation).
 #[inline(always)]
 pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
     unsafe_ifunc!(
         // A function pointer type, as `unsafe_ifunc!` requires.
         fn(u8, &[u8]) -> Option<usize>,
         memrchr,
         haystack,
         n1
     )
 }

 /// Runs the `memrchr2` routine for the needle bytes `n1` and `n2` over
 /// `haystack`.
 ///
 /// This wrapper contains no search logic of its own: it forwards to whichever
 /// `memrchr2` implementation `unsafe_ifunc!` selects and returns that
 /// routine's result unchanged (presumably the index of the last byte equal
 /// to either needle, or `None` when there is no such byte -- confirm against
 /// the selected implementation).
 #[inline(always)]
 pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
     unsafe_ifunc!(
         // A function pointer type, as `unsafe_ifunc!` requires.
         fn(u8, u8, &[u8]) -> Option<usize>,
         memrchr2, haystack, n1, n2
     )
 }

 /// Runs the `memrchr3` routine for the needle bytes `n1`, `n2` and `n3` over
 /// `haystack`.
 ///
 /// This wrapper contains no search logic of its own: it forwards to whichever
 /// `memrchr3` implementation `unsafe_ifunc!` selects and returns that
 /// routine's result unchanged (presumably the index of the last byte equal
 /// to any of the three needles, or `None` when there is no such byte --
 /// confirm against the selected implementation).
 #[inline(always)]
 pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
     unsafe_ifunc!(
         // A function pointer type, as `unsafe_ifunc!` requires.
         fn(u8, u8, u8, &[u8]) -> Option<usize>,
         memrchr3, haystack, n1, n2, n3
     )
 }
	use super::fallback;

	// We only use AVX when we can detect at runtime whether it's available, which
	// requires std.
	#[cfg(feature = "std")]
	mod avx;
	mod sse2;

	/// Calls the best available implementation of `$name`, caching the choice
	/// with a gcc-like "ifunc" trick.
	///
	/// The first time an expansion of this macro runs (inside `memchr`, for
	/// example), a resolver performs CPU feature detection at runtime, picks the
	/// best implementation and overwrites the expansion's function pointer with
	/// that selection. Every subsequent invocation loads the pointer and calls
	/// the CPU-specific routine directly, which skips both the feature detection
	/// and the branch it would otherwise require.
	///
	/// This typically doesn't matter for rare calls or for larger haystacks, but
	/// `memchr` can be called in tight loops where the overhead of that branch
	/// can actually add up and is measurable. This trick was necessary to bring
	/// this implementation up to glibc's speeds for the 'tiny' benchmarks, for
	/// example.
	///
	/// At some point the Rust ecosystem will hopefully get a nice macro for doing
	/// exactly this, at which point this hand-rolled version can be replaced.
	///
	/// N.B. The ifunc strategy does prevent function inlining of course, but
	/// on modern CPUs, you'll probably end up with the AVX2 implementation,
	/// which probably can't be inlined anyway---unless you've compiled your
	/// entire program with AVX2 enabled. However, even then, the various memchr
	/// implementations aren't exactly small, so inlining might not help anyway!
	///
	/// # Safety
	///
	/// Callers must ensure that `$fnty` is a function pointer type.
	#[cfg(feature = "std")]
	macro_rules! unsafe_ifunc {
		($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
			use std::{mem, sync::atomic::{AtomicPtr, Ordering}};

			// Type-erased function pointer, so it can live in an `AtomicPtr`.
			type RawFn = *mut ();

			// Initially points at the resolver below; the resolver replaces it
			// with the routine it selects.
			static SELECTED: AtomicPtr<()> = AtomicPtr::new(resolve as RawFn);

			// Picks an implementation, records it in `SELECTED` and then
			// services this first call with it.
			fn resolve($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
				let chosen: RawFn = if cfg!(memchr_runtime_avx)
					&& is_x86_feature_detected!("avx2")
				{
					avx::$name as RawFn
				} else if cfg!(memchr_runtime_sse2) {
					sse2::$name as RawFn
				} else {
					fallback::$name as RawFn
				};
				SELECTED.store(chosen, Ordering::Relaxed);
				// SAFETY: By virtue of the caller contract, $fnty is a function
				// pointer, which is always safe to transmute with a *mut ().
				// Also, if `chosen` is the AVX routine, then it is guaranteed to
				// be supported since we checked the avx2 feature just above.
				unsafe {
					let routine = mem::transmute::<RawFn, $fnty>(chosen);
					routine($($needle),+, haystack)
				}
			}

			// SAFETY: By virtue of the caller contract, $fnty is a function
			// pointer, which is always safe to transmute with a *mut (). The
			// loaded pointer is either `resolve` or whatever `resolve` stored;
			// if that is the AVX routine, then it is guaranteed to be supported
			// since `resolve` checked the avx2 feature before storing it.
			unsafe {
				let raw = SELECTED.load(Ordering::Relaxed);
				let routine = mem::transmute::<RawFn, $fnty>(raw);
				routine($($needle),+, $haystack)
			}
		}}
	}

	/// Variant of `unsafe_ifunc!` used when std isn't available to provide
	/// runtime CPU feature detection, or when runtime CPU feature detection has
	/// been explicitly disabled.
	///
	/// No function pointer is cached here: the optimized SSE2 routine is called
	/// directly when `memchr_runtime_sse2` is set, and the fallback routine is
	/// called otherwise. SSE2 is available on all x86_64 targets, so no CPU
	/// feature detection is necessary. The `$fnty` argument is accepted only so
	/// that both variants of the macro share one call syntax; it is not used.
	///
	/// # Safety
	///
	/// There are no safety requirements for this definition of the macro. It is
	/// safe for all inputs since it is restricted to either the fallback routine
	/// or the SSE routine, which is always safe to call on x86_64.
	#[cfg(not(feature = "std"))]
	macro_rules! unsafe_ifunc {
		($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
			if cfg!(memchr_runtime_sse2) {
				// SAFETY: As stated in this macro's docs, the SSE routine is
				// always safe to call on x86_64.
				unsafe {
					sse2::$name($($needle),+, $haystack)
				}
			} else {
				// The fallback routine is called without `unsafe`.
				fallback::$name($($needle),+, $haystack)
			}
		}}
	}

	/// Runs the `memchr` routine for the needle byte `n1` over `haystack`.
	///
	/// This wrapper contains no search logic of its own: it forwards to whichever
	/// `memchr` implementation `unsafe_ifunc!` selects and returns that
	/// routine's result unchanged (presumably the index of the first byte equal
	/// to `n1`, or `None` when there is no such byte -- confirm against the
	/// selected implementation).
	#[inline(always)]
	pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
		unsafe_ifunc!(
			// A function pointer type, as `unsafe_ifunc!` requires.
			fn(u8, &[u8]) -> Option<usize>,
			memchr,
			haystack,
			n1
		)
	}

	/// Runs the `memchr2` routine for the needle bytes `n1` and `n2` over
	/// `haystack`.
	///
	/// This wrapper contains no search logic of its own: it forwards to whichever
	/// `memchr2` implementation `unsafe_ifunc!` selects and returns that
	/// routine's result unchanged (presumably the index of the first byte equal
	/// to either needle, or `None` when there is no such byte -- confirm against
	/// the selected implementation).
	#[inline(always)]
	pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
		unsafe_ifunc!(
			// A function pointer type, as `unsafe_ifunc!` requires.
			fn(u8, u8, &[u8]) -> Option<usize>,
			memchr2, haystack, n1, n2
		)
	}

	/// Runs the `memchr3` routine for the needle bytes `n1`, `n2` and `n3` over
	/// `haystack`.
	///
	/// This wrapper contains no search logic of its own: it forwards to whichever
	/// `memchr3` implementation `unsafe_ifunc!` selects and returns that
	/// routine's result unchanged (presumably the index of the first byte equal
	/// to any of the three needles, or `None` when there is no such byte --
	/// confirm against the selected implementation).
	#[inline(always)]
	pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
		unsafe_ifunc!(
			// A function pointer type, as `unsafe_ifunc!` requires.
			fn(u8, u8, u8, &[u8]) -> Option<usize>,
			memchr3, haystack, n1, n2, n3
		)
	}

	/// Runs the `memrchr` routine for the needle byte `n1` over `haystack`.
	///
	/// This wrapper contains no search logic of its own: it forwards to whichever
	/// `memrchr` implementation `unsafe_ifunc!` selects and returns that
	/// routine's result unchanged (presumably the index of the last byte equal
	/// to `n1`, or `None` when there is no such byte -- confirm against the
	/// selected implementation).
	#[inline(always)]
	pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
		unsafe_ifunc!(
			// A function pointer type, as `unsafe_ifunc!` requires.
			fn(u8, &[u8]) -> Option<usize>,
			memrchr,
			haystack,
			n1
		)
	}

	/// Runs the `memrchr2` routine for the needle bytes `n1` and `n2` over
	/// `haystack`.
	///
	/// This wrapper contains no search logic of its own: it forwards to whichever
	/// `memrchr2` implementation `unsafe_ifunc!` selects and returns that
	/// routine's result unchanged (presumably the index of the last byte equal
	/// to either needle, or `None` when there is no such byte -- confirm against
	/// the selected implementation).
	#[inline(always)]
	pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
		unsafe_ifunc!(
			// A function pointer type, as `unsafe_ifunc!` requires.
			fn(u8, u8, &[u8]) -> Option<usize>,
			memrchr2, haystack, n1, n2
		)
	}

	/// Runs the `memrchr3` routine for the needle bytes `n1`, `n2` and `n3` over
	/// `haystack`.
	///
	/// This wrapper contains no search logic of its own: it forwards to whichever
	/// `memrchr3` implementation `unsafe_ifunc!` selects and returns that
	/// routine's result unchanged (presumably the index of the last byte equal
	/// to any of the three needles, or `None` when there is no such byte --
	/// confirm against the selected implementation).
	#[inline(always)]
	pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
		unsafe_ifunc!(
			// A function pointer type, as `unsafe_ifunc!` requires.
			fn(u8, u8, u8, &[u8]) -> Option<usize>,
			memrchr3, haystack, n1, n2, n3
		)
	}