vendor/criterion/src/stats/univariate/kde/mod.rs - toolchain/rustc - Git at Google

 //! Kernel density estimation

 pub mod kernel;

 use self::kernel::Kernel;
 use crate::stats::float::Float;
 use crate::stats::univariate::Sample;
 #[cfg(feature = "rayon")]
 use rayon::prelude::*;

 /// Univariate kernel density estimator
 pub struct Kde<'a, A, K>
 where
     A: Float,
     K: Kernel<A>,
 {
     bandwidth: A,
     kernel: K,
     sample: &'a Sample<A>,
 }

 impl<'a, A, K> Kde<'a, A, K>
 where
     A: 'a + Float,
     K: Kernel<A>,
 {
     /// Creates a new kernel density estimator from the `sample`, using a kernel and estimating
     /// the bandwidth using the method `bw`
     pub fn new(sample: &'a Sample<A>, kernel: K, bw: Bandwidth) -> Kde<'a, A, K> {
         Kde {
             bandwidth: bw.estimate(sample),
             kernel,
             sample,
         }
     }

     /// Returns the bandwidth used by the estimator
     pub fn bandwidth(&self) -> A {
         self.bandwidth
     }

     /// Maps the KDE over `xs`
     ///
     /// - Multihreaded
     pub fn map(&self, xs: &[A]) -> Box<[A]> {
         #[cfg(feature = "rayon")]
         let iter = xs.par_iter();

         #[cfg(not(feature = "rayon"))]
         let iter = xs.iter();

         iter.map(|&x| self.estimate(x))
             .collect::<Vec<_>>()
             .into_boxed_slice()
     }

     /// Estimates the probability density of `x`
     pub fn estimate(&self, x: A) -> A {
         let _0 = A::cast(0);
         let slice = self.sample;
         let h = self.bandwidth;
         let n = A::cast(slice.len());

         let sum = slice
             .iter()
             .fold(_0, |acc, &x_i| acc + self.kernel.evaluate((x - x_i) / h));

         sum / (h * n)
     }
 }

 /// Method to estimate the bandwidth
 pub enum Bandwidth {
     /// Use Silverman's rule of thumb to estimate the bandwidth from the sample
     Silverman,
 }

 impl Bandwidth {
     fn estimate<A: Float>(self, sample: &Sample<A>) -> A {
         match self {
             Bandwidth::Silverman => {
                 let factor = A::cast(4. / 3.);
                 let exponent = A::cast(1. / 5.);
                 let n = A::cast(sample.len());
                 let sigma = sample.std_dev(None);

                 sigma * (factor / n).powf(exponent)
             }
         }
     }
 }

 #[cfg(test)]
 macro_rules! test {
     ($ty:ident) => {
         mod $ty {
             use approx::relative_eq;
             use quickcheck::quickcheck;
             use quickcheck::TestResult;

             use crate::stats::univariate::kde::kernel::Gaussian;
             use crate::stats::univariate::kde::{Bandwidth, Kde};
             use crate::stats::univariate::Sample;

             // The [-inf inf] integral of the estimated PDF should be one
             quickcheck! {
                 fn integral(size: u8, start: u8) -> TestResult {
                     let size = size as usize;
                     let start = start as usize;
                     const DX: $ty = 1e-3;

                     if let Some(v) = crate::stats::test::vec::<$ty>(size, start) {
                         let slice = &v[start..];
                         let data = Sample::new(slice);
                         let kde = Kde::new(data, Gaussian, Bandwidth::Silverman);
                         let h = kde.bandwidth();
                         // NB Obviously a [-inf inf] integral is not feasible, but this range works
                         // quite well
                         let (a, b) = (data.min() - 5. * h, data.max() + 5. * h);

                         let mut acc = 0.;
                         let mut x = a;
                         let mut y = kde.estimate(a);

                         while x < b {
                             acc += DX * y / 2.;

                             x += DX;
                             y = kde.estimate(x);

                             acc += DX * y / 2.;
                         }

                         TestResult::from_bool(relative_eq!(acc, 1., epsilon = 2e-5))
                     } else {
                         TestResult::discard()
                     }
                 }
             }
         }
     };
 }

 #[cfg(test)]
 mod test {
     test!(f32);
     test!(f64);
 }
	//! Kernel density estimation

	pub mod kernel;

	use self::kernel::Kernel;
	use crate::stats::float::Float;
	use crate::stats::univariate::Sample;
	#[cfg(feature = "rayon")]
	use rayon::prelude::*;

	/// Univariate kernel density estimator
	pub struct Kde<'a, A, K>
	where
	A: Float,
	K: Kernel<A>,
	{
	bandwidth: A,
	kernel: K,
	sample: &'a Sample<A>,
	}

	impl<'a, A, K> Kde<'a, A, K>
	where
	A: 'a + Float,
	K: Kernel<A>,
	{
	/// Creates a new kernel density estimator from the `sample`, using a kernel and estimating
	/// the bandwidth using the method `bw`
	pub fn new(sample: &'a Sample<A>, kernel: K, bw: Bandwidth) -> Kde<'a, A, K> {
	Kde {
	bandwidth: bw.estimate(sample),
	kernel,
	sample,
	}
	}

	/// Returns the bandwidth used by the estimator
	pub fn bandwidth(&self) -> A {
	self.bandwidth
	}

	/// Maps the KDE over `xs`
	///
	/// - Multihreaded
	pub fn map(&self, xs: &[A]) -> Box<[A]> {
	#[cfg(feature = "rayon")]
	let iter = xs.par_iter();

	#[cfg(not(feature = "rayon"))]
	let iter = xs.iter();

	iter.map(\|&x\| self.estimate(x))
	.collect::<Vec<_>>()
	.into_boxed_slice()
	}

	/// Estimates the probability density of `x`
	pub fn estimate(&self, x: A) -> A {
	let _0 = A::cast(0);
	let slice = self.sample;
	let h = self.bandwidth;
	let n = A::cast(slice.len());

	let sum = slice
	.iter()
	.fold(_0, \|acc, &x_i\| acc + self.kernel.evaluate((x - x_i) / h));

	sum / (h * n)
	}
	}

	/// Method to estimate the bandwidth
	pub enum Bandwidth {
	/// Use Silverman's rule of thumb to estimate the bandwidth from the sample
	Silverman,
	}

	impl Bandwidth {
	fn estimate<A: Float>(self, sample: &Sample<A>) -> A {
	match self {
	Bandwidth::Silverman => {
	let factor = A::cast(4. / 3.);
	let exponent = A::cast(1. / 5.);
	let n = A::cast(sample.len());
	let sigma = sample.std_dev(None);

	sigma * (factor / n).powf(exponent)
	}
	}
	}
	}

	#[cfg(test)]
	macro_rules! test {
	($ty:ident) => {
	mod $ty {
	use approx::relative_eq;
	use quickcheck::quickcheck;
	use quickcheck::TestResult;

	use crate::stats::univariate::kde::kernel::Gaussian;
	use crate::stats::univariate::kde::{Bandwidth, Kde};
	use crate::stats::univariate::Sample;

	// The [-inf inf] integral of the estimated PDF should be one
	quickcheck! {
	fn integral(size: u8, start: u8) -> TestResult {
	let size = size as usize;
	let start = start as usize;
	const DX: $ty = 1e-3;

	if let Some(v) = crate::stats::test::vec::<$ty>(size, start) {
	let slice = &v[start..];
	let data = Sample::new(slice);
	let kde = Kde::new(data, Gaussian, Bandwidth::Silverman);
	let h = kde.bandwidth();
	// NB Obviously a [-inf inf] integral is not feasible, but this range works
	// quite well
	let (a, b) = (data.min() - 5. * h, data.max() + 5. * h);

	let mut acc = 0.;
	let mut x = a;
	let mut y = kde.estimate(a);

	while x < b {
	acc += DX * y / 2.;

	x += DX;
	y = kde.estimate(x);

	acc += DX * y / 2.;
	}

	TestResult::from_bool(relative_eq!(acc, 1., epsilon = 2e-5))
	} else {
	TestResult::discard()
	}
	}
	}
	}
	};
	}

	#[cfg(test)]
	mod test {
	test!(f32);
	test!(f64);
	}