vendor/criterion/src/stats/univariate/outliers/tukey.rs - toolchain/rustc - Git at Google

 //! Tukey's method
 //!
 //! The original method uses two "fences" to classify the data. All the observations "inside" the
 //! fences are considered "normal", and the rest are considered outliers.
 //!
 //! The fences are computed from the quartiles of the sample, according to the following formula:
 //!
 //! ``` ignore
 //! // q1, q3 are the first and third quartiles
 //! let iqr = q3 - q1;  // The interquartile range
 //! let (f1, f2) = (q1 - 1.5 * iqr, q3 + 1.5 * iqr);  // the "fences"
 //!
 //! let is_outlier = |x| if x > f1 && x < f2 { true } else { false };
 //! ```
 //!
 //! The classifier provided here adds two extra outer fences:
 //!
 //! ``` ignore
 //! let (f3, f4) = (q1 - 3 * iqr, q3 + 3 * iqr);  // the outer "fences"
 //! ```
 //!
 //! The extra fences add a sense of "severity" to the classification. Data points outside of the
 //! outer fences are considered "severe" outliers, whereas points outside the inner fences are just
 //! "mild" outliers, and, as the original method, everything inside the inner fences is considered
 //! "normal" data.
 //!
 //! Some ASCII art for the visually oriented people:
 //!
 //! ``` ignore
 //!          LOW-ish                NORMAL-ish                 HIGH-ish
 //!         x   |       +    |  o o  o    o   o o  o  |        +   |   x
 //!             f3           f1                       f2           f4
 //!
 //! Legend:
 //! o: "normal" data (not an outlier)
 //! +: "mild" outlier
 //! x: "severe" outlier
 //! ```

 use std::iter::IntoIterator;
 use std::ops::{Deref, Index};
 use std::slice;

 use crate::stats::float::Float;
 use crate::stats::univariate::Sample;

 use self::Label::*;

 /// A classified/labeled sample.
 ///
 /// The labeled data can be accessed using the indexing operator. The order of the data points is
 /// retained.
 ///
 /// NOTE: Due to limitations in the indexing traits, only the label is returned. Once the
 /// `IndexGet` trait lands in stdlib, the indexing operation will return a `(data_point, label)`
 /// pair.
 #[derive(Clone, Copy)]
 pub struct LabeledSample<'a, A>
 where
     A: Float,
 {
     fences: (A, A, A, A),
     sample: &'a Sample<A>,
 }

 impl<'a, A> LabeledSample<'a, A>
 where
     A: Float,
 {
     /// Returns the number of data points per label
     ///
     /// - Time: `O(length)`
     #[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))]
     pub fn count(&self) -> (usize, usize, usize, usize, usize) {
         let (mut los, mut lom, mut noa, mut him, mut his) = (0, 0, 0, 0, 0);

         for (_, label) in self {
             match label {
                 LowSevere => {
                     los += 1;
                 }
                 LowMild => {
                     lom += 1;
                 }
                 NotAnOutlier => {
                     noa += 1;
                 }
                 HighMild => {
                     him += 1;
                 }
                 HighSevere => {
                     his += 1;
                 }
             }
         }

         (los, lom, noa, him, his)
     }

     /// Returns the fences used to classify the outliers
     pub fn fences(&self) -> (A, A, A, A) {
         self.fences
     }

     /// Returns an iterator over the labeled data
     pub fn iter(&self) -> Iter<'a, A> {
         Iter {
             fences: self.fences,
             iter: self.sample.iter(),
         }
     }
 }

 impl<'a, A> Deref for LabeledSample<'a, A>
 where
     A: Float,
 {
     type Target = Sample<A>;

     fn deref(&self) -> &Sample<A> {
         self.sample
     }
 }

 // FIXME Use the `IndexGet` trait
 impl<'a, A> Index<usize> for LabeledSample<'a, A>
 where
     A: Float,
 {
     type Output = Label;

     #[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))]
     fn index(&self, i: usize) -> &Label {
         static LOW_SEVERE: Label = LowSevere;
         static LOW_MILD: Label = LowMild;
         static HIGH_MILD: Label = HighMild;
         static HIGH_SEVERE: Label = HighSevere;
         static NOT_AN_OUTLIER: Label = NotAnOutlier;

         let x = self.sample[i];
         let (lost, lomt, himt, hist) = self.fences;

         if x < lost {
             &LOW_SEVERE
         } else if x > hist {
             &HIGH_SEVERE
         } else if x < lomt {
             &LOW_MILD
         } else if x > himt {
             &HIGH_MILD
         } else {
             &NOT_AN_OUTLIER
         }
     }
 }

 impl<'a, 'b, A> IntoIterator for &'b LabeledSample<'a, A>
 where
     A: Float,
 {
     type Item = (A, Label);
     type IntoIter = Iter<'a, A>;

     fn into_iter(self) -> Iter<'a, A> {
         self.iter()
     }
 }

 /// Iterator over the labeled data
 pub struct Iter<'a, A>
 where
     A: Float,
 {
     fences: (A, A, A, A),
     iter: slice::Iter<'a, A>,
 }

 impl<'a, A> Iterator for Iter<'a, A>
 where
     A: Float,
 {
     type Item = (A, Label);

     #[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))]
     fn next(&mut self) -> Option<(A, Label)> {
         self.iter.next().map(|&x| {
             let (lost, lomt, himt, hist) = self.fences;

             let label = if x < lost {
                 LowSevere
             } else if x > hist {
                 HighSevere
             } else if x < lomt {
                 LowMild
             } else if x > himt {
                 HighMild
             } else {
                 NotAnOutlier
             };

             (x, label)
         })
     }

     fn size_hint(&self) -> (usize, Option<usize>) {
         self.iter.size_hint()
     }
 }

 /// Labels used to classify outliers
 pub enum Label {
     /// A "mild" outlier in the "high" spectrum
     HighMild,
     /// A "severe" outlier in the "high" spectrum
     HighSevere,
     /// A "mild" outlier in the "low" spectrum
     LowMild,
     /// A "severe" outlier in the "low" spectrum
     LowSevere,
     /// A normal data point
     NotAnOutlier,
 }

 impl Label {
     /// Checks if the data point has an "unusually" high value
     pub fn is_high(&self) -> bool {
         matches!(*self, HighMild | HighSevere)
     }

     /// Checks if the data point is labeled as a "mild" outlier
     pub fn is_mild(&self) -> bool {
         matches!(*self, HighMild | LowMild)
     }

     /// Checks if the data point has an "unusually" low value
     pub fn is_low(&self) -> bool {
         matches!(*self, LowMild | LowSevere)
     }

     /// Checks if the data point is labeled as an outlier
     pub fn is_outlier(&self) -> bool {
         matches!(*self, NotAnOutlier)
     }

     /// Checks if the data point is labeled as a "severe" outlier
     pub fn is_severe(&self) -> bool {
         matches!(*self, HighSevere | LowSevere)
     }
 }

 /// Classifies the sample, and returns a labeled sample.
 ///
 /// - Time: `O(N log N) where N = length`
 pub fn classify<A>(sample: &Sample<A>) -> LabeledSample<'_, A>
 where
     A: Float,
     usize: cast::From<A, Output = Result<usize, cast::Error>>,
 {
     let (q1, _, q3) = sample.percentiles().quartiles();
     let iqr = q3 - q1;

     // Mild
     let k_m = A::cast(1.5_f32);
     // Severe
     let k_s = A::cast(3);

     LabeledSample {
         fences: (
             q1 - k_s * iqr,
             q1 - k_m * iqr,
             q3 + k_m * iqr,
             q3 + k_s * iqr,
         ),
         sample,
     }
 }
	//! Tukey's method
	//!
	//! The original method uses two "fences" to classify the data. All the observations "inside" the
	//! fences are considered "normal", and the rest are considered outliers.
	//!
	//! The fences are computed from the quartiles of the sample, according to the following formula:
	//!
	//! ``` ignore
	//! // q1, q3 are the first and third quartiles
	//! let iqr = q3 - q1; // The interquartile range
	//! let (f1, f2) = (q1 - 1.5 * iqr, q3 + 1.5 * iqr); // the "fences"
	//!
	//! let is_outlier = \|x\| if x > f1 && x < f2 { true } else { false };
	//! ```
	//!
	//! The classifier provided here adds two extra outer fences:
	//!
	//! ``` ignore
	//! let (f3, f4) = (q1 - 3 * iqr, q3 + 3 * iqr); // the outer "fences"
	//! ```
	//!
	//! The extra fences add a sense of "severity" to the classification. Data points outside of the
	//! outer fences are considered "severe" outliers, whereas points outside the inner fences are just
	//! "mild" outliers, and, as the original method, everything inside the inner fences is considered
	//! "normal" data.
	//!
	//! Some ASCII art for the visually oriented people:
	//!
	//! ``` ignore
	//! LOW-ish NORMAL-ish HIGH-ish
	//! x \| + \| o o o o o o o \| + \| x
	//! f3 f1 f2 f4
	//!
	//! Legend:
	//! o: "normal" data (not an outlier)
	//! +: "mild" outlier
	//! x: "severe" outlier
	//! ```

	use std::iter::IntoIterator;
	use std::ops::{Deref, Index};
	use std::slice;

	use crate::stats::float::Float;
	use crate::stats::univariate::Sample;

	use self::Label::*;

	/// A classified/labeled sample.
	///
	/// The labeled data can be accessed using the indexing operator. The order of the data points is
	/// retained.
	///
	/// NOTE: Due to limitations in the indexing traits, only the label is returned. Once the
	/// `IndexGet` trait lands in stdlib, the indexing operation will return a `(data_point, label)`
	/// pair.
	#[derive(Clone, Copy)]
	pub struct LabeledSample<'a, A>
	where
	A: Float,
	{
	fences: (A, A, A, A),
	sample: &'a Sample<A>,
	}

	impl<'a, A> LabeledSample<'a, A>
	where
	A: Float,
	{
	/// Returns the number of data points per label
	///
	/// - Time: `O(length)`
	#[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))]
	pub fn count(&self) -> (usize, usize, usize, usize, usize) {
	let (mut los, mut lom, mut noa, mut him, mut his) = (0, 0, 0, 0, 0);

	for (_, label) in self {
	match label {
	LowSevere => {
	los += 1;
	}
	LowMild => {
	lom += 1;
	}
	NotAnOutlier => {
	noa += 1;
	}
	HighMild => {
	him += 1;
	}
	HighSevere => {
	his += 1;
	}
	}
	}

	(los, lom, noa, him, his)
	}

	/// Returns the fences used to classify the outliers
	pub fn fences(&self) -> (A, A, A, A) {
	self.fences
	}

	/// Returns an iterator over the labeled data
	pub fn iter(&self) -> Iter<'a, A> {
	Iter {
	fences: self.fences,
	iter: self.sample.iter(),
	}
	}
	}

	impl<'a, A> Deref for LabeledSample<'a, A>
	where
	A: Float,
	{
	type Target = Sample<A>;

	fn deref(&self) -> &Sample<A> {
	self.sample
	}
	}

	// FIXME Use the `IndexGet` trait
	impl<'a, A> Index<usize> for LabeledSample<'a, A>
	where
	A: Float,
	{
	type Output = Label;

	#[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))]
	fn index(&self, i: usize) -> &Label {
	static LOW_SEVERE: Label = LowSevere;
	static LOW_MILD: Label = LowMild;
	static HIGH_MILD: Label = HighMild;
	static HIGH_SEVERE: Label = HighSevere;
	static NOT_AN_OUTLIER: Label = NotAnOutlier;

	let x = self.sample[i];
	let (lost, lomt, himt, hist) = self.fences;

	if x < lost {
	&LOW_SEVERE
	} else if x > hist {
	&HIGH_SEVERE
	} else if x < lomt {
	&LOW_MILD
	} else if x > himt {
	&HIGH_MILD
	} else {
	&NOT_AN_OUTLIER
	}
	}
	}

	impl<'a, 'b, A> IntoIterator for &'b LabeledSample<'a, A>
	where
	A: Float,
	{
	type Item = (A, Label);
	type IntoIter = Iter<'a, A>;

	fn into_iter(self) -> Iter<'a, A> {
	self.iter()
	}
	}

	/// Iterator over the labeled data
	pub struct Iter<'a, A>
	where
	A: Float,
	{
	fences: (A, A, A, A),
	iter: slice::Iter<'a, A>,
	}

	impl<'a, A> Iterator for Iter<'a, A>
	where
	A: Float,
	{
	type Item = (A, Label);

	#[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))]
	fn next(&mut self) -> Option<(A, Label)> {
	self.iter.next().map(\|&x\| {
	let (lost, lomt, himt, hist) = self.fences;

	let label = if x < lost {
	LowSevere
	} else if x > hist {
	HighSevere
	} else if x < lomt {
	LowMild
	} else if x > himt {
	HighMild
	} else {
	NotAnOutlier
	};

	(x, label)
	})
	}

	fn size_hint(&self) -> (usize, Option<usize>) {
	self.iter.size_hint()
	}
	}

	/// Labels used to classify outliers
	pub enum Label {
	/// A "mild" outlier in the "high" spectrum
	HighMild,
	/// A "severe" outlier in the "high" spectrum
	HighSevere,
	/// A "mild" outlier in the "low" spectrum
	LowMild,
	/// A "severe" outlier in the "low" spectrum
	LowSevere,
	/// A normal data point
	NotAnOutlier,
	}

	impl Label {
	/// Checks if the data point has an "unusually" high value
	pub fn is_high(&self) -> bool {
	matches!(*self, HighMild \| HighSevere)
	}

	/// Checks if the data point is labeled as a "mild" outlier
	pub fn is_mild(&self) -> bool {
	matches!(*self, HighMild \| LowMild)
	}

	/// Checks if the data point has an "unusually" low value
	pub fn is_low(&self) -> bool {
	matches!(*self, LowMild \| LowSevere)
	}

	/// Checks if the data point is labeled as an outlier
	pub fn is_outlier(&self) -> bool {
	matches!(*self, NotAnOutlier)
	}

	/// Checks if the data point is labeled as a "severe" outlier
	pub fn is_severe(&self) -> bool {
	matches!(*self, HighSevere \| LowSevere)
	}
	}

	/// Classifies the sample, and returns a labeled sample.
	///
	/// - Time: `O(N log N) where N = length`
	pub fn classify<A>(sample: &Sample<A>) -> LabeledSample<'_, A>
	where
	A: Float,
	usize: cast::From<A, Output = Result<usize, cast::Error>>,
	{
	let (q1, _, q3) = sample.percentiles().quartiles();
	let iqr = q3 - q1;

	// Mild
	let k_m = A::cast(1.5_f32);
	// Severe
	let k_s = A::cast(3);

	LabeledSample {
	fences: (
	q1 - k_s * iqr,
	q1 - k_m * iqr,
	q3 + k_m * iqr,
	q3 + k_s * iqr,
	),
	sample,
	}
	}