blob: 2351c9ef6870ba49a0b4efce4dd0f62c00775cd2 [file] [log] [blame]
//! Bivariate analysis
mod bootstrap;
pub mod regression;
mod resamples;
use crate::stats::bivariate::resamples::Resamples;
use crate::stats::float::Float;
use crate::stats::tuple::{Tuple, TupledDistributionsBuilder};
use crate::stats::univariate::Sample;
#[cfg(feature = "rayon")]
use rayon::iter::{IntoParallelIterator, ParallelIterator};
/// Bivariate `(X, Y)` data
///
/// Invariants:
///
/// - No `NaN`s in the data
/// - At least two data points in the set
pub struct Data<'a, X, Y>(&'a [X], &'a [Y]);
impl<'a, X, Y> Copy for Data<'a, X, Y> {}
#[cfg_attr(feature = "cargo-clippy", allow(clippy::expl_impl_clone_on_copy))]
impl<'a, X, Y> Clone for Data<'a, X, Y> {
fn clone(&self) -> Data<'a, X, Y> {
*self
}
}
impl<'a, X, Y> Data<'a, X, Y> {
/// Returns the length of the data set
pub fn len(&self) -> usize {
self.0.len()
}
/// Iterate over the data set
pub fn iter(&self) -> Pairs<'a, X, Y> {
Pairs {
data: *self,
state: 0,
}
}
}
impl<'a, X, Y> Data<'a, X, Y>
where
X: Float,
Y: Float,
{
/// Creates a new data set from two existing slices
pub fn new(xs: &'a [X], ys: &'a [Y]) -> Data<'a, X, Y> {
assert!(
xs.len() == ys.len()
&& xs.len() > 1
&& xs.iter().all(|x| !x.is_nan())
&& ys.iter().all(|y| !y.is_nan())
);
Data(xs, ys)
}
// TODO Remove the `T` parameter in favor of `S::Output`
/// Returns the bootstrap distributions of the parameters estimated by the `statistic`
///
/// - Multi-threaded
/// - Time: `O(nresamples)`
/// - Memory: `O(nresamples)`
pub fn bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions
where
S: Fn(Data<X, Y>) -> T + Sync,
T: Tuple + Send,
T::Distributions: Send,
T::Builder: Send,
{
#[cfg(feature = "rayon")]
{
(0..nresamples)
.into_par_iter()
.map_init(
|| Resamples::new(*self),
|resamples, _| statistic(resamples.next()),
)
.fold(
|| T::Builder::new(0),
|mut sub_distributions, sample| {
sub_distributions.push(sample);
sub_distributions
},
)
.reduce(
|| T::Builder::new(0),
|mut a, mut b| {
a.extend(&mut b);
a
},
)
.complete()
}
#[cfg(not(feature = "rayon"))]
{
let mut resamples = Resamples::new(*self);
(0..nresamples)
.map(|_| statistic(resamples.next()))
.fold(T::Builder::new(0), |mut sub_distributions, sample| {
sub_distributions.push(sample);
sub_distributions
})
.complete()
}
}
/// Returns a view into the `X` data
pub fn x(&self) -> &'a Sample<X> {
Sample::new(self.0)
}
/// Returns a view into the `Y` data
pub fn y(&self) -> &'a Sample<Y> {
Sample::new(self.1)
}
}
/// Iterator over `Data`
pub struct Pairs<'a, X: 'a, Y: 'a> {
data: Data<'a, X, Y>,
state: usize,
}
impl<'a, X, Y> Iterator for Pairs<'a, X, Y> {
type Item = (&'a X, &'a Y);
fn next(&mut self) -> Option<(&'a X, &'a Y)> {
if self.state < self.data.len() {
let i = self.state;
self.state += 1;
// This is safe because i will always be < self.data.{0,1}.len()
debug_assert!(i < self.data.0.len());
debug_assert!(i < self.data.1.len());
unsafe { Some((self.data.0.get_unchecked(i), self.data.1.get_unchecked(i))) }
} else {
None
}
}
}