1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
//! Mixed bootstrap

use std::cmp;

use float::Float;
use num_cpus;
use thread_scoped as thread;

use tuple::{Tuple, TupledDistributionsBuilder};
use univariate::resamples::Resamples;
use univariate::Sample;

/// Performs a *mixed* two-sample bootstrap
pub fn bootstrap<A, T, S>(
    a: &Sample<A>,
    b: &Sample<A>,
    nresamples: usize,
    statistic: S,
) -> T::Distributions
where
    A: Float,
    S: Fn(&Sample<A>, &Sample<A>) -> T + Sync,
    T: Tuple,
    T::Distributions: Send,
    T::Builder: Send,
{
    let ncpus = num_cpus::get();
    let n_a = a.len();
    let n_b = b.len();
    let mut c = Vec::with_capacity(n_a + n_b);
    c.extend_from_slice(a);
    c.extend_from_slice(b);

    unsafe {
        let c = Sample::new(&c);

        // TODO need some sensible threshold to trigger the multi-threaded path
        if ncpus > 1 && nresamples > n_a {
            let granularity = nresamples / ncpus + 1;
            let statistic = &statistic;

            let chunks = (0..ncpus)
                .map(|i| {
                    let mut sub_distributions: T::Builder =
                        TupledDistributionsBuilder::new(granularity);
                    let offset = i * granularity;

                    thread::scoped(move || {
                        let end = cmp::min(offset + granularity, nresamples);
                        let mut resamples = Resamples::new(c);

                        for _ in offset..end {
                            let resample = resamples.next();
                            let a: &Sample<A> = Sample::new(&resample[..n_a]);
                            let b: &Sample<A> = Sample::new(&resample[n_a..]);

                            sub_distributions.push(statistic(a, b))
                        }
                        sub_distributions
                    })
                })
                .collect::<Vec<_>>();

            let mut builder: T::Builder = TupledDistributionsBuilder::new(nresamples);
            for chunk in chunks {
                builder.extend(&mut (chunk.join()));
            }
            builder.complete()
        } else {
            let mut resamples = Resamples::new(c);
            let mut distributions: T::Builder = TupledDistributionsBuilder::new(nresamples);

            for _ in 0..nresamples {
                let resample = resamples.next();
                let a: &Sample<A> = Sample::new(&resample[..n_a]);
                let b: &Sample<A> = Sample::new(&resample[n_a..]);

                distributions.push(statistic(a, b))
            }

            distributions.complete()
        }
    }
}