-
Notifications
You must be signed in to change notification settings - Fork 0
/
statistiscs.hpp
115 lines (93 loc) · 3.57 KB
/
statistiscs.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//
// Simple library for measuring the performance of C++ code
// Based on http://hackage.haskell.org/package/criterion
//
// Copyright (C) 2014 Mykola Orliuk <[email protected]>
//
// This library is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation; either version 2.1 of the License, or (at
// your option) any later version.
//
// This library is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
// License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation,
// Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//
#pragma once
#include <array>
#include <cmath>
#include <cstddef>
#include <functional>
#include <random>
#include <tuple>
#include <vector>
// Multiplies x by itself and returns the product as a T.
template <typename T>
T sqr(T x)
{
    T squared = x * x;
    return squared;
}
// Summary statistics of a sample: arithmetic mean, sample standard deviation
// (n - 1 denominator) and a three-sigma interval around the mean.
struct estimate
{
    double mean = 0.0;    // arithmetic mean of the sample
    double stdev = 0.0;   // sample standard deviation
    double lbound = 0.0;  // mean - 3 * stdev
    double ubound = 0.0;  // mean + 3 * stdev

    // Creates an estimate with every field set to zero.
    estimate() = default;

    // Summarizes a sample of plain doubles.
    //
    // Deliberately not `explicit`: bootstrap() in this file builds estimates
    // with `estimates[k] = {vector}`, which needs the implicit conversion.
    //
    // An empty sample yields all-zero fields. A single-element sample yields
    // stdev == 0 and lbound == ubound == mean, because the n - 1 denominator
    // is undefined for n < 2.
    estimate(const std::vector<double> &sample)
    {
        summarize(sample);
    }

    // Summarizes accessor(element) over every element of sample.
    //
    // accessor is called exactly once per element, in order. Calling an empty
    // std::function throws std::bad_function_call (only reachable when sample
    // is non-empty). Empty and single-element samples are treated as in the
    // constructor above.
    template <typename T>
    estimate(const std::vector<T> &sample, std::function<double(T)> accessor)
    {
        std::vector<double> projected;
        projected.reserve(sample.size());
        for (const auto &item : sample) projected.push_back(accessor(item));
        summarize(projected);
    }

private:
    // Fills all four fields from values using a two-pass mean / variance.
    void summarize(const std::vector<double> &values)
    {
        const std::size_t n = values.size();

        mean = 0.0;
        stdev = 0.0;
        if (n > 0)
        {
            for (const double value : values) mean += value;
            mean /= n;
        }
        if (n > 1)
        {
            for (const double value : values)
            {
                const double delta = value - mean;
                stdev += delta * delta;
            }
            stdev = std::sqrt(stdev / (n - 1));
        }

        // Assume a normal distribution:
        //   68.3% of the population lies within 1 standard deviation of the mean,
        //   95%   of the population lies within 2 standard deviations,
        //   99.7% of the population lies within 3 standard deviations.
        lbound = mean - 3 * stdev;
        ubound = mean + 3 * stdev;
    }
};
// Type-erased callable that reduces a sample of T to a single double.
// The sample is passed by non-const reference, so the signature permits an
// estimator to modify the vector it is given.
template <typename T>
using estimator = std::function<double(std::vector<T> &)>;
template <size_t M, typename T>
std::array<estimate, M> bootstrap(std::vector<T> &sample, size_t resamples, std::array<estimator<T>, M> estimators)
{
const size_t n = sample.size();
// re-sample and estimate our sample
std::default_random_engine rng;
std::uniform_int_distribution<size_t> index_dist(0, n - 1);
std::array<std::vector<double>, M> esamples;
for (auto &esample : esamples) esample.resize(resamples);
for (size_t i = 0; i < resamples; ++i)
{
std::vector<T> bsample;
bsample.reserve(n);
for (size_t j = 0; j < n; ++j)
{
bsample.emplace_back(sample[index_dist(rng)]);
}
for (size_t k = 0; k < M; ++k) esamples[k][i] = estimators[k](bsample);
}
// lets calculate mean and stdev
std::array<estimate, M> estimates;
for (size_t k = 0; k < M; ++k) estimates[k] = {esamples[k]};
return estimates;
}