#include <stdio.h> #include <math.h> #include <gsl/gsl_rng.h> /** Initialize and return a random number generator. */ gsl_rng *init_rng(void) { const gsl_rng_type *T; gsl_rng *r; gsl_rng_env_setup(); T = gsl_rng_default; r = gsl_rng_alloc(T); printf("# rng name: %s\n", gsl_rng_name(r)); printf("# rng seed: %lu\n", gsl_rng_default_seed); printf("# rng first variate: %lu\n", gsl_rng_get(r)); return r; } #include <gsl/gsl_histogram.h> #include <gsl/gsl_randist.h> const size_t SAMPLES = 10000; const double STDDEV = 2.0; /** Allocate a histogram and populate it by sampling from a Gaussian distribtuion. */ gsl_histogram *fill_hist(gsl_rng * r, size_t bins) { const double XMIN = -5; const double XMAX = +5; gsl_histogram *h; double m = 0, v = 0; size_t i; h = gsl_histogram_alloc(bins); gsl_histogram_set_ranges_uniform(h, XMIN, XMAX); for (i = 0; i < bins * SAMPLES; i++) { double x = gsl_ran_gaussian(r, STDDEV); gsl_histogram_increment(h, x); m += x; v += x * x; } printf("# histogram mean: %g\n", gsl_histogram_mean(h)); printf("# histogram stddev: %g\n", gsl_histogram_sigma(h)); m /= i; v /= (i - 1); printf("# Monte Carlo mean: %g\n", m); printf("# Monte Carlo stddev: %g\n", sqrt(v)); return h; } #include <gsl/gsl_cdf.h> /** Compute a chi-squared statistic over the histogram and return it. */ double hist_x2(gsl_histogram * h) { const size_t bins = gsl_histogram_bins(h); double x2 = 0; size_t i; for (i = 0; i < bins; i++) { double o, e, lo, hi, res; /* get observed counts */ o = gsl_histogram_get(h, i); /* compute expected counts from the CDF */ gsl_histogram_get_range(h, i, &lo, &hi); e = gsl_cdf_gaussian_P(hi, STDDEV) - gsl_cdf_gaussian_P(lo, STDDEV); e *= bins * SAMPLES; /* printf("# [%f; %f) %g %g\n", lo, hi, o, e); */ /* compute chi-squared residuals; strictly speaking we should now test if e < 5 and balk if it is */ res = (o - e) / sqrt(e); printf("%-+5g %5g %12.6f %-+g\n", lo, o, e, res); x2 += res * res; } return x2; } /** Compare histogram of values sampled from a random number generator with expected counts under a theoretical model. Use a chi-squared test to determine whether the observed samples and the theoretical model are different. The null hypothesis is that the samples were drawn from the theoretical model. (Strictly speaking this is silly, because the chi-squared test doesn't have anything to say about whether the null hypothesis is true.) */ int main(void) { const size_t BINS = 200; gsl_rng *r; gsl_histogram *h; double x2, p; r = init_rng(); h = fill_hist(r, BINS); x2 = hist_x2(h); printf("# X^2 statistic: %g\n", x2); printf("# X^2 critical value (5%%, %d df): %f\n", BINS - 1, gsl_cdf_chisq_Qinv(0.05, BINS - 1)); p = gsl_cdf_chisq_Q(x2, BINS - 1); printf("# X^2 p-value: %g\n", p); if (p >= 0.05) { printf("# Cannot reject null hypothesis at 5%% level.\n"); } else { printf("# Null hypothesis rejected at 5%% level.\n"); } return 0; }