Playing with Toys
Contents
Playing with Toys¶
As of v0.6.0
, pyhf
now supports toys! A lot of kinks have been discovered and worked out and we’re grateful to our ATLAS colleagues for beta-testing this in the meantime. We don’t believe that there may not be any more bugs, but we feel confident that we can release the current implementation.
Simple Model (again)¶
import numpy as np
import pyhf
model = pyhf.simplemodels.uncorrelated_background(
signal=[5.0, 10.0], bkg=[50.0, 60.0], bkg_uncertainty=[5.0, 12.0]
)
Hypothesis Testing (revisited)¶
And like in the first exercise we did, let’s refresh what the hypothesis test looked like using \(\tilde{q}_\mu\):
CLs_obs, CLs_exp = pyhf.infer.hypotest(
1.0, # null hypothesis
[53.0, 65.0] + model.config.auxdata,
model,
test_stat="qtilde",
return_expected_set=True,
)
print(f" Observed CLs: {CLs_obs:.4f}")
for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):
print(f"Expected CLs({n_sigma:2d} σ): {expected_value:.4f}")
Observed CLs: 0.4957
Expected CLs(-2 σ): 0.0832
Expected CLs(-1 σ): 0.1828
Expected CLs( 0 σ): 0.3705
Expected CLs( 1 σ): 0.6437
Expected CLs( 2 σ): 0.8854
So the question is, does the asymptotic approximation hold in this example? The standard assumption is that you have enough “statistics” (meaning enough events) to use the large-N approximation. So let’s use the toy-based calculator instead and compute the same values as above and see if they match in the asymptotic case (we certainly hope they mostly do here!)
Note
For readability in the Jupyter Book the hypotest
has track_progress=False
. If you’re running this notebook yourself you might want to set track_progress=True
to enable the progress bar.
CLs_obs, CLs_exp = pyhf.infer.hypotest(
1.0, # null hypothesis
[53.0, 65.0] + model.config.auxdata,
model,
test_stat="qtilde",
return_expected_set=True,
calctype="toybased",
ntoys=1000,
track_progress=False,
)
print(f" Observed CLs: {CLs_obs:.4f}")
for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):
print(f"Expected CLs({n_sigma:2d} σ): {expected_value:.4f}")
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Input In [3], in <cell line: 1>()
----> 1 CLs_obs, CLs_exp = pyhf.infer.hypotest(
2 1.0, # null hypothesis
3 [53.0, 65.0] + model.config.auxdata,
4 model,
5 test_stat="qtilde",
6 return_expected_set=True,
7 calctype="toybased",
8 ntoys=1000,
9 track_progress=False,
10 )
11 print(f" Observed CLs: {CLs_obs:.4f}")
12 for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/__init__.py:160, in hypotest(poi_test, data, pdf, init_pars, par_bounds, fixed_params, calctype, return_tail_probs, return_expected, return_expected_set, **kwargs)
149 calc = utils.create_calculator(
150 calctype,
151 data,
(...)
156 **kwargs,
157 )
159 teststat = calc.teststatistic(poi_test)
--> 160 sig_plus_bkg_distribution, bkg_only_distribution = calc.distributions(poi_test)
162 tb, _ = get_backend()
163 CLsb_obs, CLb_obs, CLs_obs = tuple(
164 tb.astensor(pvalue)
165 for pvalue in calc.pvalues(
166 teststat, sig_plus_bkg_distribution, bkg_only_distribution
167 )
168 )
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/calculators.py:739, in ToyCalculator.distributions(self, poi_test, track_progress)
736 bkg_teststat = []
737 for sample in tqdm.tqdm(bkg_sample, **tqdm_options, desc='Background-like'):
738 bkg_teststat.append(
--> 739 teststat_func(
740 poi_test,
741 sample,
742 self.pdf,
743 self.init_pars,
744 self.par_bounds,
745 self.fixed_params,
746 )
747 )
749 s_plus_b = EmpiricalDistribution(tensorlib.astensor(signal_teststat))
750 b_only = EmpiricalDistribution(tensorlib.astensor(bkg_teststat))
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/test_statistics.py:189, in qmu_tilde(mu, data, pdf, init_pars, par_bounds, fixed_params)
184 if par_bounds[pdf.config.poi_index][0] != 0:
185 log.warning(
186 'qmu_tilde test statistic used for fit configuration with POI not bounded at zero.\n'
187 + 'Use the qmu test statistic (pyhf.infer.test_statistics.qmu) instead.'
188 )
--> 189 return _qmu_like(mu, data, pdf, init_pars, par_bounds, fixed_params)
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/test_statistics.py:25, in _qmu_like(mu, data, pdf, init_pars, par_bounds, fixed_params)
17 """
18 Clipped version of _tmu_like where the returned test statistic
19 is 0 if muhat > 0 else tmu_like_stat.
(...)
22 qmu_tilde. Otherwise this is qmu (no tilde).
23 """
24 tensorlib, optimizer = get_backend()
---> 25 tmu_like_stat, (_, muhatbhat) = _tmu_like(
26 mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=True
27 )
28 qmu_like_stat = tensorlib.where(
29 muhatbhat[pdf.config.poi_index] > mu, tensorlib.astensor(0.0), tmu_like_stat
30 )
31 return qmu_like_stat
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/test_statistics.py:47, in _tmu_like(mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars)
43 tensorlib, optimizer = get_backend()
44 mubhathat, fixed_poi_fit_lhood_val = fixed_poi_fit(
45 mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_val=True
46 )
---> 47 muhatbhat, unconstrained_fit_lhood_val = fit(
48 data, pdf, init_pars, par_bounds, fixed_params, return_fitted_val=True
49 )
50 log_likelihood_ratio = fixed_poi_fit_lhood_val - unconstrained_fit_lhood_val
51 tmu_like_stat = tensorlib.astensor(
52 tensorlib.clip(log_likelihood_ratio, 0.0, max_value=None)
53 )
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/mle.py:131, in fit(data, pdf, init_pars, par_bounds, fixed_params, **kwargs)
124 # get fixed vals from the model
125 fixed_vals = [
126 (index, init)
127 for index, (init, is_fixed) in enumerate(zip(init_pars, fixed_params))
128 if is_fixed
129 ]
--> 131 return opt.minimize(
132 twice_nll, data, pdf, init_pars, par_bounds, fixed_vals, **kwargs
133 )
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/optimize/mixins.py:184, in OptimizerMixin.minimize(self, objective, data, pdf, init_pars, par_bounds, fixed_vals, return_fitted_val, return_result_obj, return_uncertainties, return_correlations, do_grad, do_stitch, **kwargs)
181 par_names[index] = None
182 par_names = [name for name in par_names if name]
--> 184 result = self._internal_minimize(
185 **minimizer_kwargs, options=kwargs, par_names=par_names
186 )
187 result = self._internal_postprocess(
188 result, stitch_pars, return_uncertainties=return_uncertainties
189 )
191 _returns = [result.x]
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/optimize/mixins.py:50, in OptimizerMixin._internal_minimize(self, func, x0, do_grad, bounds, fixed_vals, options, par_names)
31 def _internal_minimize(
32 self,
33 func,
(...)
39 par_names=None,
40 ):
42 minimizer = self._get_minimizer(
43 func,
44 x0,
(...)
48 par_names=par_names,
49 )
---> 50 result = self._minimize(
51 minimizer,
52 func,
53 x0,
54 do_grad=do_grad,
55 bounds=bounds,
56 fixed_vals=fixed_vals,
57 options=options,
58 )
60 try:
61 assert result.success
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/optimize/opt_scipy.py:93, in scipy_optimizer._minimize(self, minimizer, func, x0, do_grad, bounds, fixed_vals, options)
90 else:
91 constraints = []
---> 93 return minimizer(
94 func,
95 x0,
96 method=method,
97 jac=do_grad,
98 bounds=bounds,
99 constraints=constraints,
100 tol=tolerance,
101 options=dict(maxiter=maxiter, disp=bool(verbose), **solver_options),
102 )
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_minimize.py:701, in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
698 res = _minimize_cobyla(fun, x0, args, constraints, callback=callback,
699 **options)
700 elif meth == 'slsqp':
--> 701 res = _minimize_slsqp(fun, x0, args, jac, bounds,
702 constraints, callback=callback, **options)
703 elif meth == 'trust-constr':
704 res = _minimize_trustregion_constr(fun, x0, args, jac, hess, hessp,
705 bounds, constraints,
706 callback=callback, **options)
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_slsqp_py.py:432, in _minimize_slsqp(func, x0, args, jac, bounds, constraints, maxiter, ftol, iprint, disp, eps, callback, finite_diff_rel_step, **unknown_options)
429 c = _eval_constraint(x, cons)
431 if mode == -1: # gradient evaluation required
--> 432 g = append(wrapped_grad(x), 0.0)
433 a = _eval_con_normals(x, cons, la, n, m, meq, mieq)
435 if majiter > majiter_prev:
436 # call callback if major iteration has incremented
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_optimize.py:277, in _clip_x_for_func.<locals>.eval(x)
275 def eval(x):
276 x = _check_clip_x(x, bounds)
--> 277 return func(x)
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py:273, in ScalarFunction.grad(self, x)
271 if not np.array_equal(x, self.x):
272 self._update_x_impl(x)
--> 273 self._update_grad()
274 return self.g
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py:256, in ScalarFunction._update_grad(self)
254 def _update_grad(self):
255 if not self.g_updated:
--> 256 self._update_grad_impl()
257 self.g_updated = True
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py:173, in ScalarFunction.__init__.<locals>.update_grad()
171 self._update_fun()
172 self.ngev += 1
--> 173 self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
174 **finite_diff_options)
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_numdiff.py:496, in approx_derivative(fun, x0, method, rel_step, abs_step, f0, bounds, sparsity, as_linear_operator, args, kwargs)
490 h = np.where(dx == 0,
491 _eps_for_method(x0.dtype, f0.dtype, method) *
492 sign_x0 * np.maximum(1.0, np.abs(x0)),
493 h)
495 if method == '2-point':
--> 496 h, use_one_sided = _adjust_scheme_to_bounds(
497 x0, h, 1, '1-sided', lb, ub)
498 elif method == '3-point':
499 h, use_one_sided = _adjust_scheme_to_bounds(
500 x0, h, 1, '2-sided', lb, ub)
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_numdiff.py:50, in _adjust_scheme_to_bounds(x0, h, num_steps, scheme, lb, ub)
47 else:
48 raise ValueError("`scheme` must be '1-sided' or '2-sided'.")
---> 50 if np.all((lb == -np.inf) & (ub == np.inf)):
51 return h, use_one_sided
53 h_total = h * num_steps
File <__array_function__ internals>:180, in all(*args, **kwargs)
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/numpy/core/fromnumeric.py:2489, in all(a, axis, out, keepdims, where)
2406 @array_function_dispatch(_all_dispatcher)
2407 def all(a, axis=None, out=None, keepdims=np._NoValue, *, where=np._NoValue):
2408 """
2409 Test whether all array elements along a given axis evaluate to True.
2410
(...)
2487
2488 """
-> 2489 return _wrapreduction(a, np.logical_and, 'all', axis, None, out,
2490 keepdims=keepdims, where=where)
File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/numpy/core/fromnumeric.py:86, in _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs)
83 else:
84 return reduction(axis=axis, out=out, **passkwargs)
---> 86 return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
KeyboardInterrupt:
You’ll notice that this time, a progress bar pops up! This is running the fits for each of these toys. We’re hard at work to find new ways to improve the performance of this, but evaluating 50+ toys/s is not very bad for the initial implementation.
Overall, this is not so bad given that we’re only running 1000 toys. What does this look like in a case where we have lower statistics?
Hypothesis Testing (low stats)¶
model_low = pyhf.simplemodels.uncorrelated_background(
signal=[0.5, 1.0], bkg=[5.0, 6.0], bkg_uncertainty=[0.5, 1.2]
)
In the asymptotics case:
CLs_obs, CLs_exp = pyhf.infer.hypotest(
1.0, # null hypothesis
[5.0, 7.0] + model.config.auxdata,
model,
test_stat="qtilde",
return_expected_set=True,
calctype="asymptotics",
)
print(f" Observed CLs: {CLs_obs:.4f}")
for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):
print(f"Expected CLs({n_sigma:2d} σ): {expected_value:.4f}")
And now throwing 2000 toys:
CLs_obs, CLs_exp = pyhf.infer.hypotest(
1.0, # null hypothesis
[5.0, 7.0] + model.config.auxdata,
model,
test_stat="qtilde",
return_expected_set=True,
calctype="toybased",
ntoys=1000,
track_progress=False,
)
print(f" Observed CLs: {CLs_obs:.4f}")
for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):
print(f"Expected CLs({n_sigma:2d} σ): {expected_value:.4f}")
And as you can see in the case of lower statistics, the asymptotic approximation starts failing!