Playing with Toys

As of v0.6.0, pyhf now supports toys! A lot of kinks have been discovered and worked out and we’re grateful to our ATLAS colleagues for beta-testing this in the meantime. We don’t believe that there may not be any more bugs, but we feel confident that we can release the current implementation.

Simple Model (again)

import numpy as np
import pyhf

model = pyhf.simplemodels.uncorrelated_background(
    signal=[5.0, 10.0], bkg=[50.0, 60.0], bkg_uncertainty=[5.0, 12.0]
)

Hypothesis Testing (revisited)

And like in the first exercise we did, let’s refresh what the hypothesis test looked like using \(\tilde{q}_\mu\):

CLs_obs, CLs_exp = pyhf.infer.hypotest(
    1.0,  # null hypothesis
    [53.0, 65.0] + model.config.auxdata,
    model,
    test_stat="qtilde",
    return_expected_set=True,
)
print(f"      Observed CLs: {CLs_obs:.4f}")
for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):
    print(f"Expected CLs({n_sigma:2d} σ): {expected_value:.4f}")
      Observed CLs: 0.4957
Expected CLs(-2 σ): 0.0832
Expected CLs(-1 σ): 0.1828
Expected CLs( 0 σ): 0.3705
Expected CLs( 1 σ): 0.6437
Expected CLs( 2 σ): 0.8854

So the question is, does the asymptotic approximation hold in this example? The standard assumption is that you have enough “statistics” (meaning enough events) to use the large-N approximation. So let’s use the toy-based calculator instead and compute the same values as above and see if they match in the asymptotic case (we certainly hope they mostly do here!)

Note

For readability in the Jupyter Book the hypotest has track_progress=False. If you’re running this notebook yourself you might want to set track_progress=True to enable the progress bar.

CLs_obs, CLs_exp = pyhf.infer.hypotest(
    1.0,  # null hypothesis
    [53.0, 65.0] + model.config.auxdata,
    model,
    test_stat="qtilde",
    return_expected_set=True,
    calctype="toybased",
    ntoys=1000,
    track_progress=False,
)
print(f"      Observed CLs: {CLs_obs:.4f}")
for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):
    print(f"Expected CLs({n_sigma:2d} σ): {expected_value:.4f}")
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Input In [3], in <cell line: 1>()
----> 1 CLs_obs, CLs_exp = pyhf.infer.hypotest(
      2     1.0,  # null hypothesis
      3     [53.0, 65.0] + model.config.auxdata,
      4     model,
      5     test_stat="qtilde",
      6     return_expected_set=True,
      7     calctype="toybased",
      8     ntoys=1000,
      9     track_progress=False,
     10 )
     11 print(f"      Observed CLs: {CLs_obs:.4f}")
     12 for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/__init__.py:160, in hypotest(poi_test, data, pdf, init_pars, par_bounds, fixed_params, calctype, return_tail_probs, return_expected, return_expected_set, **kwargs)
    149 calc = utils.create_calculator(
    150     calctype,
    151     data,
   (...)
    156     **kwargs,
    157 )
    159 teststat = calc.teststatistic(poi_test)
--> 160 sig_plus_bkg_distribution, bkg_only_distribution = calc.distributions(poi_test)
    162 tb, _ = get_backend()
    163 CLsb_obs, CLb_obs, CLs_obs = tuple(
    164     tb.astensor(pvalue)
    165     for pvalue in calc.pvalues(
    166         teststat, sig_plus_bkg_distribution, bkg_only_distribution
    167     )
    168 )

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/calculators.py:739, in ToyCalculator.distributions(self, poi_test, track_progress)
    736 bkg_teststat = []
    737 for sample in tqdm.tqdm(bkg_sample, **tqdm_options, desc='Background-like'):
    738     bkg_teststat.append(
--> 739         teststat_func(
    740             poi_test,
    741             sample,
    742             self.pdf,
    743             self.init_pars,
    744             self.par_bounds,
    745             self.fixed_params,
    746         )
    747     )
    749 s_plus_b = EmpiricalDistribution(tensorlib.astensor(signal_teststat))
    750 b_only = EmpiricalDistribution(tensorlib.astensor(bkg_teststat))

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/test_statistics.py:189, in qmu_tilde(mu, data, pdf, init_pars, par_bounds, fixed_params)
    184 if par_bounds[pdf.config.poi_index][0] != 0:
    185     log.warning(
    186         'qmu_tilde test statistic used for fit configuration with POI not bounded at zero.\n'
    187         + 'Use the qmu test statistic (pyhf.infer.test_statistics.qmu) instead.'
    188     )
--> 189 return _qmu_like(mu, data, pdf, init_pars, par_bounds, fixed_params)

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/test_statistics.py:25, in _qmu_like(mu, data, pdf, init_pars, par_bounds, fixed_params)
     17 """
     18 Clipped version of _tmu_like where the returned test statistic
     19 is 0 if muhat > 0 else tmu_like_stat.
   (...)
     22 qmu_tilde. Otherwise this is qmu (no tilde).
     23 """
     24 tensorlib, optimizer = get_backend()
---> 25 tmu_like_stat, (_, muhatbhat) = _tmu_like(
     26     mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=True
     27 )
     28 qmu_like_stat = tensorlib.where(
     29     muhatbhat[pdf.config.poi_index] > mu, tensorlib.astensor(0.0), tmu_like_stat
     30 )
     31 return qmu_like_stat

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/test_statistics.py:47, in _tmu_like(mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars)
     43 tensorlib, optimizer = get_backend()
     44 mubhathat, fixed_poi_fit_lhood_val = fixed_poi_fit(
     45     mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_val=True
     46 )
---> 47 muhatbhat, unconstrained_fit_lhood_val = fit(
     48     data, pdf, init_pars, par_bounds, fixed_params, return_fitted_val=True
     49 )
     50 log_likelihood_ratio = fixed_poi_fit_lhood_val - unconstrained_fit_lhood_val
     51 tmu_like_stat = tensorlib.astensor(
     52     tensorlib.clip(log_likelihood_ratio, 0.0, max_value=None)
     53 )

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/infer/mle.py:131, in fit(data, pdf, init_pars, par_bounds, fixed_params, **kwargs)
    124 # get fixed vals from the model
    125 fixed_vals = [
    126     (index, init)
    127     for index, (init, is_fixed) in enumerate(zip(init_pars, fixed_params))
    128     if is_fixed
    129 ]
--> 131 return opt.minimize(
    132     twice_nll, data, pdf, init_pars, par_bounds, fixed_vals, **kwargs
    133 )

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/optimize/mixins.py:184, in OptimizerMixin.minimize(self, objective, data, pdf, init_pars, par_bounds, fixed_vals, return_fitted_val, return_result_obj, return_uncertainties, return_correlations, do_grad, do_stitch, **kwargs)
    181         par_names[index] = None
    182     par_names = [name for name in par_names if name]
--> 184 result = self._internal_minimize(
    185     **minimizer_kwargs, options=kwargs, par_names=par_names
    186 )
    187 result = self._internal_postprocess(
    188     result, stitch_pars, return_uncertainties=return_uncertainties
    189 )
    191 _returns = [result.x]

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/optimize/mixins.py:50, in OptimizerMixin._internal_minimize(self, func, x0, do_grad, bounds, fixed_vals, options, par_names)
     31 def _internal_minimize(
     32     self,
     33     func,
   (...)
     39     par_names=None,
     40 ):
     42     minimizer = self._get_minimizer(
     43         func,
     44         x0,
   (...)
     48         par_names=par_names,
     49     )
---> 50     result = self._minimize(
     51         minimizer,
     52         func,
     53         x0,
     54         do_grad=do_grad,
     55         bounds=bounds,
     56         fixed_vals=fixed_vals,
     57         options=options,
     58     )
     60     try:
     61         assert result.success

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/pyhf/optimize/opt_scipy.py:93, in scipy_optimizer._minimize(self, minimizer, func, x0, do_grad, bounds, fixed_vals, options)
     90 else:
     91     constraints = []
---> 93 return minimizer(
     94     func,
     95     x0,
     96     method=method,
     97     jac=do_grad,
     98     bounds=bounds,
     99     constraints=constraints,
    100     tol=tolerance,
    101     options=dict(maxiter=maxiter, disp=bool(verbose), **solver_options),
    102 )

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_minimize.py:701, in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
    698     res = _minimize_cobyla(fun, x0, args, constraints, callback=callback,
    699                             **options)
    700 elif meth == 'slsqp':
--> 701     res = _minimize_slsqp(fun, x0, args, jac, bounds,
    702                           constraints, callback=callback, **options)
    703 elif meth == 'trust-constr':
    704     res = _minimize_trustregion_constr(fun, x0, args, jac, hess, hessp,
    705                                        bounds, constraints,
    706                                        callback=callback, **options)

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_slsqp_py.py:432, in _minimize_slsqp(func, x0, args, jac, bounds, constraints, maxiter, ftol, iprint, disp, eps, callback, finite_diff_rel_step, **unknown_options)
    429     c = _eval_constraint(x, cons)
    431 if mode == -1:  # gradient evaluation required
--> 432     g = append(wrapped_grad(x), 0.0)
    433     a = _eval_con_normals(x, cons, la, n, m, meq, mieq)
    435 if majiter > majiter_prev:
    436     # call callback if major iteration has incremented

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_optimize.py:277, in _clip_x_for_func.<locals>.eval(x)
    275 def eval(x):
    276     x = _check_clip_x(x, bounds)
--> 277     return func(x)

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py:273, in ScalarFunction.grad(self, x)
    271 if not np.array_equal(x, self.x):
    272     self._update_x_impl(x)
--> 273 self._update_grad()
    274 return self.g

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py:256, in ScalarFunction._update_grad(self)
    254 def _update_grad(self):
    255     if not self.g_updated:
--> 256         self._update_grad_impl()
    257         self.g_updated = True

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py:173, in ScalarFunction.__init__.<locals>.update_grad()
    171 self._update_fun()
    172 self.ngev += 1
--> 173 self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
    174                            **finite_diff_options)

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_numdiff.py:496, in approx_derivative(fun, x0, method, rel_step, abs_step, f0, bounds, sparsity, as_linear_operator, args, kwargs)
    490     h = np.where(dx == 0,
    491                  _eps_for_method(x0.dtype, f0.dtype, method) *
    492                  sign_x0 * np.maximum(1.0, np.abs(x0)),
    493                  h)
    495 if method == '2-point':
--> 496     h, use_one_sided = _adjust_scheme_to_bounds(
    497         x0, h, 1, '1-sided', lb, ub)
    498 elif method == '3-point':
    499     h, use_one_sided = _adjust_scheme_to_bounds(
    500         x0, h, 1, '2-sided', lb, ub)

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/scipy/optimize/_numdiff.py:50, in _adjust_scheme_to_bounds(x0, h, num_steps, scheme, lb, ub)
     47 else:
     48     raise ValueError("`scheme` must be '1-sided' or '2-sided'.")
---> 50 if np.all((lb == -np.inf) & (ub == np.inf)):
     51     return h, use_one_sided
     53 h_total = h * num_steps

File <__array_function__ internals>:180, in all(*args, **kwargs)

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/numpy/core/fromnumeric.py:2489, in all(a, axis, out, keepdims, where)
   2406 @array_function_dispatch(_all_dispatcher)
   2407 def all(a, axis=None, out=None, keepdims=np._NoValue, *, where=np._NoValue):
   2408     """
   2409     Test whether all array elements along a given axis evaluate to True.
   2410 
   (...)
   2487 
   2488     """
-> 2489     return _wrapreduction(a, np.logical_and, 'all', axis, None, out,
   2490                           keepdims=keepdims, where=where)

File /__t/Python/3.9.13/x64/lib/python3.9/site-packages/numpy/core/fromnumeric.py:86, in _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs)
     83         else:
     84             return reduction(axis=axis, out=out, **passkwargs)
---> 86 return ufunc.reduce(obj, axis, dtype, out, **passkwargs)

KeyboardInterrupt: 

You’ll notice that this time, a progress bar pops up! This is running the fits for each of these toys. We’re hard at work to find new ways to improve the performance of this, but evaluating 50+ toys/s is not very bad for the initial implementation.

Overall, this is not so bad given that we’re only running 1000 toys. What does this look like in a case where we have lower statistics?

Hypothesis Testing (low stats)

model_low = pyhf.simplemodels.uncorrelated_background(
    signal=[0.5, 1.0], bkg=[5.0, 6.0], bkg_uncertainty=[0.5, 1.2]
)

In the asymptotics case:

CLs_obs, CLs_exp = pyhf.infer.hypotest(
    1.0,  # null hypothesis
    [5.0, 7.0] + model.config.auxdata,
    model,
    test_stat="qtilde",
    return_expected_set=True,
    calctype="asymptotics",
)
print(f"      Observed CLs: {CLs_obs:.4f}")
for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):
    print(f"Expected CLs({n_sigma:2d} σ): {expected_value:.4f}")

And now throwing 2000 toys:

CLs_obs, CLs_exp = pyhf.infer.hypotest(
    1.0,  # null hypothesis
    [5.0, 7.0] + model.config.auxdata,
    model,
    test_stat="qtilde",
    return_expected_set=True,
    calctype="toybased",
    ntoys=1000,
    track_progress=False,
)
print(f"      Observed CLs: {CLs_obs:.4f}")
for expected_value, n_sigma in zip(CLs_exp, np.arange(-2, 3)):
    print(f"Expected CLs({n_sigma:2d} σ): {expected_value:.4f}")

And as you can see in the case of lower statistics, the asymptotic approximation starts failing!