Source code for moderndive.infer.pvalue
"""p-values from a simulated null distribution."""
from __future__ import annotations
from typing import TYPE_CHECKING
import numpy as np
import polars as pl
if TYPE_CHECKING:
from .core import Distribution
# Direction aliases accepted by infer.
# Each set holds the lowercase spellings that select one kind of test; the
# functions in this module lowercase ``direction`` before the membership test,
# so matching is case-insensitive.
# Upper-tail test: proportion of null statistics >= the observed statistic.
_RIGHT = {"right", "greater"}
# Lower-tail test: proportion of null statistics <= the observed statistic.
_LEFT = {"left", "less"}
# Two-sided test, with the common separator variants.
_BOTH = {"two-sided", "two_sided", "both", "two sided"}
def _as_float(obs_stat) -> float:
return float(obs_stat)
[docs]
def get_p_value(
    distribution: Distribution,
    obs_stat,
    direction: str,
) -> pl.DataFrame:
    """Compute a simulation-based p-value and return it as a one-row frame.

    Parameters
    ----------
    distribution
        Object whose ``stats`` attribute holds the simulated statistics.
        It is compared element-wise against the observed value and averaged
        with ``np.mean`` (presumably a NumPy array -- confirm against
        ``Distribution``).
    obs_stat
        Observed statistic; converted with ``_as_float``.
    direction
        Case-insensitive. ``right``/``greater`` gives the proportion of
        simulated statistics ``>=`` the observed one, ``left``/``less`` the
        proportion ``<=`` it, and any two-sided alias gives twice the smaller
        of those two proportions, capped at 1 (infer's convention).

    Returns
    -------
    pl.DataFrame
        A single row with one column, ``p_value``.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the recognised aliases.
    """
    null_stats = distribution.stats
    observed = _as_float(obs_stat)
    side = direction.lower()

    def upper_tail() -> float:
        # Share of simulated statistics at least as large as the observed one.
        return float(np.mean(null_stats >= observed))

    def lower_tail() -> float:
        # Share of simulated statistics at most as large as the observed one.
        return float(np.mean(null_stats <= observed))

    if side in _RIGHT:
        p_value = upper_tail()
    elif side in _LEFT:
        p_value = lower_tail()
    elif side in _BOTH:
        # Double the smaller tail; the cap keeps the result a valid probability.
        p_value = min(1.0, 2.0 * min(upper_tail(), lower_tail()))
    else:
        raise ValueError("direction must be 'right'/'greater', 'left'/'less', or 'two-sided'")

    return pl.DataFrame({"p_value": [p_value]})
def get_fit_p_value(fit, obs_stat, direction: str = "two-sided") -> pl.DataFrame:
    """Per-term simulation p-values: compare a null fit distribution to observed.

    Parameters
    ----------
    fit
        Null fit distribution. Its ``data`` frame is grouped by ``term`` and
        the ``estimate`` column of each group is used as that term's null
        values.
    obs_stat
        The observed :class:`FitResult` (one estimate per term). Its ``data``
        frame must provide ``term`` and ``estimate`` columns.
    direction
        Case-insensitive. ``right``/``greater`` gives the proportion of null
        estimates ``>=`` the observed one, ``left``/``less`` the proportion
        ``<=`` it. A two-sided alias gives the proportion of null estimates
        lying at least as far from the mean of the null estimates as the
        observed estimate does.

    Returns
    -------
    pl.DataFrame
        Columns ``term`` and ``p_value``, one row per term in ``fit``, sorted
        by ``term``.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the recognised aliases.
    KeyError
        If a term present in ``fit`` has no estimate in ``obs_stat``.

    Notes
    -----
    The two-sided rule here (distance from the null mean) is not the
    "twice the smaller tail" rule used by ``get_p_value``.
    NOTE(review): confirm that this difference is intentional.
    """
    direction = direction.lower()
    # Reject unknown directions up front. Without this check a typo such as
    # "rigth" would silently be treated as a two-sided test.
    if direction not in _RIGHT and direction not in _LEFT and direction not in _BOTH:
        raise ValueError("direction must be 'right'/'greater', 'left'/'less', or 'two-sided'")

    observed = {row["term"]: row["estimate"] for row in obs_stat.data.iter_rows(named=True)}

    out_terms, out_p = [], []
    for term, sub in fit.data.group_by("term"):
        # The group key may arrive as a 1-tuple or as a bare value
        # (presumably depending on the polars version); normalise to the value.
        term = term[0] if isinstance(term, tuple) else term
        null_vals = sub["estimate"].to_numpy()
        obs = float(observed[term])
        if direction in _RIGHT:
            p = float(np.mean(null_vals >= obs))
        elif direction in _LEFT:
            p = float(np.mean(null_vals <= obs))
        else:
            # Two-sided: measure extremeness as distance from the null mean.
            center = float(np.mean(null_vals))
            p = float(np.mean(np.abs(null_vals - center) >= abs(obs - center)))
        out_terms.append(term)
        out_p.append(p)
    return pl.DataFrame({"term": out_terms, "p_value": out_p}).sort("term")