# Source code for moderndive.infer.theoretical

"""Theory-based distributions for the infer grammar (``assume()``).

Mirrors R ``infer::assume()``: set a theoretical sampling distribution (``t``,
``z``, ``F``, ``Chisq``) that can be visualized and used to compute a p-value
without simulation.
"""

from __future__ import annotations

from dataclasses import dataclass

import polars as pl

# Accepted (lower-cased) spellings of the ``direction`` argument, grouped by
# the tail of the distribution they select.
_RIGHT = {
    "greater",
    "right",
}
_LEFT = {
    "less",
    "left",
}
_BOTH = {
    "both",
    "two sided",
    "two-sided",
    "two_sided",
}

# Distribution names listed as symmetric.
# NOTE(review): not referenced in the visible part of this module - presumably
# consumed elsewhere (e.g. the plotting helpers); confirm before removing.
_SYMMETRIC = {
    "z",
    "t",
}


@dataclass(frozen=True)
class TheoreticalDistribution:
    """A named theoretical distribution (from :func:`assume`).

    Attributes:
        distribution: Case-insensitive name of the distribution. Recognised
            names are ``"t"`` / ``"two-sample t"``, ``"z"``, ``"F"`` and
            ``"Chisq"`` / ``"chi-squared"`` / ``"chi-square"``.
        df: Degrees of freedom: a positive scalar for ``t``/``Chisq``, a
            ``(df1, df2)`` pair for ``F``; ignored for ``z``.
    """

    distribution: str
    df: object | None = None  # scalar for t/Chisq; (df1, df2) for F

    def _family(self) -> str:
        """Map ``distribution`` onto its canonical family name.

        Returns:
            One of ``"t"``, ``"z"``, ``"f"`` or ``"chisq"``.

        Raises:
            ValueError: If the name is not one of the recognised aliases.
        """
        name = self.distribution.lower()
        if name in ("t", "two-sample t"):
            return "t"
        if name == "z":
            return "z"
        if name == "f":
            return "f"
        if name in ("chisq", "chi-squared", "chi-square"):
            return "chisq"
        raise ValueError(f"unknown theoretical distribution {self.distribution!r}")

    def _positive_df(self, value: object, label: str) -> float:
        """Return ``value`` as a float, insisting that it is a positive number.

        Raises:
            ValueError: If ``value`` cannot be converted to a float, or is
                not strictly positive (this also rejects NaN).
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            raise ValueError(
                f"{self.distribution!r} distribution requires a positive "
                f"{label}, got {value!r}"
            ) from None
        # ``not number > 0`` (rather than ``number <= 0``) also catches NaN.
        if not number > 0:
            raise ValueError(
                f"{self.distribution!r} distribution requires a positive "
                f"{label}, got {value!r}"
            )
        return number

    def _dist(self):
        """Build the frozen ``scipy.stats`` distribution for this object.

        Returns:
            ``stats.t``, ``stats.norm``, ``stats.f`` or ``stats.chi2``, frozen
            with the degrees of freedom held in ``df`` (``stats.norm`` takes
            none).

        Raises:
            ValueError: If the distribution name is unknown, or if ``df`` is
                missing, malformed or non-positive for a distribution that
                needs it.
        """
        # Imported lazily so scipy is only needed once a distribution is built.
        from scipy import stats

        family = self._family()
        if family == "z":
            return stats.norm()
        if family == "f":
            try:
                df1, df2 = self.df
            except (TypeError, ValueError):
                # df was None, a scalar, or a sequence of the wrong length.
                raise ValueError(
                    f"{self.distribution!r} distribution requires "
                    f"df=(df1, df2), got {self.df!r}"
                ) from None
            return stats.f(
                self._positive_df(df1, "df1"),
                self._positive_df(df2, "df2"),
            )
        df = self._positive_df(self.df, "df")
        if family == "t":
            return stats.t(df=df)
        return stats.chi2(df=df)

    def get_p_value(self, obs_stat, direction: str) -> pl.DataFrame:
        """Theory-based p-value for a (standardized) observed statistic.

        Args:
            obs_stat: Observed statistic; converted with ``float()``.
            direction: Case-insensitive tail selector: ``"right"``/
                ``"greater"``, ``"left"``/``"less"``, or one of the two-sided
                spellings (``"two-sided"``, ``"two_sided"``, ``"two sided"``,
                ``"both"``). For ``F`` and ``Chisq`` the upper tail is always
                used, whatever ``direction`` says.

        Returns:
            A one-row polars ``DataFrame`` with a single ``p_value`` column.

        Raises:
            ValueError: If the distribution (or its ``df``) is invalid, or if
                ``direction`` is not recognised for a ``t``/``z`` distribution.
        """
        dist = self._dist()
        obs = float(obs_stat)
        d = direction.lower()
        if self._family() in ("f", "chisq"):
            p = float(dist.sf(obs))  # these tests are inherently one-sided (right)
        elif d in _RIGHT:
            p = float(dist.sf(obs))
        elif d in _LEFT:
            p = float(dist.cdf(obs))
        elif d in _BOTH:
            # Twice the smaller tail area.
            p = float(2 * min(dist.cdf(obs), dist.sf(obs)))
        else:
            raise ValueError("direction must be right/greater, left/less, or two-sided")
        # Doubling the smaller tail can overshoot 1 by rounding; cap it.
        return pl.DataFrame({"p_value": [min(p, 1.0)]})

    def visualize(self, bins: int = 100, *, engine: str = "plotly"):
        """Plot the theoretical density curve (plotly by default; engine="plotnine").

        Delegates to ``visualize_theoretical`` from the sibling ``viz`` module,
        forwarding ``bins`` and ``engine`` unchanged, and returns its result.
        """
        # Imported lazily, at call time, so plotting code is only loaded on use.
        from .viz import visualize_theoretical

        return visualize_theoretical(self, bins=bins, engine=engine)
def assume(distribution: str, df: object | None = None) -> TheoreticalDistribution:
    """Create a theory-based distribution (``"t"``, ``"z"``, ``"F"``, ``"Chisq"``).

    Args:
        distribution: Name of the theoretical distribution.
        df: Degrees of freedom: a scalar for ``t``/``Chisq``, a
            ``(df1, df2)`` tuple for ``F``, and unused for ``z``.

    Returns:
        A :class:`TheoreticalDistribution` holding the two arguments as given.
    """
    theory = TheoreticalDistribution(distribution, df)
    return theory