Source code for moderndive.theory
"""Theory-based inference wrappers (scipy.stats).
The book deliberately teaches simulation-based inference first, then ties results
back to the traditional theory-based methods (t-distribution CIs, the two-sample
test, normal approximations). These helpers provide those theory-based companions
so the chapters can draw the simulation-vs-theory comparison.
All functions return small polars frames with tidy column names.
"""
from __future__ import annotations
import numpy as np
import polars as pl
__all__ = [
"t_test_one_sample",
"t_test_two_sample",
"t_confidence_interval",
"prop_test_two_sample",
]
def _arr(x) -> np.ndarray:
if isinstance(x, pl.Series):
return x.drop_nulls().to_numpy()
return np.asarray(x, dtype=float)
[docs]
def t_test_one_sample(x, mu: float = 0.0, alternative: str = "two-sided") -> pl.DataFrame:
"""One-sample t-test of H0: mean == ``mu``."""
from scipy import stats
a = _arr(x)
res = stats.ttest_1samp(a, popmean=mu, alternative=alternative)
return pl.DataFrame(
{
"statistic": [float(res.statistic)],
"df": [float(a.size - 1)],
"p_value": [float(res.pvalue)],
}
)
[docs]
def t_confidence_interval(x, level: float = 0.95) -> pl.DataFrame:
"""Theory-based t confidence interval for a single mean."""
from scipy import stats
a = _arr(x)
n = a.size
mean = float(a.mean())
se = float(a.std(ddof=1) / np.sqrt(n))
tcrit = float(stats.t.ppf(1 - (1 - level) / 2, df=n - 1))
return pl.DataFrame({"lower_ci": [mean - tcrit * se], "upper_ci": [mean + tcrit * se]})
[docs]
def t_test_two_sample(
x, y, alternative: str = "two-sided", equal_var: bool = False
) -> pl.DataFrame:
"""Two-sample (Welch by default) t-test of equal means."""
from scipy import stats
a, b = _arr(x), _arr(y)
res = stats.ttest_ind(a, b, equal_var=equal_var, alternative=alternative)
return pl.DataFrame({"statistic": [float(res.statistic)], "p_value": [float(res.pvalue)]})
[docs]
def prop_test_two_sample(
successes: tuple[int, int], totals: tuple[int, int], alternative: str = "two-sided"
) -> pl.DataFrame:
"""Two-sample z-test for a difference in proportions (normal approximation)."""
from scipy import stats
x1, x2 = successes
n1, n2 = totals
p1, p2 = x1 / n1, x2 / n2
p_pool = (x1 + x2) / (n1 + n2)
se = np.sqrt(p_pool * (1 - p_pool) * (1 / n1 + 1 / n2))
z = (p1 - p2) / se
if alternative in ("greater", "right"):
p = float(stats.norm.sf(z))
elif alternative in ("less", "left"):
p = float(stats.norm.cdf(z))
else:
p = float(2 * stats.norm.sf(abs(z)))
return pl.DataFrame({"estimate": [p1 - p2], "statistic": [float(z)], "p_value": [p]})