Source code for moderndive.infer.intervals

"""Confidence intervals from a simulated (bootstrap) distribution."""

from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np
import polars as pl

if TYPE_CHECKING:
    from .core import Distribution



[docs]
def get_confidence_interval(
    distribution: Distribution,
    level: float = 0.95,
    type: str = "percentile",
    *,
    point_estimate: float | None = None,
) -> pl.DataFrame:
    """Return a one-row frame with ``lower_ci`` and ``upper_ci``.

    - ``type="percentile"``: the ``(1-level)/2`` and ``1-(1-level)/2`` quantiles
      of the bootstrap distribution.
    - ``type="se"``: ``point_estimate ± z* · SE`` where SE is the SD of the
      bootstrap distribution (requires ``point_estimate``).
    """
    stats = distribution.stats
    if type == "percentile":
        alpha = 1.0 - level
        lower = float(np.quantile(stats, alpha / 2))
        upper = float(np.quantile(stats, 1 - alpha / 2))
    elif type in ("se", "standard error"):
        if point_estimate is None:
            raise ValueError("type='se' requires `point_estimate`")
        from scipy.stats import norm

        z = float(norm.ppf(1 - (1 - level) / 2))
        se = float(np.std(stats, ddof=1))
        lower = float(point_estimate) - z * se
        upper = float(point_estimate) + z * se
    elif type in ("bias-corrected", "bias_corrected", "bc"):
        if point_estimate is None:
            raise ValueError("type='bias-corrected' requires `point_estimate`")
        from scipy.stats import norm

        alpha = 1.0 - level
        # Bias-correction factor from the fraction of replicates below the estimate.
        prop_less = float(np.mean(stats < float(point_estimate)))
        prop_less = min(max(prop_less, 1e-9), 1 - 1e-9)
        z0 = float(norm.ppf(prop_less))
        lo_q = float(norm.cdf(2 * z0 + norm.ppf(alpha / 2)))
        hi_q = float(norm.cdf(2 * z0 + norm.ppf(1 - alpha / 2)))
        lower = float(np.quantile(stats, lo_q))
        upper = float(np.quantile(stats, hi_q))
    else:
        raise ValueError("type must be 'percentile', 'se', or 'bias-corrected'")
    return pl.DataFrame({"lower_ci": [lower], "upper_ci": [upper]})



def get_fit_confidence_interval(fit, level: float = 0.95) -> pl.DataFrame:
    """Per-term percentile confidence intervals from a bootstrap fit distribution."""
    alpha = 1.0 - level
    return (
        fit.data.group_by("term")
        .agg(
            lower_ci=pl.col("estimate").quantile(alpha / 2),
            upper_ci=pl.col("estimate").quantile(1 - alpha / 2),
        )
        .sort("term")
    )