Skip to content

OpenActuarial

extremeloss

OpenActuarial/docs

extremeloss¶

Extreme-value theory for loss data: peaks-over-threshold (generalized Pareto) and block-maxima (generalized extreme value) tail fitting, tail risk measures (VaR/TVaR, return levels), importance-sampling estimators for rare events, and bootstrap uncertainty.

Quickstart¶

import numpy as np
import extremeloss as el

losses = np.random.default_rng(0).pareto(2.0, 50_000) * 100_000

# peaks-over-threshold: fit a generalized Pareto tail above a high threshold
fit = el.fit_pot(losses, threshold=np.quantile(losses, 0.95))
fit.xi, fit.beta                       # GPD shape and scale
el.return_level(1000, fit)             # the 1-in-1000 return level
el.gpd_var(0.999, fit.threshold, fit.xi, fit.beta, fit.exceedance_fraction)

Choosing a threshold¶

import extremeloss as el

el.threshold_diagnostic_table(losses)  # stability of xi across candidate thresholds
el.mean_excess(losses)                 # mean-excess (should be ~linear in the GPD range)

Bootstrap uncertainty¶

Tail estimates from few exceedances are noisy; bootstrap the risk measures rather than trusting a point estimate.

import extremeloss as el

el.bootstrap_var(losses, 0.99)         # CI on the 99% VaR
el.bootstrap_tail_probability(losses, threshold=500_000)

API reference¶

extremeloss ¶

BootstrapResult `dataclass` ¶

Bootstrap uncertainty summary for a scalar statistic.

Source code in extremeloss/results.py

@dataclass(slots=True)
class BootstrapResult:
    """Bootstrap uncertainty summary for a scalar statistic."""

    estimate: float
    bootstrap_estimates: np.ndarray
    method: str = "percentile"
    ci: tuple[float, float] | None = None
    stderr: float | None = None
    alpha: float | None = None

    def summary(self) -> dict[str, Any]:
        out: dict[str, Any] = {
            "estimate": float(self.estimate),
            "method": self.method,
            "n_bootstrap": int(self.bootstrap_estimates.size),
        }
        if self.stderr is not None:
            out["stderr"] = float(self.stderr)
        if self.ci is not None:
            out["ci"] = (float(self.ci[0]), float(self.ci[1]))
        if self.alpha is not None:
            out["alpha"] = float(self.alpha)
        return out

GEVFit `dataclass` ¶

Fitted generalized extreme value distribution for block maxima.

Source code in extremeloss/results.py

@dataclass(slots=True)
class GEVFit:
    """Fitted generalized extreme value distribution for block maxima."""

    xi: float
    loc: float
    scale: float
    n_blocks: int
    block_size: int | None = None
    fit_method: str = "mle"
    covariance: np.ndarray | None = None

    def return_level(self, period: float) -> float:
        if period <= 1.0:
            raise ValueError("period must exceed 1.0")
        p = 1.0 - 1.0 / period
        return float(genextreme.ppf(p, c=-self.xi, loc=self.loc, scale=self.scale))

    def cdf(self, x: float) -> float:
        return float(genextreme.cdf(x, c=-self.xi, loc=self.loc, scale=self.scale))

    def summary(self) -> dict[str, Any]:
        out: dict[str, Any] = {
            "xi": float(self.xi),
            "loc": float(self.loc),
            "scale": float(self.scale),
            "n_blocks": int(self.n_blocks),
            "fit_method": self.fit_method,
        }
        if self.block_size is not None:
            out["block_size"] = int(self.block_size)
        if self.covariance is not None:
            out["covariance"] = np.asarray(self.covariance, dtype=float)
        return out

GPDFit `dataclass` ¶

Fitted generalized Pareto distribution above a threshold.

Source code in extremeloss/results.py

@dataclass(slots=True)
class GPDFit:
    """Fitted generalized Pareto distribution above a threshold."""

    threshold: float
    xi: float
    beta: float
    exceedance_fraction: float
    n_exceedances: int
    fit_method: str = "mle"
    covariance: np.ndarray | None = None

    def tail_probability(self, x: float) -> float:
        from .evt.gpd import gpd_tail_probability

        return gpd_tail_probability(
            x=x,
            threshold=self.threshold,
            xi=self.xi,
            beta=self.beta,
            exceedance_fraction=self.exceedance_fraction,
        )

    def var(self, p: float) -> float:
        from .evt.gpd import gpd_var

        return gpd_var(
            p=p,
            threshold=self.threshold,
            xi=self.xi,
            beta=self.beta,
            exceedance_fraction=self.exceedance_fraction,
        )

    def tvar(self, p: float) -> float:
        from .evt.gpd import gpd_tvar

        return gpd_tvar(
            p=p,
            threshold=self.threshold,
            xi=self.xi,
            beta=self.beta,
            exceedance_fraction=self.exceedance_fraction,
        )

    def return_level(self, period: float) -> float:
        if period <= 1.0:
            raise ValueError("period must exceed 1.0")
        return self.var(1.0 - 1.0 / period)

    def summary(self) -> dict[str, Any]:
        out: dict[str, Any] = {
            "threshold": float(self.threshold),
            "xi": float(self.xi),
            "beta": float(self.beta),
            "exceedance_fraction": float(self.exceedance_fraction),
            "n_exceedances": int(self.n_exceedances),
            "fit_method": self.fit_method,
        }
        if self.covariance is not None:
            out["covariance"] = np.asarray(self.covariance, dtype=float)
        return out

GPDTail `dataclass` ¶

Conditional generalized-Pareto tail on [threshold, inf), for splicing.

A thin distribution object wrapping scipy.stats.genpareto with shape xi, scale beta and location threshold. Unlike :class:GPDFit (which carries the exceedance rate and reports unconditional VaR/TVaR), this represents the tail conditional on X > threshold: cdf(threshold) == 0 and the density integrates to one over [threshold, inf). It exposes the cdf / pdf / sample / quantile / mean / variance interface that :class:lossmodels.SplicedSeverity consumes as a tail. Moments raise when they do not exist (xi >= 1 for the mean, xi >= 1/2 for the variance), mirroring heavy-tailed severities.

Source code in extremeloss/results.py

@dataclass(slots=True)
class GPDTail:
    """Conditional generalized-Pareto tail on ``[threshold, inf)``, for splicing.

    A thin distribution object wrapping ``scipy.stats.genpareto`` with shape
    ``xi``, scale ``beta`` and location ``threshold``. Unlike :class:`GPDFit`
    (which carries the exceedance rate and reports *unconditional* VaR/TVaR),
    this represents the tail *conditional* on ``X > threshold``: ``cdf(threshold)
    == 0`` and the density integrates to one over ``[threshold, inf)``. It
    exposes the ``cdf`` / ``pdf`` / ``sample`` / ``quantile`` / ``mean`` /
    ``variance`` interface that :class:`lossmodels.SplicedSeverity` consumes as a
    tail. Moments raise when they do not exist (``xi >= 1`` for the mean,
    ``xi >= 1/2`` for the variance), mirroring heavy-tailed severities.
    """

    threshold: float
    xi: float
    beta: float

    @classmethod
    def from_fit(cls, fit: "GPDFit") -> "GPDTail":
        """Build a conditional tail from a fitted :class:`GPDFit`."""
        return cls(threshold=float(fit.threshold), xi=float(fit.xi), beta=float(fit.beta))

    def _frozen(self):
        return genpareto(c=self.xi, loc=self.threshold, scale=self.beta)

    def pdf(self, x):
        return _scalarize(self._frozen().pdf(np.asarray(x, dtype=float)), x)

    def cdf(self, x):
        return _scalarize(self._frozen().cdf(np.asarray(x, dtype=float)), x)

    def quantile(self, p):
        return _scalarize(self._frozen().ppf(np.asarray(p, dtype=float)), p)

    def ppf(self, p):
        return self.quantile(p)

    def sample(self, size: int = 1) -> np.ndarray:
        if size <= 0:
            raise ValueError("size must be positive.")
        return genpareto.rvs(c=self.xi, loc=self.threshold, scale=self.beta, size=size)

    def mean(self) -> float:
        if self.xi >= 1.0:
            raise ValueError("mean does not exist for xi >= 1.")
        return float(self.threshold + self.beta / (1.0 - self.xi))

    def variance(self) -> float:
        if self.xi >= 0.5:
            raise ValueError("variance does not exist for xi >= 1/2.")
        return float(self.beta ** 2 / ((1.0 - self.xi) ** 2 * (1.0 - 2.0 * self.xi)))

    def summary(self) -> dict[str, float]:
        return {"threshold": float(self.threshold), "xi": float(self.xi), "beta": float(self.beta)}

from_fit `classmethod` ¶

from_fit(fit: 'GPDFit') -> 'GPDTail'

Build a conditional tail from a fitted :class:GPDFit.

Source code in extremeloss/results.py

@classmethod
def from_fit(cls, fit: "GPDFit") -> "GPDTail":
    """Build a conditional tail from a fitted :class:`GPDFit`."""
    return cls(threshold=float(fit.threshold), xi=float(fit.xi), beta=float(fit.beta))

TailEstimateResult `dataclass` ¶

Container for tail-estimation results.

Source code in extremeloss/results.py

@dataclass(slots=True)
class TailEstimateResult:
    """Container for tail-estimation results."""

    estimate: float
    method: str
    stderr: float | None = None
    ci: tuple[float, float] | None = None
    n: int | None = None
    effective_n: float | None = None
    threshold: float | None = None
    quantile: float | None = None
    diagnostics: dict[str, Any] = field(default_factory=dict)

    def summary(self) -> dict[str, Any]:
        out: dict[str, Any] = {
            "estimate": float(self.estimate),
            "method": self.method,
        }
        if self.stderr is not None:
            out["stderr"] = float(self.stderr)
        if self.ci is not None:
            out["ci"] = (float(self.ci[0]), float(self.ci[1]))
        if self.n is not None:
            out["n"] = int(self.n)
        if self.effective_n is not None:
            out["effective_n"] = float(self.effective_n)
        if self.threshold is not None:
            out["threshold"] = float(self.threshold)
        if self.quantile is not None:
            out["quantile"] = float(self.quantile)
        if self.diagnostics:
            out["diagnostics"] = dict(self.diagnostics)
        return out

ThresholdScan `dataclass` ¶

Threshold-diagnostic results across a grid of thresholds.

Source code in extremeloss/results.py

@dataclass(slots=True)
class ThresholdScan:
    """Threshold-diagnostic results across a grid of thresholds."""

    thresholds: np.ndarray
    mean_excess: np.ndarray
    xi: np.ndarray
    beta: np.ndarray
    n_exceedances: np.ndarray

    def to_dict(self) -> dict[str, np.ndarray]:
        return {
            "thresholds": np.asarray(self.thresholds, dtype=float),
            "mean_excess": np.asarray(self.mean_excess, dtype=float),
            "xi": np.asarray(self.xi, dtype=float),
            "beta": np.asarray(self.beta, dtype=float),
            "n_exceedances": np.asarray(self.n_exceedances, dtype=int),
        }

estimate_tail_probability ¶

estimate_tail_probability(
    data,
    threshold: float,
    *,
    size: int | None = None,
    alpha: float = 0.05
) -> TailEstimateResult

Estimate P(X > threshold) from simulated or observed losses.

Source code in extremeloss/estimation/rare_event.py

def estimate_tail_probability(
    data,
    threshold: float,
    *,
    size: int | None = None,
    alpha: float = 0.05,
) -> TailEstimateResult:
    """Estimate P(X > threshold) from simulated or observed losses."""
    validate_threshold(threshold)
    validate_alpha(alpha)
    losses = coerce_losses(data, size=size)
    indicators = (losses > threshold).astype(float)
    estimate = float(np.mean(indicators))
    stderr = float(np.std(indicators, ddof=0) / math.sqrt(losses.size))
    ci = _normal_ci(estimate, stderr, alpha)
    return TailEstimateResult(
        estimate=estimate,
        method="empirical",
        stderr=stderr,
        ci=ci,
        n=int(losses.size),
        threshold=float(threshold),
        diagnostics={"n_exceedances": int(np.sum(indicators))},
    )

estimate_tail_probability_cmc ¶

estimate_tail_probability_cmc(
    conditional_probabilities,
    *,
    threshold: float | None = None,
    alpha: float = 0.05
) -> TailEstimateResult

Estimate an exceedance probability from conditional exceedance probabilities.

Parameters¶

conditional_probabilities: Samples of P(X > u | Y) from a conditioning variable Y. threshold: Optional tail threshold associated with the conditional probabilities.

Source code in extremeloss/estimation/conditional_mc.py

def estimate_tail_probability_cmc(
    conditional_probabilities,
    *,
    threshold: float | None = None,
    alpha: float = 0.05,
) -> TailEstimateResult:
    """Estimate an exceedance probability from conditional exceedance probabilities.

    Parameters
    ----------
    conditional_probabilities:
        Samples of P(X > u | Y) from a conditioning variable Y.
    threshold:
        Optional tail threshold associated with the conditional probabilities.
    """
    validate_alpha(alpha)
    if threshold is not None:
        validate_threshold(threshold)
    probs = validate_probabilities(conditional_probabilities, name="conditional_probabilities")
    estimate = float(np.mean(probs))
    stderr = float(np.std(probs, ddof=0) / math.sqrt(probs.size))
    return TailEstimateResult(
        estimate=estimate,
        method="conditional_monte_carlo",
        stderr=stderr,
        ci=_normal_ci(estimate, stderr, alpha),
        n=int(probs.size),
        threshold=float(threshold) if threshold is not None else None,
        diagnostics={
            "min_conditional_probability": float(np.min(probs)),
            "max_conditional_probability": float(np.max(probs)),
        },
    )

estimate_tvar_cmc ¶

estimate_tvar_cmc(
    conditional_tail_expectations,
    *,
    q: float,
    threshold: float | None = None,
    alpha: float = 0.05
) -> TailEstimateResult

Estimate TVaR from conditional expectations of tail losses.

conditional_tail_expectations should contain draws of E[X | X >= VaR_q, Y] or another conditionally unbiased TVaR contribution.

Source code in extremeloss/estimation/conditional_mc.py

def estimate_tvar_cmc(
    conditional_tail_expectations,
    *,
    q: float,
    threshold: float | None = None,
    alpha: float = 0.05,
) -> TailEstimateResult:
    """Estimate TVaR from conditional expectations of tail losses.

    `conditional_tail_expectations` should contain draws of E[X | X >= VaR_q, Y]
    or another conditionally unbiased TVaR contribution.
    """
    validate_q(q)
    validate_alpha(alpha)
    if threshold is not None:
        validate_threshold(threshold)
    vals = np.asarray(conditional_tail_expectations, dtype=float)
    if vals.ndim != 1 or vals.size == 0:
        raise ValueError("conditional_tail_expectations must be a non-empty 1D array")
    estimate = float(np.mean(vals))
    stderr = float(np.std(vals, ddof=0) / math.sqrt(vals.size))
    return TailEstimateResult(
        estimate=estimate,
        method="conditional_monte_carlo",
        stderr=stderr,
        ci=_normal_ci(estimate, stderr, alpha),
        n=int(vals.size),
        threshold=float(threshold) if threshold is not None else None,
        quantile=float(q),
        diagnostics={
            "min_conditional_expectation": float(np.min(vals)),
            "max_conditional_expectation": float(np.max(vals)),
        },
    )

fit_gpd ¶

fit_gpd(
    excesses, threshold: float = 0.0, method: str = "mle"
) -> GPDFit

Fit a generalized Pareto distribution to excess losses.

Source code in extremeloss/evt/gpd.py

def fit_gpd(excesses, threshold: float = 0.0, method: str = "mle") -> GPDFit:
    """Fit a generalized Pareto distribution to excess losses."""
    if method != "mle":
        raise ValueError("only method='mle' is currently supported")
    validate_threshold(threshold)
    x = as_1d_float_array(excesses, name="excesses")
    if np.any(x <= 0.0):
        raise ValueError("excesses must be strictly positive")
    xi_hat, loc_hat, beta_hat = genpareto.fit(x, floc=0.0)
    if loc_hat != 0.0:
        raise RuntimeError("GPD fit returned nonzero location despite floc=0")
    return GPDFit(
        threshold=float(threshold),
        xi=float(xi_hat),
        beta=float(beta_hat),
        exceedance_fraction=1.0,
        n_exceedances=int(x.size),
        fit_method=method,
    )

fit_spliced_gpd ¶

fit_spliced_gpd(
    body,
    data,
    *,
    threshold: float,
    weight: float | None = None
)

Fit a GPD tail above threshold (peaks-over-threshold) and splice it onto body, returning a lossmodels.SplicedSeverity.

Parameters¶

body : severity model Any fitted body severity (e.g. a lossmodels Lognormal). data : array-like Loss sample used to fit the tail and, by default, to set the body mass. threshold : float Peaks-over-threshold cutoff u. weight : float, optional Body mass P(X <= u). Defaults to 1 - exceedance_fraction from the POT fit (the empirical fraction at or below the threshold), consistent with the fitted exceedance rate.

Requires the lossmodels package.

Source code in extremeloss/integration.py

def fit_spliced_gpd(body, data, *, threshold: float, weight: float | None = None):
    """Fit a GPD tail above ``threshold`` (peaks-over-threshold) and splice it
    onto ``body``, returning a ``lossmodels.SplicedSeverity``.

    Parameters
    ----------
    body : severity model
        Any fitted body severity (e.g. a ``lossmodels`` ``Lognormal``).
    data : array-like
        Loss sample used to fit the tail and, by default, to set the body mass.
    threshold : float
        Peaks-over-threshold cutoff ``u``.
    weight : float, optional
        Body mass ``P(X <= u)``. Defaults to ``1 - exceedance_fraction`` from the
        POT fit (the empirical fraction at or below the threshold), consistent
        with the fitted exceedance rate.

    Requires the ``lossmodels`` package.
    """
    SplicedSeverity = _require_spliced_severity()
    losses = as_1d_float_array(data, name="data")
    fit = fit_pot(losses, threshold=threshold)
    tail = GPDTail.from_fit(fit)
    if weight is None:
        weight = float(1.0 - fit.exceedance_fraction)
    return SplicedSeverity(body=body, tail=tail, threshold=float(fit.threshold), weight=weight)

splice_gpd_tail ¶

splice_gpd_tail(body, fit, *, weight: float | None = None)

Splice an already-fitted GPD tail (a :class:GPDFit) onto body.

Returns a lossmodels.SplicedSeverity whose body is body (any fitted body severity) and whose tail is the conditional GPD of fit above its threshold. The mixing weight defaults to the body mass implied by the fit, 1 - fit.exceedance_fraction (i.e. P(X <= threshold)).

Source code in extremeloss/integration.py

def splice_gpd_tail(body, fit, *, weight: float | None = None):
    """Splice an already-fitted GPD tail (a :class:`GPDFit`) onto ``body``.

    Returns a ``lossmodels.SplicedSeverity`` whose body is ``body`` (any fitted
    body severity) and whose tail is the conditional GPD of ``fit`` above its
    threshold. The mixing weight defaults to the body mass implied by the fit,
    ``1 - fit.exceedance_fraction`` (i.e. ``P(X <= threshold)``).
    """
    SplicedSeverity = _require_spliced_severity()
    tail = GPDTail.from_fit(fit)
    if weight is None:
        weight = float(1.0 - fit.exceedance_fraction)
    return SplicedSeverity(body=body, tail=tail, threshold=float(fit.threshold), weight=weight)

sample_lossmodel ¶

sample_lossmodel(model, size: int) -> np.ndarray

Sample losses from a lossmodels-style severity or aggregate model.

Source code in extremeloss/integration.py

def sample_lossmodel(model, size: int) -> np.ndarray:
    """Sample losses from a lossmodels-style severity or aggregate model."""
    return coerce_losses(model, size=size)