extremeloss
Extreme-value theory for loss data: peaks-over-threshold (generalized Pareto) and
block-maxima (generalized extreme value) tail fitting, tail risk measures (VaR/TVaR,
return levels), importance-sampling estimators for rare events, and bootstrap
uncertainty.
Quickstart
import numpy as np
import extremeloss as el
losses = np.random.default_rng(0).pareto(2.0, 50_000) * 100_000
# peaks-over-threshold: fit a generalized Pareto tail above a high threshold
fit = el.fit_pot(losses, threshold=np.quantile(losses, 0.95))
fit.xi, fit.beta # GPD shape and scale
el.return_level(1000, fit) # the 1-in-1000 return level
el.gpd_var(0.999, fit.threshold, fit.xi, fit.beta, fit.exceedance_fraction)
Choosing a threshold
import extremeloss as el
el.threshold_diagnostic_table(losses) # stability of xi across candidate thresholds
el.mean_excess(losses) # mean-excess (should be ~linear in the GPD range)
Bootstrap uncertainty
Tail estimates from few exceedances are noisy; bootstrap the risk measures rather than
trusting a point estimate.
import extremeloss as el
el.bootstrap_var(losses, 0.99) # CI on the 99% VaR
el.bootstrap_tail_probability(losses, threshold=500_000)
API reference
extremeloss
BootstrapResult
dataclass
Bootstrap uncertainty summary for a scalar statistic.
Source code in extremeloss/results.py
| @dataclass(slots=True)
class BootstrapResult:
"""Bootstrap uncertainty summary for a scalar statistic."""
estimate: float
bootstrap_estimates: np.ndarray
method: str = "percentile"
ci: tuple[float, float] | None = None
stderr: float | None = None
alpha: float | None = None
def summary(self) -> dict[str, Any]:
out: dict[str, Any] = {
"estimate": float(self.estimate),
"method": self.method,
"n_bootstrap": int(self.bootstrap_estimates.size),
}
if self.stderr is not None:
out["stderr"] = float(self.stderr)
if self.ci is not None:
out["ci"] = (float(self.ci[0]), float(self.ci[1]))
if self.alpha is not None:
out["alpha"] = float(self.alpha)
return out
|
GEVFit
dataclass
Fitted generalized extreme value distribution for block maxima.
Source code in extremeloss/results.py
| @dataclass(slots=True)
class GEVFit:
"""Fitted generalized extreme value distribution for block maxima."""
xi: float
loc: float
scale: float
n_blocks: int
block_size: int | None = None
fit_method: str = "mle"
covariance: np.ndarray | None = None
def return_level(self, period: float) -> float:
if period <= 1.0:
raise ValueError("period must exceed 1.0")
p = 1.0 - 1.0 / period
return float(genextreme.ppf(p, c=-self.xi, loc=self.loc, scale=self.scale))
def cdf(self, x: float) -> float:
return float(genextreme.cdf(x, c=-self.xi, loc=self.loc, scale=self.scale))
def summary(self) -> dict[str, Any]:
out: dict[str, Any] = {
"xi": float(self.xi),
"loc": float(self.loc),
"scale": float(self.scale),
"n_blocks": int(self.n_blocks),
"fit_method": self.fit_method,
}
if self.block_size is not None:
out["block_size"] = int(self.block_size)
if self.covariance is not None:
out["covariance"] = np.asarray(self.covariance, dtype=float)
return out
|
GPDFit
dataclass
Fitted generalized Pareto distribution above a threshold.
Source code in extremeloss/results.py
| @dataclass(slots=True)
class GPDFit:
"""Fitted generalized Pareto distribution above a threshold."""
threshold: float
xi: float
beta: float
exceedance_fraction: float
n_exceedances: int
fit_method: str = "mle"
covariance: np.ndarray | None = None
def tail_probability(self, x: float) -> float:
from .evt.gpd import gpd_tail_probability
return gpd_tail_probability(
x=x,
threshold=self.threshold,
xi=self.xi,
beta=self.beta,
exceedance_fraction=self.exceedance_fraction,
)
def var(self, p: float) -> float:
from .evt.gpd import gpd_var
return gpd_var(
p=p,
threshold=self.threshold,
xi=self.xi,
beta=self.beta,
exceedance_fraction=self.exceedance_fraction,
)
def tvar(self, p: float) -> float:
from .evt.gpd import gpd_tvar
return gpd_tvar(
p=p,
threshold=self.threshold,
xi=self.xi,
beta=self.beta,
exceedance_fraction=self.exceedance_fraction,
)
def return_level(self, period: float) -> float:
if period <= 1.0:
raise ValueError("period must exceed 1.0")
return self.var(1.0 - 1.0 / period)
def summary(self) -> dict[str, Any]:
out: dict[str, Any] = {
"threshold": float(self.threshold),
"xi": float(self.xi),
"beta": float(self.beta),
"exceedance_fraction": float(self.exceedance_fraction),
"n_exceedances": int(self.n_exceedances),
"fit_method": self.fit_method,
}
if self.covariance is not None:
out["covariance"] = np.asarray(self.covariance, dtype=float)
return out
|
GPDTail
dataclass
Conditional generalized-Pareto tail on [threshold, inf), for splicing.
A thin distribution object wrapping scipy.stats.genpareto with shape
xi, scale beta and location threshold. Unlike :class:GPDFit
(which carries the exceedance rate and reports unconditional VaR/TVaR),
this represents the tail conditional on X > threshold: cdf(threshold)
== 0 and the density integrates to one over [threshold, inf). It
exposes the cdf / pdf / sample / quantile / mean /
variance interface that :class:lossmodels.SplicedSeverity consumes as a
tail. Moments raise when they do not exist (xi >= 1 for the mean,
xi >= 1/2 for the variance), mirroring heavy-tailed severities.
Source code in extremeloss/results.py
| @dataclass(slots=True)
class GPDTail:
"""Conditional generalized-Pareto tail on ``[threshold, inf)``, for splicing.
A thin distribution object wrapping ``scipy.stats.genpareto`` with shape
``xi``, scale ``beta`` and location ``threshold``. Unlike :class:`GPDFit`
(which carries the exceedance rate and reports *unconditional* VaR/TVaR),
this represents the tail *conditional* on ``X > threshold``: ``cdf(threshold)
== 0`` and the density integrates to one over ``[threshold, inf)``. It
exposes the ``cdf`` / ``pdf`` / ``sample`` / ``quantile`` / ``mean`` /
``variance`` interface that :class:`lossmodels.SplicedSeverity` consumes as a
tail. Moments raise when they do not exist (``xi >= 1`` for the mean,
``xi >= 1/2`` for the variance), mirroring heavy-tailed severities.
"""
threshold: float
xi: float
beta: float
@classmethod
def from_fit(cls, fit: "GPDFit") -> "GPDTail":
"""Build a conditional tail from a fitted :class:`GPDFit`."""
return cls(threshold=float(fit.threshold), xi=float(fit.xi), beta=float(fit.beta))
def _frozen(self):
return genpareto(c=self.xi, loc=self.threshold, scale=self.beta)
def pdf(self, x):
return _scalarize(self._frozen().pdf(np.asarray(x, dtype=float)), x)
def cdf(self, x):
return _scalarize(self._frozen().cdf(np.asarray(x, dtype=float)), x)
def quantile(self, p):
return _scalarize(self._frozen().ppf(np.asarray(p, dtype=float)), p)
def ppf(self, p):
return self.quantile(p)
def sample(self, size: int = 1) -> np.ndarray:
if size <= 0:
raise ValueError("size must be positive.")
return genpareto.rvs(c=self.xi, loc=self.threshold, scale=self.beta, size=size)
def mean(self) -> float:
if self.xi >= 1.0:
raise ValueError("mean does not exist for xi >= 1.")
return float(self.threshold + self.beta / (1.0 - self.xi))
def variance(self) -> float:
if self.xi >= 0.5:
raise ValueError("variance does not exist for xi >= 1/2.")
return float(self.beta ** 2 / ((1.0 - self.xi) ** 2 * (1.0 - 2.0 * self.xi)))
def summary(self) -> dict[str, float]:
return {"threshold": float(self.threshold), "xi": float(self.xi), "beta": float(self.beta)}
|
from_fit
classmethod
from_fit(fit: 'GPDFit') -> 'GPDTail'
Build a conditional tail from a fitted :class:GPDFit.
Source code in extremeloss/results.py
| @classmethod
def from_fit(cls, fit: "GPDFit") -> "GPDTail":
"""Build a conditional tail from a fitted :class:`GPDFit`."""
return cls(threshold=float(fit.threshold), xi=float(fit.xi), beta=float(fit.beta))
|
TailEstimateResult
dataclass
Container for tail-estimation results.
Source code in extremeloss/results.py
| @dataclass(slots=True)
class TailEstimateResult:
"""Container for tail-estimation results."""
estimate: float
method: str
stderr: float | None = None
ci: tuple[float, float] | None = None
n: int | None = None
effective_n: float | None = None
threshold: float | None = None
quantile: float | None = None
diagnostics: dict[str, Any] = field(default_factory=dict)
def summary(self) -> dict[str, Any]:
out: dict[str, Any] = {
"estimate": float(self.estimate),
"method": self.method,
}
if self.stderr is not None:
out["stderr"] = float(self.stderr)
if self.ci is not None:
out["ci"] = (float(self.ci[0]), float(self.ci[1]))
if self.n is not None:
out["n"] = int(self.n)
if self.effective_n is not None:
out["effective_n"] = float(self.effective_n)
if self.threshold is not None:
out["threshold"] = float(self.threshold)
if self.quantile is not None:
out["quantile"] = float(self.quantile)
if self.diagnostics:
out["diagnostics"] = dict(self.diagnostics)
return out
|
ThresholdScan
dataclass
Threshold-diagnostic results across a grid of thresholds.
Source code in extremeloss/results.py
| @dataclass(slots=True)
class ThresholdScan:
"""Threshold-diagnostic results across a grid of thresholds."""
thresholds: np.ndarray
mean_excess: np.ndarray
xi: np.ndarray
beta: np.ndarray
n_exceedances: np.ndarray
def to_dict(self) -> dict[str, np.ndarray]:
return {
"thresholds": np.asarray(self.thresholds, dtype=float),
"mean_excess": np.asarray(self.mean_excess, dtype=float),
"xi": np.asarray(self.xi, dtype=float),
"beta": np.asarray(self.beta, dtype=float),
"n_exceedances": np.asarray(self.n_exceedances, dtype=int),
}
|
estimate_tail_probability
estimate_tail_probability(
data,
threshold: float,
*,
size: int | None = None,
alpha: float = 0.05
) -> TailEstimateResult
Estimate P(X > threshold) from simulated or observed losses.
Source code in extremeloss/estimation/rare_event.py
| def estimate_tail_probability(
data,
threshold: float,
*,
size: int | None = None,
alpha: float = 0.05,
) -> TailEstimateResult:
"""Estimate P(X > threshold) from simulated or observed losses."""
validate_threshold(threshold)
validate_alpha(alpha)
losses = coerce_losses(data, size=size)
indicators = (losses > threshold).astype(float)
estimate = float(np.mean(indicators))
stderr = float(np.std(indicators, ddof=0) / math.sqrt(losses.size))
ci = _normal_ci(estimate, stderr, alpha)
return TailEstimateResult(
estimate=estimate,
method="empirical",
stderr=stderr,
ci=ci,
n=int(losses.size),
threshold=float(threshold),
diagnostics={"n_exceedances": int(np.sum(indicators))},
)
|
estimate_tail_probability_cmc
estimate_tail_probability_cmc(
conditional_probabilities,
*,
threshold: float | None = None,
alpha: float = 0.05
) -> TailEstimateResult
Estimate an exceedance probability from conditional exceedance probabilities.
Parameters
conditional_probabilities:
Samples of P(X > u | Y) from a conditioning variable Y.
threshold:
Optional tail threshold associated with the conditional probabilities.
Source code in extremeloss/estimation/conditional_mc.py
| def estimate_tail_probability_cmc(
conditional_probabilities,
*,
threshold: float | None = None,
alpha: float = 0.05,
) -> TailEstimateResult:
"""Estimate an exceedance probability from conditional exceedance probabilities.
Parameters
----------
conditional_probabilities:
Samples of P(X > u | Y) from a conditioning variable Y.
threshold:
Optional tail threshold associated with the conditional probabilities.
"""
validate_alpha(alpha)
if threshold is not None:
validate_threshold(threshold)
probs = validate_probabilities(conditional_probabilities, name="conditional_probabilities")
estimate = float(np.mean(probs))
stderr = float(np.std(probs, ddof=0) / math.sqrt(probs.size))
return TailEstimateResult(
estimate=estimate,
method="conditional_monte_carlo",
stderr=stderr,
ci=_normal_ci(estimate, stderr, alpha),
n=int(probs.size),
threshold=float(threshold) if threshold is not None else None,
diagnostics={
"min_conditional_probability": float(np.min(probs)),
"max_conditional_probability": float(np.max(probs)),
},
)
|
estimate_tvar_cmc
estimate_tvar_cmc(
conditional_tail_expectations,
*,
q: float,
threshold: float | None = None,
alpha: float = 0.05
) -> TailEstimateResult
Estimate TVaR from conditional expectations of tail losses.
conditional_tail_expectations should contain draws of E[X | X >= VaR_q, Y]
or another conditionally unbiased TVaR contribution.
Source code in extremeloss/estimation/conditional_mc.py
| def estimate_tvar_cmc(
conditional_tail_expectations,
*,
q: float,
threshold: float | None = None,
alpha: float = 0.05,
) -> TailEstimateResult:
"""Estimate TVaR from conditional expectations of tail losses.
`conditional_tail_expectations` should contain draws of E[X | X >= VaR_q, Y]
or another conditionally unbiased TVaR contribution.
"""
validate_q(q)
validate_alpha(alpha)
if threshold is not None:
validate_threshold(threshold)
vals = np.asarray(conditional_tail_expectations, dtype=float)
if vals.ndim != 1 or vals.size == 0:
raise ValueError("conditional_tail_expectations must be a non-empty 1D array")
estimate = float(np.mean(vals))
stderr = float(np.std(vals, ddof=0) / math.sqrt(vals.size))
return TailEstimateResult(
estimate=estimate,
method="conditional_monte_carlo",
stderr=stderr,
ci=_normal_ci(estimate, stderr, alpha),
n=int(vals.size),
threshold=float(threshold) if threshold is not None else None,
quantile=float(q),
diagnostics={
"min_conditional_expectation": float(np.min(vals)),
"max_conditional_expectation": float(np.max(vals)),
},
)
|
fit_gpd
fit_gpd(
excesses, threshold: float = 0.0, method: str = "mle"
) -> GPDFit
Fit a generalized Pareto distribution to excess losses.
Source code in extremeloss/evt/gpd.py
| def fit_gpd(excesses, threshold: float = 0.0, method: str = "mle") -> GPDFit:
"""Fit a generalized Pareto distribution to excess losses."""
if method != "mle":
raise ValueError("only method='mle' is currently supported")
validate_threshold(threshold)
x = as_1d_float_array(excesses, name="excesses")
if np.any(x <= 0.0):
raise ValueError("excesses must be strictly positive")
xi_hat, loc_hat, beta_hat = genpareto.fit(x, floc=0.0)
if loc_hat != 0.0:
raise RuntimeError("GPD fit returned nonzero location despite floc=0")
return GPDFit(
threshold=float(threshold),
xi=float(xi_hat),
beta=float(beta_hat),
exceedance_fraction=1.0,
n_exceedances=int(x.size),
fit_method=method,
)
|
fit_spliced_gpd
fit_spliced_gpd(
body,
data,
*,
threshold: float,
weight: float | None = None
)
Fit a GPD tail above threshold (peaks-over-threshold) and splice it
onto body, returning a lossmodels.SplicedSeverity.
Parameters
body : severity model
Any fitted body severity (e.g. a lossmodels Lognormal).
data : array-like
Loss sample used to fit the tail and, by default, to set the body mass.
threshold : float
Peaks-over-threshold cutoff u.
weight : float, optional
Body mass P(X <= u). Defaults to 1 - exceedance_fraction from the
POT fit (the empirical fraction at or below the threshold), consistent
with the fitted exceedance rate.
Requires the lossmodels package.
Source code in extremeloss/integration.py
| def fit_spliced_gpd(body, data, *, threshold: float, weight: float | None = None):
"""Fit a GPD tail above ``threshold`` (peaks-over-threshold) and splice it
onto ``body``, returning a ``lossmodels.SplicedSeverity``.
Parameters
----------
body : severity model
Any fitted body severity (e.g. a ``lossmodels`` ``Lognormal``).
data : array-like
Loss sample used to fit the tail and, by default, to set the body mass.
threshold : float
Peaks-over-threshold cutoff ``u``.
weight : float, optional
Body mass ``P(X <= u)``. Defaults to ``1 - exceedance_fraction`` from the
POT fit (the empirical fraction at or below the threshold), consistent
with the fitted exceedance rate.
Requires the ``lossmodels`` package.
"""
SplicedSeverity = _require_spliced_severity()
losses = as_1d_float_array(data, name="data")
fit = fit_pot(losses, threshold=threshold)
tail = GPDTail.from_fit(fit)
if weight is None:
weight = float(1.0 - fit.exceedance_fraction)
return SplicedSeverity(body=body, tail=tail, threshold=float(fit.threshold), weight=weight)
|
splice_gpd_tail
splice_gpd_tail(body, fit, *, weight: float | None = None)
Splice an already-fitted GPD tail (a :class:GPDFit) onto body.
Returns a lossmodels.SplicedSeverity whose body is body (any fitted
body severity) and whose tail is the conditional GPD of fit above its
threshold. The mixing weight defaults to the body mass implied by the fit,
1 - fit.exceedance_fraction (i.e. P(X <= threshold)).
Source code in extremeloss/integration.py
| def splice_gpd_tail(body, fit, *, weight: float | None = None):
"""Splice an already-fitted GPD tail (a :class:`GPDFit`) onto ``body``.
Returns a ``lossmodels.SplicedSeverity`` whose body is ``body`` (any fitted
body severity) and whose tail is the conditional GPD of ``fit`` above its
threshold. The mixing weight defaults to the body mass implied by the fit,
``1 - fit.exceedance_fraction`` (i.e. ``P(X <= threshold)``).
"""
SplicedSeverity = _require_spliced_severity()
tail = GPDTail.from_fit(fit)
if weight is None:
weight = float(1.0 - fit.exceedance_fraction)
return SplicedSeverity(body=body, tail=tail, threshold=float(fit.threshold), weight=weight)
|
sample_lossmodel
sample_lossmodel(model, size: int) -> np.ndarray
Sample losses from a lossmodels-style severity or aggregate model.
Source code in extremeloss/integration.py
| def sample_lossmodel(model, size: int) -> np.ndarray:
"""Sample losses from a lossmodels-style severity or aggregate model."""
return coerce_losses(model, size=size)
|