-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreturn_period.py
112 lines (85 loc) · 3.81 KB
/
return_period.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
:mod:`return_period` -- return levels for empirical/fitted distributions
========================================================================
.. module:: return_period
:synopsis: Offer functions for calculating return periods or return
levels using data and/or a fitted distribution.
.. moduleauthor:: Craig Arthur <[email protected]>
"""
from __future__ import division
import numpy as np
from distributions import empiricalPDF
from itertools import product
from scipy.stats import genpareto
from scipy.optimize import curve_fit
import logging
LOG = logging.getLogger(__name__)
OBS_PER_YEAR = 365.25
def returnLevels(intervals, mu, xi, sigma, rate, npyr=OBS_PER_YEAR):
"""
Calculate return levels for specified intervals for a distribution with
the given threshold, scale and shape parameters.
:param intervals: :class:`numpy.ndarray` or float of return period intervals
to evaluate return levels for.
:param float mu: Threshold parameter (also called location).
:param float chi: Shape parameter.
:param float sigma: Scale parameter.
:param float rate: Rate of exceedances (i.e. number of observations greater
than `mu`, divided by total number of observations).
:param float npyr: Number of observations per year.
:returns: return levels for the specified return intervals.
"""
rp = mu + (sigma / xi) * (np.power(intervals * npyr * rate, xi) - 1.)
return rp
def empiricalReturnPeriod(data, npyr=OBS_PER_YEAR):
"""
Returns the empirically-based return interval (in years) for a set
of observations.
It is assumed the data are daily observations.
The highest return period should be (approximately) len(``data``)/``npy``.
:param data: :class:`numpy.ndarray` containing the observed values (with
missing values removed).
:param float npy: Number of observations per year (default=365.25)
:returns: Return periods for the observed data.
:rtype: :class:`numpy.ndarray`
"""
nobs = len(data)
# Empirical return periods:
emprp = 1. / (1. - np.arange(1, nobs + 1, 1) / (nobs + 1)) / npyr
return emprp
def returnPeriodUncertainty(data, mu, xi, sigma, intervals):
"""
Calculate uncertainty around a fit, holding threshold fixed
:param data: :class:`numpy.ndarray` containing the observed values (with
missing values removed).
:param float mu: Threshold parameter (also called location).
:param float chi: Shape parameter.
:param float sigma: Scale parameter.
:param intervals: :class:`numpy.ndarray` or float of return period intervals
to evaluate return level uncertainties for.
:returns: Array of standard deviation values for each return period, based
on all permutations of shape and scale parameters with standard
errors.
:rtype: :class:`numpy.ndarray`
"""
sortedmax = np.sort(data[data > mu])
nobs = len(sortedmax)
rate = float(nobs)/float(len(data))
emppdf = empiricalPDF(data[data > mu])
# Perform the curve fitting, holding ``mu`` fixed and allowing
# ``xi`` and ``sigma`` to vary.
try:
popt, pcov = curve_fit(lambda x, xi, sigma: \
genpareto.pdf(x, xi, loc=mu, scale=sigma),
sortedmax, emppdf, (xi, sigma))
except RuntimeError as e:
LOG.exception("Curve fitting failed: %s", e)
return np.zeros(len(intervals))
sd = np.sqrt(np.diag(pcov))
svals = (sigma - sd[1], sigma, sigma + sd[1])
mvals = (mu, mu, mu)
xvals = (xi - sd[0], xi, xi + sd[0])
rpvalues = np.array([returnLevels(intervals, m, xii, s, rate) for
(s, m, xii) in product(svals, mvals, xvals)])
rpFitError = np.std(rpvalues, axis=0)
return rpFitError