Skip to content

Commit 095b222

Browse files
committedJan 4, 2021
Fix issue with zero precipitation for distributions that use Gamma function
1 parent f666fab commit 095b222

File tree

3 files changed

+214
-5
lines changed

3 files changed

+214
-5
lines changed
 

‎data/monthly_data_v2.csv

+194
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"date","precip"
2+
"1990-01-01",161
3+
"1990-02-01",40.4
4+
"1990-03-01",65
5+
"1990-04-01",83.8
6+
"1990-05-01",42.8
7+
"1990-06-01",53.4
8+
"1990-07-01",43.2
9+
"1990-08-01",37.4
10+
"1990-09-01",10.6
11+
"1990-10-01",1
12+
"1990-11-01",44.6
13+
"1990-12-01",94.6
14+
"1991-01-01",143
15+
"1991-02-01",25.8
16+
"1991-03-01",204.4
17+
"1991-04-01",247.4
18+
"1991-05-01",33.4
19+
"1991-06-01",36
20+
"1991-07-01",20
21+
"1991-08-01",27.6
22+
"1991-09-01",8.4
23+
"1991-10-01",24.6
24+
"1991-11-01",96.4
25+
"1991-12-01",192
26+
"1992-01-01",145.2
27+
"1992-02-01",38.8
28+
"1992-03-01",56.6
29+
"1992-04-01",103.6
30+
"1992-05-01",49.8
31+
"1992-06-01",54.2
32+
"1992-07-01",64.4
33+
"1992-08-01",0.4
34+
"1992-09-01",108.2
35+
"1992-10-01",13.6
36+
"1992-11-01",44.8
37+
"1992-12-01",49.2
38+
"1993-01-01",125
39+
"1993-02-01",5.2
40+
"1993-03-01",80.8
41+
"1993-04-01",113.4
42+
"1993-05-01",81.6
43+
"1993-06-01",52
44+
"1993-07-01",21.4
45+
"1993-08-01",22.2
46+
"1993-09-01",19.8
47+
"1993-10-01",50.8
48+
"1993-11-01",26.6
49+
"1993-12-01",141.6
50+
"1994-01-01",279.2
51+
"1994-02-01",131.2
52+
"1994-03-01",103.2
53+
"1994-04-01",31
54+
"1994-05-01",18.4
55+
"1994-06-01",14.4
56+
"1994-07-01",0.6
57+
"1994-08-01",17.4
58+
"1994-09-01",20.8
59+
"1994-10-01",57.2
60+
"1994-11-01",161.4
61+
"1994-12-01",221.8
62+
"1995-01-01",91
63+
"1995-02-01",34.8
64+
"1995-03-01",12.4
65+
"1995-04-01",48.8
66+
"1995-05-01",52.8
67+
"1995-06-01",89.2
68+
"1995-07-01",9.8
69+
"1995-08-01",23.4
70+
"1995-09-01",14.6
71+
"1995-10-01",129.8
72+
"1995-11-01",133.2
73+
"1995-12-01",78.2
74+
"1996-01-01",174
75+
"1996-02-01",106.6
76+
"1996-03-01",78.8
77+
"1996-04-01",48.4
78+
"1996-05-01",10
79+
"1996-06-01",43.8
80+
"1996-07-01",32.4
81+
"1996-08-01",55.2
82+
"1996-09-01",57
83+
"1996-10-01",21.4
84+
"1996-11-01",85.4
85+
"1996-12-01",82.6
86+
"1997-01-01",99
87+
"1997-02-01",57.2
88+
"1997-03-01",20.6
89+
"1997-04-01",3.2
90+
"1997-05-01",9
91+
"1997-06-01",112.6
92+
"1997-07-01",1
93+
"1997-08-01",91.2
94+
"1997-09-01",32.4
95+
"1997-10-01",55.2
96+
"1997-11-01",61.8
97+
"1997-12-01",79.2
98+
"1998-01-01",184.4
99+
"1998-02-01",216.8
100+
"1998-03-01",41.8
101+
"1998-04-01",16.8
102+
"1998-05-01",52
103+
"1998-06-01",121.8
104+
"1998-07-01",91.4
105+
"1998-08-01",15.8
106+
"1998-09-01",55.6
107+
"1998-10-01",55.2
108+
"1998-11-01",326.6
109+
"1998-12-01",62.6
110+
"1999-01-01",77.4
111+
"1999-02-01",131.8
112+
"1999-03-01",47.6
113+
"1999-04-01",195.2
114+
"1999-05-01",240.8
115+
"1999-06-01",87.6
116+
"1999-07-01",3.6
117+
"1999-08-01",4
118+
"1999-09-01",23.4
119+
"1999-10-01",57.4
120+
"1999-11-01",31.4
121+
"1999-12-01",172.2
122+
"2000-01-01",40.2
123+
"2000-02-01",272.4
124+
"2000-03-01",162.2
125+
"2000-04-01",36.4
126+
"2000-05-01",61.2
127+
"2000-06-01",6.2
128+
"2000-07-01",27.8
129+
"2000-08-01",104.6
130+
"2000-09-01",103.6
131+
"2000-10-01",306.4
132+
"2000-11-01",57.8
133+
"2000-12-01",479.8
134+
"2001-01-01",315.8
135+
"2001-02-01",157
136+
"2001-03-01",149.2
137+
"2001-04-01",100.2
138+
"2001-05-01",69.2
139+
"2001-06-01",10.6
140+
"2001-07-01",12.4
141+
"2001-08-01",76.8
142+
"2001-09-01",18.6
143+
"2001-10-01",117.8
144+
"2001-11-01",15.6
145+
"2001-12-01",132.4
146+
"2002-01-01",343.4
147+
"2002-02-01",162.2
148+
"2002-03-01",118.2
149+
"2002-04-01",149
150+
"2002-05-01",36.4
151+
"2002-06-01",122
152+
"2002-07-01",51.2
153+
"2002-08-01",0.2
154+
"2002-09-01",5.6
155+
"2002-10-01",21.4
156+
"2002-11-01",116.8
157+
"2002-12-01",50.8
158+
"2003-01-01",281.6
159+
"2003-02-01",250.6
160+
"2003-03-01",172.4
161+
"2003-04-01",104.6
162+
"2003-05-01",44
163+
"2003-06-01",59.4
164+
"2003-07-01",39
165+
"2003-08-01",0.2
166+
"2003-09-01",13.2
167+
"2003-10-01",22.6
168+
"2003-11-01",104.8
169+
"2003-12-01",19.2
170+
"2004-01-01",143.8
171+
"2004-02-01",15.8
172+
"2004-03-01",164.4
173+
"2004-04-01",14.2
174+
"2004-05-01",28.6
175+
"2004-06-01",16.2
176+
"2004-07-01",14.2
177+
"2004-08-01",93.2
178+
"2004-09-01",27.6
179+
"2004-10-01",5.8
180+
"2004-11-01",142
181+
"2004-12-01",124.4
182+
"2005-01-01",322.4
183+
"2005-02-01",274.8
184+
"2005-03-01",161.4
185+
"2005-04-01",146.2
186+
"2005-05-01",248.8
187+
"2005-06-01",34.6
188+
"2005-07-01",9.2
189+
"2005-08-01",27.6
190+
"2005-09-01",54.2
191+
"2005-10-01",55.8
192+
"2005-11-01",74.2
193+
"2005-12-01",36.2
194+
"2006-01-01",51.4

‎docs/wmo_199_en.pdf

2.97 MB
Binary file not shown.

‎standard_precip/base_sp.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BaseStandardIndex(object):
3131

3232
def __init__(self):
3333
self.distrb = None
34+
self.non_zero_distr = ['gam', 'pe3']
3435

3536
@staticmethod
3637
def rolling_window_sum(df: pd.DataFrame, span: int, window_type, center, **kwargs):
@@ -156,11 +157,19 @@ def fit_distribution(self, data: np.array, dist_type: str, fit_type: str='lmom',
156157
'''
157158
Fit given distribution to historical precipitation data.
158159
The fit is accomplished using either L-moments or MLE (Maximum Likelihood Estimation).
160+
161+
For distributions that use the Gamma Function (Gamma and Pearson 3)
159162
'''
160163

161164
# Get distribution type
162165
self.distrb = getattr(distr, dist_type)
163166

167+
# Determine zeros if distribution can not handle x = 0
168+
p_zero = None
169+
if dist_type in self.non_zero_distr:
170+
p_zero = data[data == 0].shape[0] / data.shape[0]
171+
data = data[data != 0]
172+
164173
# Fit distribution
165174
if fit_type == 'lmom':
166175
params = self.distrb.lmom_fit(data, **kwargs)
@@ -171,7 +180,7 @@ def fit_distribution(self, data: np.array, dist_type: str, fit_type: str='lmom',
171180
else:
172181
raise AttributeError(f"{fit_type} is not an option. Option fit_types are mle and lmom")
173182

174-
return params
183+
return params, p_zero
175184

176185
def calculate(self, df: pd.DataFrame, date_col: str, precip_cols: list, freq: str="M",
177186
freq_col: str=None, fit_type: str='lmom', dist_type: str='gam', **dist_kwargs):
@@ -245,6 +254,9 @@ def calculate(self, df: pd.DataFrame, date_col: str, precip_cols: list, freq: st
245254
if isinstance(precip_cols, str):
246255
precip_cols = [precip_cols]
247256

257+
# for p_col in precip_cols:
258+
# df.loc[df[p_col] == 0.0, p_col] = 0.001
259+
248260
self._df_copy = df[[date_col] + precip_cols]
249261
self._df_copy[date_col] = pd.to_datetime(self._df_copy[date_col])
250262

@@ -274,12 +286,12 @@ def calculate(self, df: pd.DataFrame, date_col: str, precip_cols: list, freq: st
274286
precip_sorted = np.sort(precip_single)[::-1]
275287

276288
# Fit distribution for particular series and month
277-
params = self.fit_distribution(
289+
params, p_zero = self.fit_distribution(
278290
precip_sorted, dist_type, fit_type, **dist_kwargs
279291
)
280292

281293
# Calculate SPI/SPEI
282-
spi = self.cdf_to_ppf(precip_single, params)
294+
spi = self.cdf_to_ppf(precip_single, params, p_zero)
283295
idx_col = f"{p}_calculated_index"
284296
precip_single_df[idx_col] = spi
285297
precip_single_df = precip_single_df[[date_col, idx_col]]
@@ -294,7 +306,7 @@ def calculate(self, df: pd.DataFrame, date_col: str, precip_cols: list, freq: st
294306

295307
return df_all
296308

297-
def cdf_to_ppf(self, data, params):
309+
def cdf_to_ppf(self, data, params, p_zero):
298310
'''
299311
Take the specific distributions fitted parameters and calculate the
300312
cdf. Apply the inverse normal distribution to the cdf to get the SPI
@@ -304,7 +316,10 @@ def cdf_to_ppf(self, data, params):
304316
'''
305317

306318
# Calculate the CDF of observed precipitation on a given time scale
307-
cdf = self.distrb.cdf(data, **params)
319+
if p_zero:
320+
cdf = p_zero + (1 - p_zero) * self.distrb.cdf(data, **params)
321+
else:
322+
cdf = self.distrb.cdf(data, **params)
308323

309324
# Apply inverse normal distribution
310325
norm_ppf = scs.norm.ppf(cdf)

0 commit comments

Comments
 (0)
Please sign in to comment.