-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_reference_pd.py
executable file
·109 lines (88 loc) · 3 KB
/
find_reference_pd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
import pandas
import numpy as np
import os
# Cropland table that is read, and the path the augmented copy is written to.
CROPLAND_FILE = "data/HOACropland.csv"
OUTPUT_FILE = "data/HOACropland_new.csv"

# Countries and crops for which per-planting-date output files are read.
countries = ["Kenya", "Ethiopia", "South Sudan", "Sudan"] + [
    "Uganda", "Somalia", "Eritrea", "Djibouti",
]
crops = ["Maize", "Sorghum", "Cassava"]

# Simulated planting dates, twelve in total. They are kept as strings because
# they are used both in output file names and as data frame column labels.
# NOTE(review): presumably day-of-year values, roughly one per month -- confirm.
pds = [
    str(day)
    for day in (15, 46, 74, 105, 135, 166, 196, 227, 258, 288, 319, 345)
]
def reference_pd(row, half_window=2):
    """
    Find the reference planting date for one row of yields.

    The entry with the maximum centred moving-average yield is taken as the
    reference planting date. The row is treated as circular: the window of an
    entry near either end wraps around to the other end. With the default
    ``half_window`` of 2 the window spans 5 entries.

    Args:
        row: pandas Series of yields, one entry per planting date, whose
            index labels identify the planting dates.
        half_window: Number of neighbouring entries included on each side of
            the centre entry.

    Returns:
        Tuple ``(max_yield, ref_day)``: the largest moving-average yield and
        the index label of the entry at the centre of that window. Both are
        -999 when no window mean exceeds -999 (e.g. the row is empty or every
        window mean is NaN). On ties the first entry visited wins.
    """
    length = len(row)
    offsets = np.arange(-half_window, half_window + 1)
    max_yield = -999
    ref_day = -999
    for m in range(length):
        # Wrap window positions around both ends of the row, so the window is
        # circular for any row length rather than only for 12 entries.
        window = (m + offsets) % length
        # Use .iloc so the lookup is explicitly positional; plain row[...]
        # with integers on a string-labelled Series relies on a deprecated
        # pandas fallback. Series.mean skips NaN entries by default.
        yield_ma = row.iloc[window].mean()
        if yield_ma > max_yield:
            max_yield = yield_ma
            ref_day = row.index[m]
    return (max_yield, ref_day)
# Open cropland file
cropland_df = pandas.read_csv(CROPLAND_FILE)
# Fill NaN values in case 'admin3' does not exist
cropland_df.fillna("NaN", inplace=True)

# Columns that identify one administrative unit; used for grouping and merging
key_columns = ["country", "admin1", "admin2", "admin3"]

for crop in crops:
    # One frame per country, each holding one mean-yield column per planting date
    country_frames = []

    for country in countries:
        print(country, crop)

        country_df = None
        for planting_date in pds:
            # Read output files
            output_df = pandas.read_csv(
                "outputs/%s.%s.%s.csv" % (country, crop, planting_date),
                usecols=range(1, 6),
            )
            # Fill NaN values in case 'admin3' does not exist.
            # NOTE(review): this fills every column, not only 'admin3' -- confirm
            # that the yield column never contains NaN.
            output_df.fillna("NaN", inplace=True)

            # Calculate mean grain yield over all simulation years, and name the
            # column after the planting date so it can be merged with the others
            output_df = output_df.groupby(key_columns).mean().rename(
                columns={"grain_yield": str(planting_date)}
            )

            # Add all planting dates to one data frame
            if country_df is None:
                country_df = output_df
            else:
                country_df = country_df.merge(output_df, how="inner", on=key_columns)

        country_frames.append(country_df)

    # Combine all results. DataFrame.append was removed in pandas 2.0, so the
    # per-country frames are concatenated in one call instead.
    pd_df = pandas.concat(country_frames)

    # Find reference planting dates and yields
    pd_df[["grain_yield", "pd"]] = pd_df.apply(
        lambda row: pandas.Series(reference_pd(row)), axis=1
    )

    # Remove yields of each month
    pd_df.drop(columns=pds, inplace=True)

    # Rename pd and grain_yield columns to each crop
    crop_key = crop.strip().lower().replace(" ", "_")
    pd_df.rename(
        columns={
            "pd": "%s_pd" % (crop_key),
            "grain_yield": "%s_grain_yield" % (crop_key),
        },
        inplace=True,
    )

    # Add planting dates and yields to cropland data
    cropland_df = cropland_df.merge(pd_df, how="inner", on=key_columns)

# Write to output file, turning the "NaN" placeholders back into empty cells
cropland_df.replace("NaN", "", regex=True, inplace=True)
cropland_df.to_csv(OUTPUT_FILE, index=False)