-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathProcessGridData.py
346 lines (267 loc) · 11.1 KB
/
ProcessGridData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 10 09:24:00 2023
Last updated: Fri Jun 28 10:21:00 2023
@author: danikam
"""
# Import needed modules
import pandas as pd
from CommonTools import get_top_dir, mergeShapefile, saveShapefile, state_names_to_abbr
import os
LB_PER_KG = 2.20462
HOURS_PER_YEAR = 365 * 24
KWH_PER_MWH = 1000
G_PER_LB = 453.592
MW_PER_GW = 1000
MWH_PER_GWH = 1000
def read_ba_emissions_data(top_dir):
"""
Reads in grid emission intensities by balancing authorities from eGRIDS
Parameters
----------
top_dir (string): path to the top level of the git repo
Returns
-------
egrid_data (pd.DataFrame): A pandas dataframe containing the 2022 eGrid data for each subregion (lb CO2 / MWh)
"""
# Read in the data associated with each eGrids subregion
dataPath = f"{top_dir}/data/egrid2022_data.xlsx"
data = pd.ExcelFile(dataPath)
data_df = pd.read_excel(data, "SRL22", skiprows=[0])
# Select the columns of interest
# SUBRGN: eGRID sub-region acronym
# SRCO2RTA: eGRID subregion annual CO2 emission rate (lb CO2 / MWh)
data_df = data_df.filter(
["SUBRGN", "SRCO2RTA"], axis=1
) # , 'SRCLPR', 'SROLPR', 'SRGSPR', 'SRNCPR', 'SRHYPR', 'SRBMPR', 'SRWIPR', 'SRSOPR', 'SRGTPR', 'SROFPR', 'SROPPR'], axis=1)
# Rename SRCO2RTA in the merged dataframe to a more generic descriptor
# Note: SUBRGN naming needed to match associated column name in shapefile
data_df = data_df.rename(columns={"SRCO2RTA": "CO2_rate", "SUBRGN": "ZipSubregi"})
return data_df
def read_iso_emissions_data(top_dir):
'''
Reads in grid emission intensities by ISO from https://www.electricitymaps.com/data-portal (processed by Noman Bashir)
Parameters
----------
top_dir (string): path to the top level of the git repo
Returns
-------
iso_emissions_data_df (pd.DataFrame): Dataframe containing the emissions data for each US ISO, converted from units of gCO2eq / kWh to lb CO2eq / MWh
'''
columns = ['zoneName']
for hour in range(24):
columns.append(f'mean_{hour}')
columns.append(f'std_{hour}')
data_df = pd.DataFrame(columns=columns)
# Read in the data associated with each eGrids subregion
dataDir = f'{top_dir}/data/daily_carbon_intensity_data_usa'
for filename in os.listdir(dataDir):
filepath = os.path.join(dataDir, filename)
# Confirm that it's a file and not a directory
if os.path.isfile(filepath) and filename.endswith('.csv'):
data_iso_df = pd.read_csv(filepath)
row_dict = {
'zoneName': filename.split('_')[0]
}
for hour in range(24):
row_dict[f'mean_{hour}'] = data_iso_df['mean'].iloc[hour]
row_dict[f'std_{hour}'] = data_iso_df['std'].iloc[hour]
# Convert row_dict to a DataFrame
new_row_df = pd.DataFrame([row_dict])
# Append new_row_df to data_df
data_df = pd.concat([data_df, new_row_df], ignore_index=True)
return data_df
def read_state_emissions_data(top_dir):
"""
Reads in grid emission intensities by state from EIA
Parameters
----------
top_dir (string): path to the top level of the git repo
Returns
-------
data_df (pd.DataFrame): A pandas dataframe containing the 2022 emission intensity data (lb CO2 / MWh)
"""
# Read in the data associated with each eGrids subregion
dataPath = f"{top_dir}/data/emissions_region2022.xlsx"
data = pd.ExcelFile(dataPath)
data_df = pd.read_excel(data, "State", skiprows=[0])
# Select the columns of interest
data_df = data_df.filter(
[
"Year",
"Census Division and State",
"Kilograms of CO2 per Megawatthour of Generation",
],
axis=1,
)
# Rename the long columns
data_df = data_df.rename(
columns={
"Census Division and State": "STUSPS",
"Kilograms of CO2 per Megawatthour of Generation": "CO2_rate",
}
)
# Convert the CO2 rate from kg / MWh to lb / MWh
data_df["CO2_rate"] = data_df["CO2_rate"] * LB_PER_KG
# Select rows corresponding to year 2022
data_df = data_df[data_df["Year"] == 2022]
# Update the state name to its associated abbreviation
data_df = state_names_to_abbr(data_df, "STUSPS")
# Remove NaN rows that aren't associated with states
data_df = data_df.dropna()
return data_df
def read_state_capacity_data(top_dir):
"""
Reads in grid generating capacity by state
Parameters
----------
top_dir (string): path to the top level of the git repo
Returns
-------
state_capacity_data_df (pd.DataFrame): A pandas dataframe containing the 2022 electricity generating capacity (MW) for each state
"""
# Read in the data associated with each eGrids subregion
dataPath = f"{top_dir}/data/existcapacity_annual.xlsx"
data = pd.ExcelFile(dataPath)
data_df = pd.read_excel(data, "Existing Capacity", skiprows=[0])
state_capacity_data_df = data_df[
(data_df["Year"] == 2022)
& (data_df["Producer Type"] == "Total Electric Power Industry")
& (data_df["Fuel Source"] == "All Sources")
]
state_capacity_data_df = state_capacity_data_df.drop(
columns=[
"Fuel Source",
"Generators",
"Facilities",
"Nameplate Capacity (Megawatts)",
"Producer Type",
"Year",
]
)
state_capacity_data_df = state_capacity_data_df.rename(
columns={"State Code": "STUSPS", "Summer Capacity (Megawatts)": "Cap_MW"}
)
state_capacity_data_df["Cap_GW"] = (
state_capacity_data_df["Cap_MW"].astype("float") / MW_PER_GW
)
return state_capacity_data_df
def read_state_generation_data(top_dir):
"""
Reads in annual grid electricity generation by state
Parameters
----------
top_dir (string): path to the top level of the git repo
Returns
-------
state_generation_data_df (pd.DataFrame): A pandas dataframe containing electricity generated (GWh) in each state for 2022
"""
# Read in the data associated with each eGrids subregion
dataPath = f"{top_dir}/data/annual_generation_state.xls"
data = pd.ExcelFile(dataPath)
data_df = pd.read_excel(data, skiprows=[0])
state_generation_data_df = data_df[
(data_df["YEAR"] == 2022)
& (data_df["TYPE OF PRODUCER"] == "Total Electric Power Industry")
& (data_df["ENERGY SOURCE"] == "Total")
]
state_generation_data_df = state_generation_data_df.drop(
columns=["TYPE OF PRODUCER", "ENERGY SOURCE", "YEAR"]
)
state_generation_data_df = state_generation_data_df.rename(
columns={"STATE": "STUSPS", "GENERATION (Megawatthours)": "Ann_Gen"}
)
state_generation_data_df["Ann_Gen"] = (
state_generation_data_df["Ann_Gen"].astype("float") / MWH_PER_GWH
) # Convert from MWh to GWh
# Remove the US total
state_generation_data_df = state_generation_data_df[
state_generation_data_df["STUSPS"] != "US-Total"
]
return state_generation_data_df
def combine_gen_cap_data(state_capacity_data_df, state_generation_data_df):
"""
Combines generating and capacity data into a single dataframe, and also estimates the difference between the annual generating capacity and actual energy generated.
Parameters
----------
state_capacity_data_df (DataFrame): A pandas dataframe containing the 2022 electricity generating capacity (MW) for each state
state_generation_data_df (pd.DataFrame): A pandas dataframe containing electricity generated in each state for 2022
Returns
-------
state_gen_cap_data_df (pd.DataFrame): A pandas dataframe containing summary info
"""
# Combine the two dataframes
state_gen_cap_data_df = state_capacity_data_df.merge(
state_generation_data_df, on="STUSPS", how="left"
).dropna()
# Add a column with the theoretical annual generating capacity (GWh)
state_gen_cap_data_df["Ann_Cap"] = state_gen_cap_data_df["Cap_GW"] * HOURS_PER_YEAR
# Calculate the difference between theoretical and actual annual generation
state_gen_cap_data_df["Ann_Diff"] = (
state_gen_cap_data_df["Ann_Cap"] - state_gen_cap_data_df["Ann_Gen"]
)
# Calculate the ratio between theoretical maximum and actual annual generation
state_gen_cap_data_df["Ann_Rat"] = (
state_gen_cap_data_df["Ann_Cap"] / state_gen_cap_data_df["Ann_Gen"]
)
return state_gen_cap_data_df
def main():
# Get the path to the top level of the Git repo
top_dir = get_top_dir()
# Read in the grid CO2 intensity by balancing authority from eGRIDS
egrid_data = read_ba_emissions_data(top_dir)
# Read in the state-level CO2 intensity from EIA
eia_data = read_state_emissions_data(top_dir)
# Read in the 2022 state-level generating capacity from EIA
state_capacity_data = read_state_capacity_data(top_dir)
# Read in 2022 state-level annual electricity generated from EIA
state_generation_data = read_state_generation_data(top_dir)
# Read in ISO emission rate data for 2022
iso_emissions_data = read_iso_emissions_data(top_dir)
# Combine the capacity and generation data
state_gen_cap_data = combine_gen_cap_data(
state_capacity_data, state_generation_data
)
# Merge the eGrids data in with the shapefile with subregion borders
merged_dataframe_egrid = mergeShapefile(
egrid_data,
f"{top_dir}/data/eGRID2022_subregions/eGRID2022_Subregions.shp",
"ZipSubregi",
)
# Merge the state-level CO2 intensity data with the state borders shapefile
merged_dataframe_eia = mergeShapefile(
eia_data, f"{top_dir}/data/state_boundaries/tl_2012_us_state.shp", "STUSPS"
)
merged_dataframe_eia = merged_dataframe_eia.drop(
columns=["AWATER", "ALAND", "Shape_Area"]
)
# Merge the state-level 2022 capacity and generation data with the state borders shapefile
merged_dataframe_gen_cap = mergeShapefile(
state_gen_cap_data,
f"{top_dir}/data/state_boundaries/tl_2012_us_state.shp",
"STUSPS",
)
merged_dataframe_gen_cap = merged_dataframe_gen_cap.drop(
columns=["AWATER", "ALAND", "Shape_Area"]
)
# Merge the iso emission rate data for 2022 with the shapefile containing borders for the data source
merged_dataframe_iso_emissions = mergeShapefile(iso_emissions_data, f'{top_dir}/data/world.geojson', 'zoneName').dropna().drop(columns=['countryKey', 'countryName'])
# Save the merged shapefiles
saveShapefile(
merged_dataframe_egrid,
f"{top_dir}/data/egrid2022_subregions_merged/egrid2022_subregions_merged.shp",
)
saveShapefile(
merged_dataframe_eia,
f"{top_dir}/data/eia2022_state_merged/eia2022_state_merged.shp",
)
saveShapefile(
merged_dataframe_gen_cap,
f"{top_dir}/data/eia2022_state_merged/gen_cap_2022_state_merged.shp",
)
saveShapefile(
merged_dataframe_iso_emissions,
f"{top_dir}/data/emission_rate_by_iso.shp"
)
main()