diff --git a/datasets/gedi_extract_l2a.py b/datasets/gedi_extract_l2a.py deleted file mode 100644 index e49c9826e..000000000 --- a/datasets/gedi_extract_l2a.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright 2020 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from absl import app -from absl import logging -import h5py -import pandas as pd -import os - -import gedi_lib - -numeric_variables = ( - 'beam', - 'degrade_flag', - 'delta_time', - 'digital_elevation_model', - 'digital_elevation_model_srtm', - 'elev_highestreturn', - 'elev_lowestmode', - 'elevation_bias_flag', - 'energy_total', - - 'land_cover_data/landsat_treecover', - 'land_cover_data/landsat_water_persistence', - 'land_cover_data/leaf_off_doy', - 'land_cover_data/leaf_off_flag', - 'land_cover_data/leaf_on_cycle', - 'land_cover_data/leaf_on_doy', - 'land_cover_data/modis_nonvegetated', - 'land_cover_data/modis_nonvegetated_sd', - 'land_cover_data/modis_treecover', - 'land_cover_data/modis_treecover_sd', - 'land_cover_data/pft_class', - 'land_cover_data/region_class', - 'land_cover_data/urban_focal_window_size', - 'land_cover_data/urban_proportion', - - 'lat_highestreturn', - 'lat_lowestmode', - 'lon_highestreturn', - 'lon_lowestmode', - - 'num_detectedmodes', - 'quality_flag', - - 'selected_algorithm', - 'selected_mode', - 'selected_mode_flag', - - 'sensitivity', - 'solar_azimuth', - 'solar_elevation', - 'surface_flag' -) - -# Integer-valued columns that contained fill values instead of NA prior to -# Pandas updates past version 1.5. -integer_fill_variables = { - 'leaf_off_doy': 32767, - 'leaf_on_cycle': 255, - 'leaf_on_doy': 32767, -} - -string_variables = ('shot_number',) - -rh_names = tuple([f'rh{d}' for d in range(101)]) - - -def extract_values(input_paths: list[str], output_path: str) -> None: - """Extracts all rh (relative heights) from all algorithms and some qa flags. - - Args: - input_paths: GEDI L2A and GEDI L2B file paths - output_path: csv output file path - """ - l2a_path = input_paths[0] - l2b_path = input_paths[1] - - basename = os.path.basename(l2a_path) - if not basename.startswith('GEDI') or not basename.endswith('.h5'): - logging.error('Input path is not a GEDI filename: %s', l2a_path) - return - - with h5py.File(l2a_path, 'r') as l2a_hdf_fh: - with h5py.File(l2b_path, 'r') as l2b_hdf_fh: - with open(output_path, 'w') as csv_fh: - write_csv(l2a_hdf_fh, l2b_hdf_fh, csv_fh) - - -def write_csv(l2a_hdf_fh, l2b_hdf_fh, csv_file): - """Writes a single CSV file based on the contents of HDF file.""" - is_first = True - # Iterating over relative height percentage values from 0 to 100 - for k in l2a_hdf_fh.keys(): - if not k.startswith('BEAM'): - continue - print('\t', k) - - df = pd.DataFrame() - - for v in numeric_variables + string_variables: - gedi_lib.hdf_to_df(l2a_hdf_fh, k, v, df) - - rh = pd.DataFrame(l2a_hdf_fh[f'{k}/rh'], columns=rh_names) - df = pd.concat((df, rh), axis=1) - - gedi_lib.add_shot_number_breakdown(df) - # Add the incidence angle variables from the corresponding L2B file. - for l2b_var in gedi_lib.l2b_variables_for_l2a: - gedi_lib.hdf_to_df(l2b_hdf_fh, k, 'geolocation/' + l2b_var, df) - - # Filter our rows with nan values for lat_lowestmode or lon_lowestmode. - # Such rows are not ingestable into EE. - df = df[df.lat_lowestmode.notnull()] - df = df[df.lon_lowestmode.notnull()] - - # Correct fill replacements. - for df_key, fill_value in integer_fill_variables.items(): - df[df_key] = df[df_key].fillna(fill_value) - - df.to_csv( - csv_file, - float_format='%3.6f', - index=False, - header=is_first, - mode='a', - lineterminator='\n') - is_first = False - -def main(argv): - extract_values(argv[1], argv[2]) - -if __name__ == '__main__': - app.run(main) diff --git a/datasets/gedi_extract_l2b.py b/datasets/gedi_extract_l2b.py deleted file mode 100644 index ddc9ed0d7..000000000 --- a/datasets/gedi_extract_l2b.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2020 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from absl import app -from absl import logging -import h5py -import numpy as np -import pandas as pd -import os - -import gedi_lib - -numeric_variables = ( - 'cover', - 'pai', - 'fhd_normal', - 'pgap_theta', - 'beam', - 'shot_number', - 'l2b_quality_flag', - 'algorithmrun_flag', - 'selected_rg_algorithm', - 'selected_l2a_algorithm', - 'sensitivity', - 'geolocation/degrade_flag', - 'geolocation/delta_time', - 'geolocation/lat_lowestmode', - 'geolocation/lon_lowestmode', - 'geolocation/local_beam_azimuth', - 'geolocation/local_beam_elevation', - 'geolocation/solar_azimuth', - 'geolocation/solar_elevation', -) - -string_variables = ('shot_number',) - -cover_names = [f'cover_z{d}' for d in range(30)] -pai_names = [f'pai_z{d}' for d in range(30)] -pavd_names = [f'pavd_z{d}' for d in range(30)] - - -# pylint:disable=line-too-long -def extract_values(input_paths: list[str], output_path: str) -> None: - """Extracts all relative height values from all algorithms and some qa flags. - - Args: - input_paths: GEDI L2B file path in a single-element list - output_path: csv output file path - """ - assert len(input_paths) == 1 - input_path = input_paths[0] - basename = os.path.basename(input_path) - if not basename.startswith('GEDI') or not basename.endswith('.h5'): - logging.error('Input path is not a GEDI filename: %s', input_path) - return - - with h5py.File(input_path, 'r') as hdf_fh: - with open(output_path, 'w') as csv_fh: - write_csv(hdf_fh, csv_fh) - - -def write_csv(hdf_fh, csv_file): - """Writes a single CSV file based on the contents of HDF file.""" - is_first = True - # Iterating over metrics using a height profile defined for 30 slices. - for k in hdf_fh.keys(): - if not k.startswith('BEAM'): - continue - print('\t', k) - - df = pd.DataFrame() - - for v in numeric_variables + string_variables: - gedi_lib.hdf_to_df(hdf_fh, k, v, df) - - ds = hdf_fh[f'{k}/cover_z'] - cover_z = pd.DataFrame(ds, columns=cover_names) - # pytype: disable=wrong-arg-count # pandas-drop-duplicates-overloads - cover_z.replace(ds.attrs.get('_FillValue'), np.nan, inplace=True) - - ds = hdf_fh[f'{k}/pai_z'] - pai_z = pd.DataFrame(ds, columns=pai_names) - pai_z.replace(ds.attrs.get('_FillValue'), np.nan, inplace=True) - - ds = hdf_fh[f'{k}/pavd_z'] - pavd_z = pd.DataFrame(ds, columns=pavd_names) - pavd_z.replace(ds.attrs.get('_FillValue'), np.nan, inplace=True) - # pytype: enable=wrong-arg-count # pandas-drop-duplicates-overloads - - df = pd.concat((df, cover_z), axis=1) - df = pd.concat((df, pai_z), axis=1) - df = pd.concat((df, pavd_z), axis=1) - - # Filter our rows with nan values for lat_lowestmode or lon_lowestmode. - # Such rows are not ingestable into EE. - df = df[df.lat_lowestmode.notnull()] - df = df[df.lon_lowestmode.notnull()] - - gedi_lib.add_shot_number_breakdown(df) - - df.to_csv( - csv_file, - float_format='%3.6f', - index=False, - header=is_first, - mode='a', - lineterminator='\n', - ) - is_first = False - - -def main(argv): - extract_values(argv[1], argv[2]) - - -if __name__ == '__main__': - app.run(main) diff --git a/datasets/gedi_extract_l4a.py b/datasets/gedi_extract_l4a.py deleted file mode 100644 index 67e07dde8..000000000 --- a/datasets/gedi_extract_l4a.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright 2020 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from absl import app -from absl import logging -import h5py -import numpy as np -import pandas as pd - -import gedi_lib - -numeric_variables = ( - 'agbd', - 'agbd_pi_lower', - 'agbd_pi_upper', - 'agbd_se', - 'agbd_t', - 'agbd_t_se', - 'algorithm_run_flag', - 'beam', - 'channel', - 'degrade_flag', - 'delta_time', - 'elev_lowestmode', - 'l2_quality_flag', - 'l4_quality_flag', - 'lat_lowestmode', - 'lon_lowestmode', - 'master_frac', - 'master_int', - 'predictor_limit_flag', - 'response_limit_flag', - 'selected_algorithm', - 'selected_mode', - 'selected_mode_flag', - 'sensitivity', - 'solar_elevation', - 'surface_flag', -) - -string_variables = ( - 'shot_number', - 'predict_stratum', -) - -group_vars = ('agbd_prediction', 'geolocation', 'land_cover_data') - -group_var_dict = { - 'agbd_prediction': - ('agbd_a1', 'agbd_a10', 'agbd_a2', 'agbd_a3', 'agbd_a4', 'agbd_a5', - 'agbd_a6', 'agbd_pi_lower_a1', 'agbd_pi_lower_a10', 'agbd_pi_lower_a2', - 'agbd_pi_lower_a3', 'agbd_pi_lower_a4', 'agbd_pi_lower_a5', - 'agbd_pi_lower_a6', 'agbd_pi_upper_a1', 'agbd_pi_upper_a10', - 'agbd_pi_upper_a2', 'agbd_pi_upper_a3', 'agbd_pi_upper_a4', - 'agbd_pi_upper_a5', 'agbd_pi_upper_a6', 'agbd_se_a1', 'agbd_se_a10', - 'agbd_se_a2', 'agbd_se_a3', 'agbd_se_a4', 'agbd_se_a5', 'agbd_se_a6', - 'agbd_t_a1', 'agbd_t_a10', 'agbd_t_a2', 'agbd_t_a3', 'agbd_t_a4', - 'agbd_t_a5', 'agbd_t_a6', 'agbd_t_pi_lower_a1', 'agbd_t_pi_lower_a10', - 'agbd_t_pi_lower_a2', 'agbd_t_pi_lower_a3', 'agbd_t_pi_lower_a4', - 'agbd_t_pi_lower_a5', 'agbd_t_pi_lower_a6', 'agbd_t_pi_upper_a1', - 'agbd_t_pi_upper_a10', 'agbd_t_pi_upper_a2', 'agbd_t_pi_upper_a3', - 'agbd_t_pi_upper_a4', 'agbd_t_pi_upper_a5', 'agbd_t_pi_upper_a6', - 'agbd_t_se_a1', 'agbd_t_se_a10', 'agbd_t_se_a2', 'agbd_t_se_a3', - 'agbd_t_se_a4', 'agbd_t_se_a5', 'agbd_t_se_a6', - 'algorithm_run_flag_a1', 'algorithm_run_flag_a10', - 'algorithm_run_flag_a2', 'algorithm_run_flag_a3', - 'algorithm_run_flag_a4', 'algorithm_run_flag_a5', - 'algorithm_run_flag_a6', 'l2_quality_flag_a1', 'l2_quality_flag_a10', - 'l2_quality_flag_a2', 'l2_quality_flag_a3', 'l2_quality_flag_a4', - 'l2_quality_flag_a5', 'l2_quality_flag_a6', 'l4_quality_flag_a1', - 'l4_quality_flag_a10', 'l4_quality_flag_a2', 'l4_quality_flag_a3', - 'l4_quality_flag_a4', 'l4_quality_flag_a5', 'l4_quality_flag_a6', - 'predictor_limit_flag_a1', 'predictor_limit_flag_a10', - 'predictor_limit_flag_a2', 'predictor_limit_flag_a3', - 'predictor_limit_flag_a4', 'predictor_limit_flag_a5', - 'predictor_limit_flag_a6', 'response_limit_flag_a1', - 'response_limit_flag_a10', 'response_limit_flag_a2', - 'response_limit_flag_a3', 'response_limit_flag_a4', - 'response_limit_flag_a5', 'response_limit_flag_a6', 'selected_mode_a1', - 'selected_mode_a10', 'selected_mode_a2', 'selected_mode_a3', - 'selected_mode_a4', 'selected_mode_a5', 'selected_mode_a6', - 'selected_mode_flag_a1', 'selected_mode_flag_a10', - 'selected_mode_flag_a2', 'selected_mode_flag_a3', - 'selected_mode_flag_a4', 'selected_mode_flag_a5', - 'selected_mode_flag_a6'), - 'geolocation': - ('elev_lowestmode_a1', 'elev_lowestmode_a10', 'elev_lowestmode_a2', - 'elev_lowestmode_a3', 'elev_lowestmode_a4', 'elev_lowestmode_a5', - 'elev_lowestmode_a6', 'lat_lowestmode_a1', 'lat_lowestmode_a10', - 'lat_lowestmode_a2', 'lat_lowestmode_a3', 'lat_lowestmode_a4', - 'lat_lowestmode_a5', 'lat_lowestmode_a6', 'lon_lowestmode_a1', - 'lon_lowestmode_a10', 'lon_lowestmode_a2', 'lon_lowestmode_a3', - 'lon_lowestmode_a4', 'lon_lowestmode_a5', 'lon_lowestmode_a6', - 'sensitivity_a1', 'sensitivity_a10', 'sensitivity_a2', - 'sensitivity_a3', 'sensitivity_a4', 'sensitivity_a5', 'sensitivity_a6', - 'stale_return_flag'), - 'land_cover_data': - ('landsat_treecover', 'landsat_water_persistence', 'leaf_off_doy', - 'leaf_off_flag', 'leaf_on_cycle', 'leaf_on_doy', 'pft_class', - 'region_class', 'urban_focal_window_size', 'urban_proportion') -} - -filled_vars_float = [ - 'agbd', 'agbd_pi_lower', 'agbd_pi_upper', 'agbd_se', 'agbd_t', 'agbd_t_se', - 'agbd_a1', 'agbd_a10', 'agbd_a2', 'agbd_a3', 'agbd_a4', 'agbd_a5', - 'agbd_a6', 'agbd_pi_lower_a1', 'agbd_pi_lower_a10', 'agbd_pi_lower_a2', - 'agbd_pi_lower_a3', 'agbd_pi_lower_a4', 'agbd_pi_lower_a5', - 'agbd_pi_lower_a6', 'agbd_pi_upper_a1', 'agbd_pi_upper_a10', - 'agbd_pi_upper_a2', 'agbd_pi_upper_a3', 'agbd_pi_upper_a4', - 'agbd_pi_upper_a5', 'agbd_pi_upper_a6', 'agbd_se_a1', 'agbd_se_a10', - 'agbd_se_a2', 'agbd_se_a3', 'agbd_se_a4', 'agbd_se_a5', 'agbd_se_a6', - 'agbd_t_a1', 'agbd_t_a10', 'agbd_t_a2', 'agbd_t_a3', 'agbd_t_a4', - 'agbd_t_a5', 'agbd_t_a6', 'agbd_t_pi_lower_a1', 'agbd_t_pi_lower_a10', - 'agbd_t_pi_lower_a2', 'agbd_t_pi_lower_a3', 'agbd_t_pi_lower_a4', - 'agbd_t_pi_lower_a5', 'agbd_t_pi_lower_a6', 'agbd_t_pi_upper_a1', - 'agbd_t_pi_upper_a10', 'agbd_t_pi_upper_a2', 'agbd_t_pi_upper_a3', - 'agbd_t_pi_upper_a4', 'agbd_t_pi_upper_a5', 'agbd_t_pi_upper_a6', - 'agbd_t_se_a1', 'agbd_t_se_a10', 'agbd_t_se_a2', 'agbd_t_se_a3', - 'agbd_t_se_a4', 'agbd_t_se_a5', 'agbd_t_se_a6' -] - -filled_vars_byte = [ - 'predictor_limit_flag', - 'predictor_limit_flag_a1', - 'predictor_limit_flag_a10', - 'predictor_limit_flag_a2', - 'predictor_limit_flag_a3', - 'predictor_limit_flag_a4', - 'predictor_limit_flag_a5', - 'predictor_limit_flag_a6', - 'response_limit_flag', - 'response_limit_flag_a1', - 'response_limit_flag_a10', - 'response_limit_flag_a2', - 'response_limit_flag_a3', - 'response_limit_flag_a4', - 'response_limit_flag_a5', - 'response_limit_flag_a6' -] - - -def extract_values(input_paths: str, output_path: str) -> None: - """Extracts all variables from all algorithms. - - Args: - input_paths: GEDI L4A file paths - output_path: csv output file path - """ - assert len(input_paths) == 1 - l4a_path = input_paths[0] - basename = os.path.basename(l4a_path) - if not basename.startswith('GEDI') or not basename.endswith('.h5'): - logging.error('Input path is not a GEDI filename: %s', l4a_path) - return - - with h5py.File(l4a_path, 'r') as l4a_hdf_fh: - with open(output_path, 'w') as csv_fh: - write_csv(l4a_hdf_fh, csv_fh) - - -def write_csv(l4a_hdf_fh, csv_file): - """Writes a single CSV file based on the contents of HDF file.""" - is_first = True - # Iterating over relative height percentage values from 0 to 100 - for k in l4a_hdf_fh.keys(): - if not k.startswith('BEAM'): - continue - print('\t', k) - - df = pd.DataFrame() - - for v in numeric_variables + string_variables: - gedi_lib.hdf_to_df(l4a_hdf_fh, k, v, df) - - # Add the incidence angle variables from the corresponding L2B file. - for group_var in group_vars: - for l4a_var in group_var_dict[group_var]: - gedi_lib.hdf_to_df(l4a_hdf_fh, k, f'{group_var}/{l4a_var}', df) - df[filled_vars_float] = df.loc[:, filled_vars_float].replace( - to_replace=-9999, value=np.nan) - df[filled_vars_byte] = df.loc[:, filled_vars_byte].replace( - to_replace=255, value=np.nan) - - # Filter our rows with nan values for lat_lowestmode or lon_lowestmode. - # Such rows are not ingestable into EE. - df = df[df.lat_lowestmode.notnull()] - df = df[df.lon_lowestmode.notnull()] - gedi_lib.add_shot_number_breakdown(df) - - df.to_csv( - csv_file, - float_format='%3.6f', - index=False, - header=is_first, - mode='a', - line_terminator='\n') - is_first = False - - -def main(argv): - extract_values(argv[1], argv[2]) - -if __name__ == '__main__': - app.run(main) diff --git a/datasets/gedi_lib.py b/datasets/gedi_lib.py deleted file mode 100644 index 58dc6f195..000000000 --- a/datasets/gedi_lib.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright 2020 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -import os -import time -from typing import Any -from absl import flags -import attr -from dateutil import relativedelta -import h5py -import numpy as np -import pandas as pd -import pytz - -import ee - -l2b_variables_for_l2a = ('local_beam_azimuth', 'local_beam_elevation') - - -NUM_UTM_GRID_CELLS = flags.DEFINE_integer( - 'num_utm_grid_cells', 389, 'UTM grid cell count') - -ALLOW_GEDI_RASTERIZE_OVERWRITE = flags.DEFINE_bool( - 'allow_gedi_rasterize_overwrite', False, - 'Whether exported assets from gedi_rasterize are allowed to overwrite ' - 'existing assets.') - - -@attr.s -class ExportParameters: - """Arguments for starting export jobs.""" - asset_id: str = attr.ib() - image: Any = attr.ib() # ee.Image - pyramiding_policy: dict[str, str] = attr.ib() - crs: str = attr.ib() - region: Any = attr.ib() # ee.Geometry.Polygon | ee.Geometry.LinearRing - overwrite: bool = attr.ib() - - -def _start_task(export_params: ExportParameters) -> str: - """Starts an EE export task with the given parameters.""" - asset_id = export_params.asset_id - task = ee.batch.Export.image.toAsset( - image=export_params.image, - description=os.path.basename(asset_id), - assetId=asset_id, - region=export_params.region, - pyramidingPolicy=export_params.pyramiding_policy, - scale=25, - crs=export_params.crs, - maxPixels=1e13, - overwrite=export_params.overwrite) - - time.sleep(0.1) - task.start() - return task.status()['id'] - - -def rasterize_gedi_by_utm_zone(table_asset_ids, - raster_asset_id, - grid_cell_feature, - grill_month, - export_function, - overwrite=False): - """Creates and runs an EE export job. - - Args: - table_asset_ids: list of strings, table asset ids to rasterize - raster_asset_id: string, raster asset id to create - grid_cell_feature: ee.Feature - grill_month: datetime, the 1st of the month for the data to be rasterized - export_function: function to create export - overwrite: bool, if any of the assets can be replaced if they already exist - - Returns: - string, task id of the created task - """ - export_params = export_function(table_asset_ids, raster_asset_id, - grid_cell_feature, grill_month, overwrite) - return _start_task(export_params) - - -def add_shot_number_breakdown(df: pd.DataFrame) -> None: - """Adds fields obtained by breaking down shot_number. - - Example: from 154341234599141100 we obtain: - orbit_number = 15434 - beam_number = 12 (we ignore it) - minor_frame_number = 345 - shot_number_within_beam = 99141101 - - Args: - df: pd.DataFrame - """ - # It's simpler to use substrings than to do math. - df['shot_number_within_beam'] = [ - int(str(x)[-8:]) for x in df['shot_number']] - df['minor_frame_number'] = [int(str(x)[-11:-8]) for x in df['shot_number']] - # beam number, [-13:-11], is already in the 'beam' property - df['orbit_number'] = [int(str(x)[:-13]) for x in df['shot_number']] - - -def hdf_to_df( - hdf_fh: h5py.File, beam_key: str, var: str, df: pd.DataFrame) -> None: - """Copies data for a single var from an HDF file to a Pandas DataFrame. - - Args: - hdf_fh: h5 file handle - beam_key: a string like BEAM0110, first part of the HDF variable key - var: second part of the HDF variable key (also used for the dataframe key) - df: output Pandsa DataFrame - """ - if var.startswith('#'): - return - hdf_key = f'{beam_key}/{var}' - df_key = var.split('/')[-1] - - ds = hdf_fh[hdf_key] - df[df_key] = ds[:] - if len(df[df_key]) and isinstance(df[df_key][0], bytes): - df[df_key] = df[df_key].apply(lambda x: x.decode()) - df[df_key].replace([np.inf, -np.inf], np.nan, inplace=True) - if ds.attrs.get('_FillValue') is not None: - # We need to use pd.NA that works with integer types (np.nan does not) - df[df_key].replace(ds.attrs.get('_FillValue'), pd.NA, inplace=True) - - -def gedi_deltatime_epoch(dt): - return dt.timestamp() - (datetime.datetime(2018, 1, 1) - - datetime.datetime(1970, 1, 1)).total_seconds() - - -def timestamp_ms_for_datetime(dt): - return time.mktime(dt.timetuple()) * 1000 - - -def parse_date_from_gedi_filename(table_asset_id): - return pytz.utc.localize( - datetime.datetime.strptime( - os.path.basename(table_asset_id).split('_')[2], '%Y%j%H%M%S')) - - -def create_export( - table_asset_ids: list[str], - raster_asset_id: str, - raster_bands: list[str], - int_bands: list[str], - grid_cell_feature: Any, - grill_month: datetime.datetime, - overwrite: bool) -> ExportParameters: - """Creates an EE export job definition. - - Args: - table_asset_ids: list of strings, table asset ids to rasterize - raster_asset_id: string, raster asset id to create - raster_bands: list of raster bands, - int_bands: list of integer bands - grid_cell_feature: ee.Feature - grill_month: grilled month - overwrite: bool, if any of the assets can be replaced if they already exist - - Returns: - an ExportParameters object containing arguments for an export job. - """ - if not table_asset_ids: - raise ValueError('No table asset ids specified') - table_asset_dts = [] - for asset_id in table_asset_ids: - date_obj = parse_date_from_gedi_filename(asset_id) - table_asset_dts.append(date_obj) - # pylint:disable=g-tzinfo-datetime - # We don't care about pytz problems with DST - this is just UTC. - month_start = grill_month.replace(day=1) - # pylint:enable=g-tzinfo-datetime - month_end = month_start + relativedelta.relativedelta(months=1) - if all((date < month_start or date >= month_end) for date in table_asset_dts): - raise ValueError( - 'ALL the table files are outside of the expected month that is ranging' - ' from %s to %s' % (month_start, month_end)) - - right_month_dts = [ - dates for dates in table_asset_dts - if dates >= month_start and dates < month_end - ] - if len(right_month_dts) / len(table_asset_dts) < 0.55: - raise ValueError( - 'The majority of table ids are not in the requested month %s' % - grill_month) - - shots = [] - for table_asset_id in table_asset_ids: - shots.append(ee.FeatureCollection(table_asset_id)) - - box = grid_cell_feature.geometry().buffer(2500, 25).bounds() - # month_start and month_end are converted to epochs using the - # same temporal offset as "delta_time." - # pytype: disable=attribute-error - shots = ee.FeatureCollection(shots).flatten().filterBounds(box).filter( - ee.Filter.rangeContains( - 'delta_time', - gedi_deltatime_epoch(month_start), - gedi_deltatime_epoch(month_end)) - ) - # pytype: enable=attribute-error - # We use ee.Reducer.first() below, so this will pick the point with the - # higherst sensitivity. - shots = shots.sort('sensitivity', False) - - crs = grid_cell_feature.get('crs').getInfo() - - image_properties = { - 'month': grill_month.month, - 'year': grill_month.year, - 'version': 1, - 'system:time_start': timestamp_ms_for_datetime(month_start), - 'system:time_end': timestamp_ms_for_datetime(month_end), - 'table_asset_ids': table_asset_ids - } - - image = ( - shots.sort('sensitivity', False).reduceToImage( - raster_bands, - ee.Reducer.first().forEach(raster_bands)).reproject( - crs, None, 25).set(image_properties)) - - # This keeps the original (alphabetic) band order. - image_with_types = image.toDouble().addBands( - image.select(int_bands).toInt(), overwrite=True) - - return ExportParameters( - asset_id=raster_asset_id, - image=image_with_types.clip(box), - pyramiding_policy={'.default': 'sample'}, - crs=crs, - region=box, - overwrite=overwrite) diff --git a/datasets/gedi_rasterize_l2a.py b/datasets/gedi_rasterize_l2a.py deleted file mode 100644 index 36469c07d..000000000 --- a/datasets/gedi_rasterize_l2a.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright 2020 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -from typing import Any - -from absl import app - -import ee -import gedi_lib - - -# From https://lpdaac.usgs.gov/products/gedi02_av002/ -# We list all known property names for safety, even though we might not -# be currently using all of them during rasterization. -INTEGER_PROPS = frozenset({ - 'beam', - 'channel', - 'degrade_flag', - 'elevation_bias_flag', - 'enable_select_mode', - 'l2a_alg_count', - 'landsat_water_persistence', - 'leaf_off_doy', - 'leaf_off_flag', - 'leaf_on_cycle', - 'leaf_on_doy', - 'master_int', - 'num_detectedmodes', - 'num_detectedmodes_aN', - 'ocean_calibration_shot_flag', - 'pft_class', - 'quality_flag', - 'quality_flag_aN', - 'region_class', - 'rh_aN', - 'rx_algrunflag', - 'rx_assess_flag', - 'rx_clipbin0', - 'rx_clipbin_count', - 'rx_estimate_bias', - 'rx_gflag', - 'rx_giters', - 'rx_maxpeakloc', - 'rx_nummodes', - 'rx_use_fixed_thresholds', - 'selected_algorithm', - 'selected_mode', - 'selected_mode_flag', - # Note that 'shot_number' is a long ingested as a string, so - # we don't rasterize it. - 'stale_return_flag', - 'state_return_flag', - 'surface_flag', - 'toploc_miss', - 'urban_focal_window_size', - 'urban_proportion', - # Fields added by splitting shot_number - 'minor_frame_number', - 'orbit_number', - 'shot_number_within_beam', -}) - -raster_bands = ( - 'beam', - 'degrade_flag', - 'delta_time', - 'digital_elevation_model', - 'digital_elevation_model_srtm', - 'elev_highestreturn', - 'elev_lowestmode', - 'elevation_bias_flag', - 'energy_total', - 'landsat_treecover', - 'landsat_water_persistence', - 'lat_highestreturn', - 'leaf_off_doy', - 'leaf_off_flag', - 'leaf_on_cycle', - 'leaf_on_doy', - 'lon_highestreturn', - 'modis_nonvegetated', - 'modis_nonvegetated_sd', - 'modis_treecover', - 'modis_treecover_sd', - 'num_detectedmodes', - 'pft_class', - 'quality_flag', - 'region_class', - 'selected_algorithm', - 'selected_mode', - 'selected_mode_flag', - 'sensitivity', - 'solar_azimuth', - 'solar_elevation', - 'surface_flag', - 'urban_focal_window_size', - 'urban_proportion', - 'rh0', - 'rh1', - 'rh2', - 'rh3', - 'rh4', - 'rh5', - 'rh6', - 'rh7', - 'rh8', - 'rh9', - # pylint:disable=line-too-long - 'rh10', - 'rh11', - 'rh12', - 'rh13', - 'rh14', - 'rh15', - 'rh16', - 'rh17', - 'rh18', - 'rh19', - 'rh20', - 'rh21', - 'rh22', - 'rh23', - 'rh24', - 'rh25', - 'rh26', - 'rh27', - 'rh28', - 'rh29', - 'rh30', - 'rh31', - 'rh32', - 'rh33', - 'rh34', - 'rh35', - 'rh36', - 'rh37', - 'rh38', - 'rh39', - 'rh40', - 'rh41', - 'rh42', - 'rh43', - 'rh44', - 'rh45', - 'rh46', - 'rh47', - 'rh48', - 'rh49', - 'rh50', - 'rh51', - 'rh52', - 'rh53', - 'rh54', - 'rh55', - 'rh56', - 'rh57', - 'rh58', - 'rh59', - 'rh60', - 'rh61', - 'rh62', - 'rh63', - 'rh64', - 'rh65', - 'rh66', - 'rh67', - 'rh68', - 'rh69', - 'rh70', - 'rh71', - 'rh72', - 'rh73', - 'rh74', - 'rh75', - 'rh76', - 'rh77', - 'rh78', - 'rh79', - 'rh80', - 'rh81', - 'rh82', - 'rh83', - 'rh84', - 'rh85', - 'rh86', - 'rh87', - 'rh88', - 'rh89', - 'rh90', - 'rh91', - 'rh92', - 'rh93', - 'rh94', - 'rh95', - 'rh96', - 'rh97', - 'rh98', - 'rh99', - # pylint:enable=line-too-long - 'rh100', - 'minor_frame_number', - 'orbit_number', - 'shot_number_within_beam', -) + gedi_lib.l2b_variables_for_l2a - - -int_bands = [p for p in raster_bands if p in INTEGER_PROPS] - - -def export_wrapper(table_asset_ids: list[str], raster_asset_id: str, - grid_cell_feature: Any, grill_month: datetime.datetime, - overwrite: bool) -> gedi_lib.ExportParameters: - """Creates an EE export job definition. - - Args: - table_asset_ids: list of strings, table asset ids to rasterize - raster_asset_id: string, raster asset id to create - grid_cell_feature: ee.Feature - grill_month: grilled month - overwrite: bool, if any of the assets can be replaced if they already exist - - Returns: - an ExportParameters object containing arguments for an export job. - """ - return gedi_lib.create_export( - table_asset_ids=table_asset_ids, - raster_asset_id=raster_asset_id, - raster_bands=list(raster_bands), - int_bands=int_bands, - grid_cell_feature=grid_cell_feature, - grill_month=grill_month, - overwrite=overwrite) - - -def main(argv): - start_id = 1 # First UTM grid cell id - ee.Initialize() - raster_collection = 'LARSE/GEDI/GEDI02_A_002_MONTHLY' - - for grid_cell_id in range(start_id, - start_id + gedi_lib.NUM_UTM_GRID_CELLS.value): - grid_cell_feature = ee.Feature( - ee.FeatureCollection( - 'users/yang/GEETables/GEDI/GEDI_UTM_GRIDS_LandOnly').filterMetadata( - 'grid_id', 'equals', grid_cell_id).first()) - with open(argv[1]) as fh: - gedi_lib.rasterize_gedi_by_utm_zone( - [x.strip() for x in fh], - raster_collection + '/' + '%03d' % grid_cell_id, - grid_cell_feature, - argv[2], - export_wrapper, - overwrite=gedi_lib.ALLOW_GEDI_RASTERIZE_OVERWRITE.value) - - -if __name__ == '__main__': - app.run(main) diff --git a/datasets/gedi_rasterize_l2b.py b/datasets/gedi_rasterize_l2b.py deleted file mode 100644 index 2acf6cd73..000000000 --- a/datasets/gedi_rasterize_l2b.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2020 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -from typing import Any - -from absl import app - -import ee -from google3.pyglib.function_utils import memoize -import gedi_lib - - -# From https://lpdaac.usgs.gov/products/gedi02_av002/ -# We list all known property names for safety, even though we might not -# be currently using all of them during rasterization. -INTEGER_PROPS = frozenset({ - 'algorithmrun_flag', - 'algorithmrun_flag_aN', - 'channel', - 'degrade_flag', - 'l2a_quality_flag', - 'l2b_quality_flag', - 'landsat_water_persistencoe', - 'leaf_off_flag', - 'leaf_on_cycle', - 'master_int', - 'num_detectedmodes', - 'pft_class', - 'region_class', - 'rg_eg_constraint_center_buffer', - 'rg_eg_flag_aN', - 'rg_eg_niter_aN', - 'selected_l2a_algorithm', - 'selected_mode', - 'selected_mode_flag', - 'selected_rg_algorithm', - # Note that 'shot_number' is a long ingested as a string, so - # we don't rasterize it. - 'stale_return_flag', - 'surface_flag', - 'urban_focal_window_size', - 'urban_proportion', - # Fields added by splitting shot_number - 'minor_frame_number', - 'orbit_number', - 'shot_number_within_beam', -}) - - -@memoize.Memoize() -def get_raster_bands(band): - return [band + str(count) for count in range(30)] - -# This is a subset of all available table properties. -raster_bands = [ - 'algorithmrun_flag', 'beam', 'cover' -] + get_raster_bands('cover_z') + [ - 'degrade_flag', 'delta_time', 'fhd_normal', 'l2b_quality_flag', - 'local_beam_azimuth', 'local_beam_elevation', 'pai' -] + get_raster_bands('pai_z') + get_raster_bands('pavd_z') + [ - 'pgap_theta', 'selected_l2a_algorithm', 'selected_rg_algorithm', - 'sensitivity', 'solar_azimuth', 'solar_elevation', - 'minor_frame_number', 'orbit_number', 'shot_number_within_beam' -] - -int_bands = [p for p in raster_bands if p in INTEGER_PROPS] - - -def export_wrapper(table_asset_ids: list[str], raster_asset_id: str, - grid_cell_feature: Any, grill_month: datetime.datetime, - overwrite: bool) -> gedi_lib.ExportParameters: - """Creates an EE export job definition. - - Args: - table_asset_ids: list of strings, table asset ids to rasterize - raster_asset_id: string, raster asset id to create - grid_cell_feature: ee.Feature - grill_month: grilled month - overwrite: bool, if any of the assets can be replaced if they already exist - - Returns: - an ExportParameters object containing arguments for an export job. - """ - return gedi_lib.create_export( - table_asset_ids=table_asset_ids, - raster_asset_id=raster_asset_id, - raster_bands=raster_bands, - int_bands=int_bands, - grid_cell_feature=grid_cell_feature, - grill_month=grill_month, - overwrite=overwrite) - - -def main(argv): - start_id = 1 # First UTM grid cell id - ee.Initialize() - raster_collection = 'LARSE/GEDI/GEDI02_B_002_MONTHLY' - - for grid_cell_id in range(start_id, - start_id + gedi_lib.NUM_UTM_GRID_CELLS.value): - grid_cell_feature = ee.Feature( - ee.FeatureCollection( - 'users/yang/GEETables/GEDI/GEDI_UTM_GRIDS_LandOnly').filterMetadata( - 'grid_id', 'equals', grid_cell_id).first()) - with open(argv[1]) as fh: - gedi_lib.rasterize_gedi_by_utm_zone( - [x.strip() for x in fh], - raster_collection + '/' + '%03d' % grid_cell_id, - grid_cell_feature, - argv[2], - export_wrapper, - overwrite=gedi_lib.ALLOW_GEDI_RASTERIZE_OVERWRITE.value) - - -if __name__ == '__main__': - app.run(main) diff --git a/datasets/gedi_rasterize_l4a.py b/datasets/gedi_rasterize_l4a.py deleted file mode 100644 index 783e63e46..000000000 --- a/datasets/gedi_rasterize_l4a.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2020 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -from typing import Any - -from absl import app - -import ee -from google3.third_party.earthengine_community.datasets import gedi_extract_l4a -import gedi_lib - -# From https://lpdaac.usgs.gov/products/gedi02_av002/ -# We list all known property names for safety, even though we might not -# be currently using all of them during rasterization. -INTEGER_PROPS = ( - gedi_extract_l4a.numeric_variables + - gedi_extract_l4a.group_var_dict['agbd_prediction'] + - gedi_extract_l4a.group_var_dict['geolocation'] + - gedi_extract_l4a.group_var_dict['land_cover_data']) - - -def export_wrapper(table_asset_ids: list[str], raster_asset_id: str, - grid_cell_feature: Any, grill_month: datetime.datetime, - overwrite: bool) -> gedi_lib.ExportParameters: - """Creates an EE export job definition. - - Args: - table_asset_ids: list of strings, table asset ids to rasterize - raster_asset_id: string, raster asset id to create - grid_cell_feature: ee.Feature - grill_month: grilled month - overwrite: bool, if any of the assets can be replaced if they already exist - - Returns: - an ExportParameters object containing arguments for an export job. - """ - return gedi_lib.create_export( - table_asset_ids=table_asset_ids, - raster_asset_id=raster_asset_id, - raster_bands=list(INTEGER_PROPS), - int_bands=list(INTEGER_PROPS), - grid_cell_feature=grid_cell_feature, - grill_month=grill_month, - overwrite=overwrite) - - -def main(argv): - start_id = 1 # First UTM grid cell id - ee.Initialize() - raster_collection = 'LARSE/GEDI/GEDI04_A_002_MONTHLY' - for grid_cell_id in range(start_id, - start_id + gedi_lib.NUM_UTM_GRID_CELLS.value): - grid_cell_feature = ee.Feature( - ee.FeatureCollection( - 'users/yang/GEETables/GEDI/GEDI_UTM_GRIDS_LandOnly').filterMetadata( - 'grid_id', 'equals', grid_cell_id).first()) - with open(argv[1]) as fh: - gedi_lib.rasterize_gedi_by_utm_zone( - [x.strip() for x in fh], - raster_collection + '/' + '%03d' % grid_cell_id, - grid_cell_feature, - argv[2], - export_wrapper, - overwrite=gedi_lib.ALLOW_GEDI_RASTERIZE_OVERWRITE.value) - - -if __name__ == '__main__': - app.run(main) diff --git a/datasets/smap_anomaly_l3.py b/datasets/smap_anomaly_l3.py deleted file mode 100644 index 851cd318d..000000000 --- a/datasets/smap_anomaly_l3.py +++ /dev/null @@ -1,86 +0,0 @@ -"""SMAP L3 daily anomaly. - -Code by Karyn Tabor (NASA). - -This code calculates a normalized anomaly with SMAP L3 AM and PM data -1. Calculate the climatology by averaging 31 days of SMAP L3 daily - composite data over every year in the record. The 31-day window is - centered around day t. t is the current day-15. -2. Create QA mask -3. Subtract the climatology from the current average to get the anomaly -4. Apply QA mask to the anomaly -""" - -import datetime -import ee - - -# pytype:disable=attribute-error - -def anomaly(image): - SMAPL3 = ee.ImageCollection('NASA/SMAP/SPL3SMP_E/005_RAW') - current_date = ee.Date(datetime.datetime.now()) - image_date = ee.Date(image.get('system:time_start')) - - # create list of years to calculate climatology - start_year = 2015 # first year of SMAP data - current_year = current_date.get('year') - image_year = image_date.get('year') - years = ee.List.sequence(start_year, current_year).remove(image_year) - - soilmoisture_AM = image.select('soil_moisture_am') - soilmoisture_PM = image.select('soil_moisture_pm') - - # select QA values - soilmoisture_AM_qamask = image.select('retrieval_qual_flag_am') - soilmoisture_PM_qamask = image.select('retrieval_qual_flag_pm') - - # invert QA values to create mask - QA_AM_mask = soilmoisture_AM_qamask.eq(0) - QA_PM_mask = soilmoisture_PM_qamask.eq(0) - - # create climatologies - # function calculating image of climatology means for AM - def am_clim(year): - date_object = image_date.update(year=year) - begindate = ee.Date(date_object).advance(-15, 'day') - enddate = ee.Date(date_object).advance(+16, 'day') - annual = SMAPL3.filter(ee.Filter.date( - begindate, enddate)).select('soil_moisture_am').mean() - return annual.rename('soil_moisture_am_anomaly_15d') - - annualSM_AM = ee.ImageCollection.fromImages(years.map(am_clim)) - - # calculate average of all years - climSM_AM = annualSM_AM.toBands().reduce(ee.Reducer.mean()) - - # repeat above function for PM - def pm_clim(year): - date_object = image_date.update(year=year) - begindate = ee.Date(date_object).advance(-15, 'day') - enddate = ee.Date(date_object).advance(+15, 'day') - annual = SMAPL3.filter(ee.Filter.date( - begindate, enddate)).select('soil_moisture_pm').mean() - return annual.rename('soil_moisture_pm_anomaly_15d') - - annualSM_PM = ee.ImageCollection.fromImages(years.map(pm_clim)) - - # calculate average of all years - climSM_PM = annualSM_PM.toBands().reduce(ee.Reducer.mean()) - - # calculate anomaly - SM_anomaly_AM = soilmoisture_AM.subtract(climSM_AM).rename( - 'soil_moisture_am_anomaly') - SM_anomaly_PM = soilmoisture_PM.subtract(climSM_PM).rename( - 'soil_moisture_pm_anomaly') - - # apply QA mask - SM_anomaly_AM_masked = SM_anomaly_AM.updateMask(QA_AM_mask) - SM_anomaly_PM_masked = SM_anomaly_PM.updateMask(QA_PM_mask) - - return SM_anomaly_AM_masked, SM_anomaly_PM_masked - - -def apply(image): - am, pm = anomaly(image) - return image.addBands([am, pm]) diff --git a/datasets/smap_anomaly_l4.py b/datasets/smap_anomaly_l4.py deleted file mode 100644 index d04dff3dd..000000000 --- a/datasets/smap_anomaly_l4.py +++ /dev/null @@ -1,55 +0,0 @@ -"""SMAP L4 anomaly. - -Code by Karyn Tabor (NASA). - -This code calculates a normalized anomaly with SMAP L4 data -1. Calculate the climatology by averaging 31 days of SMAP L4 daily - data over every year in the record. The 31-day window is - centered around day t. t is the current day-15. -2. Subtract the climatology from the current average to get the anomaly -""" - -import datetime -import ee - - -# pytype:disable=attribute-error - -def anomaly(image): - SMAPL4 = ee.ImageCollection('NASA/SMAP/SPL4SMGP/007_RAW') - current_date = ee.Date(datetime.datetime.now()) - image_date = ee.Date(image.get('system:time_start')) - - # create list of years to calculate climatology - start_year = 2015 # first year of SMAP data - current_year = current_date.get('year') - image_year = image_date.get('year') - years = ee.List.sequence(start_year, current_year).remove(image_year) - - soilmoisture = image.select('sm_surface') - - # create climatologies - # function calculating image of climatology means - def clim(year): - date_object = image_date.update(year=year) - begindate = ee.Date(date_object).advance(-15, 'day') - enddate = ee.Date(date_object).advance(+16, 'day') - annual = SMAPL4.filter(ee.Filter.date( - begindate, enddate)).select('sm_surface').mean() - return annual.rename('soil_moisture_anomaly_15d') - - annualSM = ee.ImageCollection.fromImages(years.map(clim)) - - # calculate average of all years - climSM = annualSM.toBands().reduce(ee.Reducer.mean()) - - # calculate anomaly - SM_anomaly = soilmoisture.subtract(climSM).rename( - 'sm_surface_anomaly') - - return SM_anomaly - - -def apply(image): - computed_anomaly = anomaly(image) - return image.addBands([computed_anomaly]) diff --git a/datasets/smap_convert_l3.py b/datasets/smap_convert_l3.py deleted file mode 100644 index fa6c70df3..000000000 --- a/datasets/smap_convert_l3.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright 2022 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#This python code opens a single SMAP L3 HDF file and outputs a Geotiff in geographic projection -# -# requires pyl4c toolbox https://pypi.org/project/pyl4c/ -# -# the output bands are as follows: -# 1) soil_moisture AM -# 2) tb_h_corrected AM -# 3) tb_v_corrected AM -# 4) vegetation_water_content AM -# 5) soil_moisture quality flag AM -# 6) tb_h_corrected quality flag AM -# 7) tb_v_corrected quality flag AM -# 8) soil_moisture PM -# 9) tb_h_corrected PM -# 10) tb_v_corrected PM -# 11) vegetation_water_content PM -# 12) soil_moisture quality flag PM -# 13) tb_h_corrected quality flag PM -# 14) tb_v_corrected quality flag PM -# -# where AM is 6:00 AM overpass and PM is 6:00 PM overpass -# quality flag of 0=good quality and flag=1 is bad quality -# -# Convert_SMAPL3_QA_EASEvsHDF5_GeoTiff_9km_AMPM.py -# written by Qing Liu, Arthur Endsley and Karyn Tabor -# last edited November 30, 2022 by Karyn Tabor - - -from absl import app -from osgeo import gdal -import glob -import os -import re -import numpy as np -from pyl4c.lib.modis import dec2bin_unpack -from pyl4c.spatial import array_to_raster -from pyl4c.data.fixtures import EASE2_GRID_PARAMS -from pyl4c.epsg import EPSG - -from google3.third_party.earthengine_community.datasets import smap_lib - - -# For the L3 data, the var list is very specific; the order matters -# for retrieving specific datasets -# 1:4 are data sets; 4 is SM QA; 5:6 is TB QA -VAR_LIST = [ - 'soil_moisture', 'tb_h_corrected', 'tb_v_corrected', - 'vegetation_water_content', 'retrieval_qual_flag', 'tb_qual_flag_h', - 'tb_qual_flag_v' -] - - -def SMAP_retrievalQC_fail(x): - """Returns pass/fail for QC flags. - - Returns pass/fail for QC flags based on the SMAP l3 QC protocol for the - - `retrieval_qual_flag` band: Bad pixels have either `1` in the first bit - ("Soil - moisture retrieval doesn't have recommended quality") or 1` in the third bit - (Soil moisture retrieval was not successful"). - Output array is True wherever the array fails QC criteria. Compare to: - - np.vectorize(lambda v: v[0] == 1 or v[3:5] != '00') - - Parameters - ---------- - x : numpy.ndarray - Array where the last axis enumerates the unpacked bits - (ones and zeros) - example: c = dec2bin_unpack(np.array([2], dtype = np.uint8)) - Returns - ------- - numpy.ndarray - Boolean array with True wherever QC criteria are failed - example Returns: array([[0, 0, 0, 0, 0, 0, 1, 0]], dtype=uint8) - """ - y = dec2bin_unpack(x.astype(np.uint8)) - # Emit 1 = FAIL if 1st bit == 1 - # ("Soil moisture retrieval doesn't have recommended quality") - c1 = y[...,7] - # - # Third bit is ==1 "Soil moisture retrieval was not successful" - c2 = y[...,5] - # - # Intermediate arrays are 1 = FAIL, 0 = PASS - return (c1 + c2) > 0 - - -def SMAP_TB_QC_fail(x): - """Returns pass/fail for QC flags. - - Returns pass/fail for QC flags based on the SMAPL3 QC protocol for the - - `tb_qual_flag_v'` and tb_qual_flag_h' layers: Bad pixels have either `1` in - the first bit - ("Observation does not have acceptable quality") - Output array is True wherever the array fails QC criteria. Compare to: - - np.vectorize(lambda v: v[0] == 1 or v[3:5] != '00') - - Parameters - ---------- - x : numpy.ndarray - Array where the last axis enumerates the unpacked bits - (ones and zeros) - - Returns - ------- - numpy.ndarray - Boolean array with True wherever QC criteria are failed - """ - y = dec2bin_unpack(x.astype(np.uint8)) - # Emit 1 = FAIL if 1st bit == 1 ("Data has acceptable quality") - c1 = y[...,7] - # Intermediate arrays are 1 = FAIL, 0 = PASS - return (c1) > 0 - - -def convert(source_h5, target_tif): - """Converts a SMAP L3 HDF file to a geotiff.""" - - # Options for gdal_translate - translate_options = gdal.TranslateOptions( - format='GTiff', - outputSRS='+proj=cea +lon_0=0 +lat_ts=30 +ellps=WGS84 +units=m', - outputBounds=[-17367530.45, 7314540.11, 17367530.45, -7314540.11], - ) - - # array_to_raster params - gt = EASE2_GRID_PARAMS['M09']['geotransform'] - wkt = EPSG[6933] - tif_list =[] - - SMAP_opass = ['AM','PM'] - for i in range(0,2): - print('SMAP overpass is %s' % (SMAP_opass[i])) - if i == 1: - #add '_pm' to all the variables in the list - pass_var_list = [s + '_pm' for s in VAR_LIST] - bnum=8 - else: - pass_var_list = VAR_LIST - bnum = 1 - - # 'bnum' is the number to add to band to number the temp tiff - # band 1 for am and 7 for pm - - # convert individual variables to separate GeoTiff files - for iband in range(0,4): - var = pass_var_list[iband] - sds = smap_lib.gdal_safe_open( - 'HDF5:'+source_h5+'://Soil_Moisture_Retrieval_Data_'+SMAP_opass[i]+ - '/'+var) - sds_array = sds.ReadAsArray() - dst_tmp = '/tmp/tmp'+str(iband+bnum)+'.tif' - sds_gdal = array_to_raster(sds_array,gt,wkt) - ds = gdal.Translate(dst_tmp,sds_gdal,options=translate_options) - ds = None - tif_list.append(dst_tmp) - - # convert individual QA vars to separate GeoTiff files for Soil moisture - iband=4 - var = pass_var_list[iband] - - sds = smap_lib.gdal_safe_open( - 'HDF5:'+source_h5+'://Soil_Moisture_Retrieval_Data_'+SMAP_opass[i]+ - '/'+var) - sds_array = sds.ReadAsArray() - dst_tmp = '/tmp/tmp'+str(iband+bnum)+'.tif' - - #Call to QA function here - qa = SMAP_retrievalQC_fail(sds_array).astype(np.uint8) - qa_gdal = array_to_raster(qa,gt,wkt) - ds = gdal.Translate(dst_tmp,qa_gdal,options=translate_options) - ds = None - tif_list.append(dst_tmp) - - # convert individual QA vars to separate GeoTiff files for TB - - for iband in range(5,7): - var = pass_var_list[iband] - - sds = smap_lib.gdal_safe_open( - 'HDF5:'+source_h5+'://Soil_Moisture_Retrieval_Data_'+SMAP_opass[i]+ - '/'+var) - sds_array = sds.ReadAsArray() - dst_tmp = '/tmp/tmp'+str(iband+bnum)+'.tif' - - #Call to QA function here - qa = SMAP_TB_QC_fail(sds_array).astype(np.uint8) - qa_gdal = array_to_raster(qa,gt,wkt) - ds = gdal.Translate(dst_tmp,qa_gdal,options=translate_options) - ds = None - tif_list.append(dst_tmp) - - - # build a VRT(Virtual Dataset) that includes the list of input tif files - gdal.BuildVRT('/tmp/tmp.vrt', tif_list, options='-separate') - - warp_options = gdal.WarpOptions( - creationOptions=['COMPRESS=LZW'], - srcSRS='EPSG:6933', - dstSRS='EPSG:4326', - xRes=0.08, - yRes=0.08, - srcNodata=-9999, - dstNodata=-9999 - ) - - # run gdal_Warp to reproject the VRT data and save in the target tif file with - # compression - ds = gdal.Warp(target_tif, '/tmp/tmp.vrt', options=warp_options) - ds = None - - # remove temporary files - os.remove('/tmp/tmp.vrt') - for f in tif_list: - if os.path.exists(f): - os.remove(f) - - -def main(argv): - convert(argv[1], argv[2]) - - -if __name__ == '__main__': - app.run(main) diff --git a/datasets/smap_convert_l4.py b/datasets/smap_convert_l4.py deleted file mode 100644 index 1d27cf4f6..000000000 --- a/datasets/smap_convert_l4.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright 2022 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#This python code opens a single SMAP L4 HDF file and outputs a Geotiff in geographic projection -# -# requires pyl4c toolbox https://pypi.org/project/pyl4c/ -# -# the output bands are as follows: -# 1) sm_surface -# 2) sm_rootzone -# 3) sm_profile -# 4) sm_surface_wetness -# 5) sm_rootzone_wetness -# 6) sm_profile_wetness -# 7) surface_temp -# 8) soil_temp_layer1 -# 9) soil_temp_layer2 -# 10) soil_temp_layer3 -# 11) soil_temp_layer4 -# 12) soil_temp_layer5 -# 13) soil_temp_layer6 -# 14) snow_mass -# 15) snow_depth -# 16) land_evapotranspiration_flux -# 17) overland_runoff_flux -# 18) baseflow_flux -# 19) snow_melt_flux -# 20) soil_water_infiltration_flux -# 21) land_fraction_saturated -# 22) land_fraction_unsaturated -# 23) land_fraction_wilting -# 24) land_fraction_snow_covered -# 25) heat_flux_sensible -# 26) heat_flux_latent -# 27) heat_flux_ground -# 28) net_downward_shortwave_flux -# 29) net_downward_longwave_flux -# 30) radiation_shortwave_downward_flux -# 31) radiation_longwave_absorbed_flux -# 32) precipitation_total_surface_flux -# 33) snowfall_surface_flux -# 34) surface_pressure -# 35) height_lowatmmodlay -# 36) temp_lowatmmodlay -# 37) specific_humidity_lowatmmodlay -# 38) windspeed_lowatmmodlay -# 39) vegetation_greenness_fraction -# 40) leaf_area_index -# 41) sm_rootzone_pctl -# 42) sm_profile_pctl -# 43) depth_to_water_table_from_surface_in_peat -# 44) free_surface_water_on_peat_flux -# 45) mwrtm_vegopacity - -# Convert_SMAPL4_EASEvsHDF5_GeoTiff.py -# written by Qing Liu, Aurthur Endsley, and Karyn Tabor -# last edited November 30, 2022 by Karyn Tabor - -from absl import app - -from osgeo import gdal -import glob -import os -import re -import numpy as np -from pyl4c.lib.modis import dec2bin_unpack -from pyl4c.spatial import array_to_raster -from pyl4c.data.fixtures import EASE2_GRID_PARAMS -from pyl4c.epsg import EPSG - -from google3.third_party.earthengine_community.datasets import smap_lib - -# function to convert single EASEv2 h5 file to GeoTiff file with multiple -# variables. - - -VAR_LIST = [ - 'sm_surface', 'sm_rootzone', 'sm_profile', 'sm_surface_wetness', - 'sm_rootzone_wetness', 'sm_profile_wetness', 'surface_temp', - 'soil_temp_layer1', 'soil_temp_layer2', 'soil_temp_layer3', - 'soil_temp_layer4', 'soil_temp_layer5', 'soil_temp_layer6', - 'snow_mass', 'snow_depth', 'land_evapotranspiration_flux', - 'overland_runoff_flux', 'baseflow_flux', 'snow_melt_flux', - 'soil_water_infiltration_flux', 'land_fraction_saturated', - 'land_fraction_unsaturated', 'land_fraction_wilting', - 'land_fraction_snow_covered', 'heat_flux_sensible', - 'heat_flux_latent', 'heat_flux_ground', 'net_downward_shortwave_flux', - 'net_downward_longwave_flux', 'radiation_shortwave_downward_flux', - 'radiation_longwave_absorbed_flux', 'precipitation_total_surface_flux', - 'snowfall_surface_flux', 'surface_pressure', 'height_lowatmmodlay', - 'temp_lowatmmodlay', 'specific_humidity_lowatmmodlay', - 'windspeed_lowatmmodlay', 'vegetation_greenness_fraction', - 'leaf_area_index', 'sm_rootzone_pctl', 'sm_profile_pctl', - 'depth_to_water_table_from_surface_in_peat', - 'free_surface_water_on_peat_flux', 'mwrtm_vegopacity' -] - - -def convert(source_h5, target_tif): - """Converts a SMAP L4 HDF file to a geotiff.""" - - # Options for gdal_translate - translate_options = gdal.TranslateOptions( - format = 'GTiff', - outputSRS = '+proj=cea +lon_0=0 +lat_ts=30 +ellps=WGS84 +units=m', - outputBounds =[-17367530.45, 7314540.11, 17367530.45, -7314540.11], - ) - - #array_to_raster params - gt = EASE2_GRID_PARAMS['M09']['geotransform'] - wkt = EPSG[6933] - - #initiate temp tiff list - tif_list =[] - - # convert individual variables to separate GeoTiff files - sizeoflist = len(VAR_LIST) - for iband in range(0,sizeoflist): - var = VAR_LIST[iband] - sds = smap_lib.gdal_safe_open('HDF5:'+source_h5+'://Geophysical_Data/'+var) - sds_array = sds.ReadAsArray() - dst_tmp = '/tmp/tmp'+str(iband+1)+'.tif' - sds_gdal = array_to_raster(sds_array,gt,wkt) - ds = gdal.Translate(dst_tmp,sds_gdal,options=translate_options) - ds = None - tif_list.append(dst_tmp) - - # build a VRT(Virtual Dataset) that includes the list of input tif files - gdal.BuildVRT('/tmp/tmp.vrt', tif_list, options='-separate') - - warp_options = gdal.WarpOptions( - creationOptions=['COMPRESS=LZW'], - srcSRS='EPSG:6933', - dstSRS='EPSG:4326', - srcNodata=-9999, - dstNodata=-9999 - ) - - # run gdal_Warp to reproject the VRT data and save in the target tif file with - # compression - ds = gdal.Warp(target_tif, '/tmp/tmp.vrt', options=warp_options) - ds = None - - # remove temporary files - os.remove('/tmp/tmp.vrt') - for f in tif_list: - if os.path.exists(f): - os.remove(f) - - -def main(argv): - convert(argv[1], argv[2]) - - -if __name__ == '__main__': - app.run(main) diff --git a/datasets/smap_lib.py b/datasets/smap_lib.py deleted file mode 100644 index cb716a454..000000000 --- a/datasets/smap_lib.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2022 The Google Earth Engine Community Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Helper functions for SMAP conversion.""" - -from osgeo import gdal - - -def gdal_safe_open(path): - result = gdal.Open(path) - if not result: - raise ValueError('GDAL could not open ' + path) - return result -