From f5caa4f7006617b8cd27bef55afe3fab17a14cd8 Mon Sep 17 00:00:00 2001 From: rileyhales Date: Fri, 28 Feb 2020 13:38:42 -0700 Subject: [PATCH] 0.5 geotiffs and __utilities files --- conda.recipes/meta.yaml | 3 +- docs/API Documentation/geotiffs.rst | 9 +++ docs/API Documentation/index.rst | 1 + docs/API Documentation/netcdfs.rst | 7 -- docs/conf.py | 2 +- geomatics/__init__.py | 2 + geomatics/__utilities.py | 31 +++++++++ geomatics/geotiffs.py | 93 +++++++++++++++++++++++++ geomatics/netcdfs.py | 103 ++-------------------------- setup.py | 2 +- 10 files changed, 146 insertions(+), 107 deletions(-) create mode 100644 docs/API Documentation/geotiffs.rst create mode 100644 geomatics/__utilities.py create mode 100644 geomatics/geotiffs.py diff --git a/conda.recipes/meta.yaml b/conda.recipes/meta.yaml index bd56355..4650572 100644 --- a/conda.recipes/meta.yaml +++ b/conda.recipes/meta.yaml @@ -1,6 +1,6 @@ package: name: geomatics - version: 0.4 + version: 0.5 about: author: Riley Hales @@ -30,6 +30,7 @@ requirements: - python-dateutil - numpy - pandas + - pyshp outputs: - name: geomatics diff --git a/docs/API Documentation/geotiffs.rst b/docs/API Documentation/geotiffs.rst new file mode 100644 index 0000000..8638b81 --- /dev/null +++ b/docs/API Documentation/geotiffs.rst @@ -0,0 +1,9 @@ +======== +geotiffs +======== + +Convert other grids to GeoTiffs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: geomatics.geotiffs + :members: convert_netcdf \ No newline at end of file diff --git a/docs/API Documentation/index.rst b/docs/API Documentation/index.rst index 9979f46..8f0645b 100644 --- a/docs/API Documentation/index.rst +++ b/docs/API Documentation/index.rst @@ -7,3 +7,4 @@ API Documentation netcdfs geojsons + geotiffs diff --git a/docs/API Documentation/netcdfs.rst b/docs/API Documentation/netcdfs.rst index 5293e68..0c3e1cf 100644 --- a/docs/API Documentation/netcdfs.rst +++ b/docs/API Documentation/netcdfs.rst @@ -9,10 +9,3 @@ a single timestep. .. 
automodule:: geomatics.netcdfs :members: point_series, box_series, shp_series - -netCDFs and GeoTIFFs -~~~~~~~~~~~~~~~~~~~~ - -.. automodule:: geomatics.netcdfs - :members: convert_to_geotiff - :noindex: \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index f9ea1ee..b2cd775 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,7 +13,7 @@ author = 'Riley Hales' # The full version, including alpha/beta/rc tags -release = '0.4' +release = '0.5' master_doc = 'index' # -- General configuration --------------------------------------------------- diff --git a/geomatics/__init__.py b/geomatics/__init__.py index c2ca1c7..e5c2024 100644 --- a/geomatics/__init__.py +++ b/geomatics/__init__.py @@ -1,2 +1,4 @@ import geomatics.netcdfs import geomatics.geojsons +import geomatics.geotiffs +import geomatics.__utilities diff --git a/geomatics/__utilities.py b/geomatics/__utilities.py new file mode 100644 index 0000000..6a0ba59 --- /dev/null +++ b/geomatics/__utilities.py @@ -0,0 +1,31 @@ +import os +import glob + + +def path_to_file_list(path, filetype): + if filetype in ('netcdf', 'nc', 'nc4'): + filters = ['*.nc', '*.nc4'] + elif filetype in ('grib', 'grb'): + filters = ['*.grib', '*.grb'] + elif filetype in ('geotiff', 'tiff'): + filters = ['*.geotiff', '*.gtiff', '*.tiff'] + else: + raise ValueError('Unconfigured filter type') + + # check that a valid path was provided + if isinstance(path, str): + if not os.path.exists(path): + raise FileNotFoundError('No files or directory found at this path') + elif os.path.isfile(path): + return [path] + elif os.path.isdir(path): + files = [] + for filter in filters: + files += glob.glob(os.path.join(path, filter)) + if len(files) == 0: + raise FileNotFoundError('No located within this directory') + return files + elif isinstance(path, list): + return path + else: + raise ValueError('Provide an absolute file path to a file or directory of files, or a list of paths') diff --git a/geomatics/geotiffs.py 
b/geomatics/geotiffs.py new file mode 100644 index 0000000..a9ebe4e --- /dev/null +++ b/geomatics/geotiffs.py @@ -0,0 +1,93 @@ +import os + +import netCDF4 +import rasterio +import numpy as np + +from .__utilities import path_to_file_list + +__all__ = ['convert_netcdf'] + + +def convert_netcdf(path, variable, **kwargs): + """ + Args: + path: Either 1) the absolute path to a directory containing netcdfs named by date or 2) the absolute path to + a single netcdf containing many time values for a specified variable + variable: The name of a variable as it is stored in the netcdf e.g. 'temp' instead of Temperature + + Keyword Args: + xvar: Name of the x coordinate variable used to spatially reference the netcdf array. Default: 'lon' (longitude) + yvar: Name of the y coordinate variable used to spatially reference the netcdf array. Default: 'lat' (latitude) + save_dir: The directory to store the geotiffs to. Default: directory containing the netcdfs. + fill_value: The value used for filling no_data spaces in the array. Default: -9999 + delete_sources: Allows you to delete the source netcdfs as they are converted. Default: False + + Returns: + 1. A list of paths to the geotiff files created + 2. 
A dictionary that contains the affine geotransform information + """ + files = path_to_file_list(path, 'nc') + + # parse the optional arguments from the kwargs + x_var = kwargs.get('xvar', 'lon') + y_var = kwargs.get('yvar', 'lat') + save_dir = kwargs.get('save_dir', os.path.dirname(files[0])) + delete_sources = kwargs.get('delete_sources', False) + fill_value = kwargs.get('fill_value', -9999) + + # open the first netcdf and collect georeferencing information + nc_obj = netCDF4.Dataset(files[0], 'r') + lat = nc_obj.variables[x_var][:] + lon = nc_obj.variables[y_var][:] + lon_min = lon.min() + lon_max = lon.max() + lat_min = lat.min() + lat_max = lat.max() + data = nc_obj[variable][:] + data = data[0] + height = data.shape[0] + width = data.shape[1] + nc_obj.close() + + # Geotransform for each of the netcdf files + gt = rasterio.transform.from_bounds(lon_min, lat_min, lon_max, lat_max, width, height) + + # A list of all the files that get written which can be returned + output_files = [] + + # Create a geotiff for each netcdf in the list of files + for file in files: + # set the files to open/save + save_path = os.path.join(save_dir, os.path.basename(file) + '.tif') + output_files.append(save_path) + + # open the netcdf and get the data array + nc_obj = netCDF4.Dataset(file, 'r') + array = np.asarray(nc_obj[variable][:]) + array = array[0] + array[array == fill_value] = np.nan # If you have fill values, change the comparator to get rid of them + array = np.flip(array, axis=0) + nc_obj.close() + + # if you want to delete the source netcdfs as you go + if delete_sources: + os.remove(file) + + # write it to a geotiff + with rasterio.open( + save_path, + 'w', + driver='GTiff', + height=data.shape[0], + width=data.shape[1], + count=1, + dtype=data.dtype, + nodata=np.nan, + crs='+proj=latlong', + transform=gt, + ) as dst: + dst.write(array, 1) + + return output_files, dict( + lon_min=lon_min, lon_max=lon_max, lat_min=lat_min, lat_max=lat_max, height=height, width=width) diff 
--git a/geomatics/netcdfs.py b/geomatics/netcdfs.py index a7d83cc..b644934 100644 --- a/geomatics/netcdfs.py +++ b/geomatics/netcdfs.py @@ -8,7 +8,9 @@ import rasterio import rasterstats -__all__ = ['point_series', 'box_series', 'shp_series', 'convert_to_geotiff'] +from .__utilities import path_to_file_list + +__all__ = ['point_series', 'box_series', 'shp_series'] def point_series(path, variable, coordinates, filename_pattern=None, **kwargs): @@ -45,7 +47,7 @@ def point_series(path, variable, coordinates, filename_pattern=None, **kwargs): fill_value = kwargs.get('fill_value', -9999) # confirm that a valid path to data was provided - files = __path_to_file_list(path) + files = path_to_file_list(path, 'nc') files.sort() # get a list of the x&y coordinates in the netcdfs using the first file as a reference @@ -128,7 +130,7 @@ def box_series(path, variable, coordinates, filename_pattern=None, **kwargs): stat = kwargs.get('stat_type', 'mean') # confirm that a valid path to data was provided - files = __path_to_file_list(path) + files = path_to_file_list(path, 'nc') files.sort() # get a list of the x&y coordinates using the first file as a reference @@ -239,7 +241,7 @@ def shp_series(path, variable, shp_path, filename_pattern=None, **kwargs): stat = kwargs.get('stat_type', 'mean') # confirm that a valid path to data was provided - files = __path_to_file_list(path) + files = path_to_file_list(path, 'nc') files.sort() # open the netcdf determine the affine transformation of the netcdf grids @@ -307,99 +309,6 @@ def shp_series(path, variable, shp_path, filename_pattern=None, **kwargs): return pd.DataFrame(timeseries[1], columns=['values'], index=timeseries[0]) -def convert_to_geotiff(files, variable, **kwargs): - """ - - Args: - files: - variable: - **kwargs: - - Returns: - - """ - files = __path_to_file_list(files) - - # parse the optional argument from the kwargs - save_dir = kwargs.get('save_dir', os.path.dirname(files[0])) - delete_sources = kwargs.get('delete_sources', 
False) - fill_value = kwargs.get('fill_value', -9999) - - # open the first netcdf and collect georeferencing information - nc_obj = netCDF4.Dataset(files[0], 'r') - lat = nc_obj.variables['lat'][:] - lon = nc_obj.variables['lon'][:] - lon_min = lon.min() - lon_max = lon.max() - lat_min = lat.min() - lat_max = lat.max() - data = nc_obj[variable][:] - data = data[0] - height = data.shape[0] - width = data.shape[1] - nc_obj.close() - - # Geotransform for each of the netcdf files - gt = rasterio.transform.from_bounds(lon_min, lat_min, lon_max, lat_max, width, height) - - # A list of all the files that get written which can be returned - output_files = [] - - # Create a geotiff for each netcdf in the list of files - for file in files: - # set the files to open/save - save_path = os.path.join(save_dir, os.path.basename(file) + '.tif') - output_files.append(save_path) - - # open the netcdf and get the data array - nc_obj = netCDF4.Dataset(file, 'r') - array = np.asarray(nc_obj[variable][:]) - array = array[0] - array[array == fill_value] = np.nan # If you have fill values, change the comparator to git rid of it - array = np.flip(array, axis=0) - nc_obj.close() - - # if you want to delete the source netcdfs as you go - if delete_sources: - os.remove(file) - - # write it to a geotiff - with rasterio.open( - save_path, - 'w', - driver='GTiff', - height=data.shape[0], - width=data.shape[1], - count=1, - dtype=data.dtype, - nodata=np.nan, - crs='+proj=latlong', - transform=gt, - ) as dst: - dst.write(array, 1) - - return output_files, dict( - lon_min=lon_min, lon_max=lon_max, lat_min=lat_min, lat_max=lat_max, height=height, width=width) - - -def __path_to_file_list(path): - # check that a valid path was provided - if isinstance(path, str): - if os.path.isfile(path): - return [path] - elif os.path.isdir(path): - files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith('.nc') or f.endswith('.nc4')] - if len(files) == 0: - raise FileNotFoundError('No netcdfs located 
within this directory') - return files - else: - raise FileNotFoundError('No netcdf file or directory found at this path') - elif isinstance(path, list): - return path - else: - raise ValueError('Provide an absolute file path to a netcdf or directory of netcdf files, or a list of paths') - - def __guess_timedelta(nc_obj, t_var, step=1): units = str(nc_obj[t_var].__dict__['units']) units = units.replace(' ', '').lower() diff --git a/setup.py b/setup.py index febdceb..ef10884 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='geomatics', packages=['geomatics'], - version='0.4', + version='0.5', description='GIS tools developed by Riley Hales for the BYU Hydroinformatics Lab', long_description=long_description, long_description_content_type="text/markdown",