diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 17da89a..ccadf53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,7 +39,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.10", "3.12" ] + python-version: [ "3.12" ] numpy-version: [ "1", "2" ] steps: - uses: actions/checkout@v4 @@ -59,12 +59,16 @@ jobs: - name: Install raw2zarr run: | python -m pip install . --no-deps + - name: Install Test Dependencies + run: | + python -m pip install --upgrade pip + pip install pytest pytest-xdist pytest-cov - name: Version Info run: | - python -c "import raw2zarr; print(xradar.version.version)" + python -c "import raw2zarr; print(raw2zarr.__version__)" - name: Test with pytest run: | - pytest -n auto --dist loadfile --verbose --durations=15 --cov-report xml:coverage_unit.xml --cov=xradar --pyargs tests + pytest -n auto --dist loadfile --verbose --durations=15 --cov-report xml:coverage_unit.xml --cov=raw2zarr --cov-append - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: diff --git a/.gitignore b/.gitignore index 95364d1..2ce81a6 100644 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,7 @@ MANIFEST pip-log.txt pip-delete-this-directory.txt -# Unit test / coverage reports +# Unit tests / coverage reports htmlcov/ .tox/ .nox/ diff --git a/delete.py b/delete.py deleted file mode 100644 index 24ac28d..0000000 --- a/delete.py +++ /dev/null @@ -1,53 +0,0 @@ -import fsspec -import xradar as xd -import pyart -from xarray import DataTree -import xarray as xr -from time import time -import s3fs - - -def main(): - print(xr.__version__) - st = time() - ## S3 bucket connection - URL = "https://js2.jetstream-cloud.org:8001/" - path = f"pythia/radar/erad2024" - fs = s3fs.S3FileSystem(anon=True, client_kwargs=dict(endpoint_url=URL)) - file = s3fs.S3Map(f"{path}/zarr_radar/Guaviare_test.zarr", s3=fs) - - # opening datatree stored in zarr - dtree = xr.backends.api.open_datatree( - file, engine="zarr", consolidated=True, chunks={} - ) - print(f"total time: {time() -st}") - - -def open_dtree(): - st = time() - print(xr.__version__) - path = "/media/alfonso/drive/Alfonso/python/raw2zarr/zarr/Guaviare_V2.zarr" - # path = "/media/alfonso/drive/Alfonso/python/raw2zarr/zarr/nexrad2.zarr" - dt = xr.open_datatree( - path, - engine="zarr", - consolidated=True, - chunks={}, - # group="sweep_0" - ) - print(f"total time: {time() -st}") - - -if __name__ == "__main__": - # open_dtree() - - file = "s3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_124851_V06" - file = "s3://noaa-nexrad-level2/2024/06/29/KILX/KILX20230629_120200_V06" - local_file = fsspec.open_local( - f"simplecache::s3://{file}", - s3={"anon": True}, - filecache={"cache_storage": "."}, - ) - # - # radar = pyart.io.read_nexrad_archive(local_file) - dtree = xd.io.open_nexradlevel2_datatree(local_file) diff --git a/environment.yml b/environment.yml index b49c0ad..efcc06e 100755 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - nodefaults dependencies: - - python>=3.12 + - python=3.12 - mamba - cartopy - fsspec @@ -21,6 +21,11 @@ dependencies: - hvplot - datashader - xarray<2025.0 + - pytest + - pytest-cov + - pytest-doctestplus + - pytest-sugar + - pytest-xdist - pip - pip: - . diff --git a/notebooks/1.Sigmet2Zarr.ipynb b/notebooks/1.Sigmet2Zarr.ipynb index 8d84dd8..eb9dcb7 100755 --- a/notebooks/1.Sigmet2Zarr.ipynb +++ b/notebooks/1.Sigmet2Zarr.ipynb @@ -622,22 +622,16 @@ } ], "source": [ - "import xradar as xd\n", + "from datetime import datetime\n", + "\n", + "import dask.bag as db\n", "import fsspec\n", - "import datatree\n", "import numpy as np\n", - "import pyproj\n", - "import holoviews as hv\n", - "import hvplot\n", - "import hvplot.xarray\n", - "from datetime import datetime\n", - "from wradlib.georef import epsg_to_osr, georeference\n", - "from cartopy import crs as ccrs\n", + "import xradar as xd\n", + "from dask.distributed import Client, LocalCluster\n", + "from sigmet2zarr.task2zarr import dt2zarr2, prepare2append\n", "from sigmet2zarr.utils import batch, create_query, data_accessor, make_dir\n", - "from sigmet2zarr.task2zarr import prepare2append, dt2zarr2\n", - "from xarray.backends.api import open_datatree\n", - "import dask.bag as db\n", - "from dask.distributed import Client, LocalCluster" + "from xarray.backends.api import open_datatree" ] }, { @@ -660,7 +654,7 @@ "v = 2\n", "consolidated = False if v == 3 else True\n", "# Store path to save radar datatree in Zarr format\n", - "zarr_store = f'../zarr/{radar_name}.zarr'\n", + "zarr_store = f\"../zarr/{radar_name}.zarr\"\n", "make_dir(zarr_store)\n", "\n", "# Lest define some dates to convert\n", @@ -686,11 +680,11 @@ "outputs": [], "source": [ "def accessor_wrapper(filename):\n", - " return prepare2append(xd.io.open_iris_datatree(\n", - " data_accessor(filename)),\n", - " append_dim=\"vcp_time\",\n", - " radar_name=\"GUA\"\n", - " )" + " return prepare2append(\n", + " xd.io.open_iris_datatree(data_accessor(filename)),\n", + " append_dim=\"vcp_time\",\n", + " radar_name=\"GUA\",\n", + " )" ] }, { @@ -810,7 +804,9 @@ " query = create_query(date=date_query, radar_site=radar_name)\n", " str_bucket = \"s3://s3-radaresideam/\"\n", " fs = fsspec.filesystem(\"s3\", anon=True)\n", - " radar_files = [f\"s3://{i}\" for i in sorted(fs.glob(f\"{str_bucket}{query}*\"))][550:600]\n", + " radar_files = [f\"s3://{i}\" for i in sorted(fs.glob(f\"{str_bucket}{query}*\"))][\n", + " 550:600\n", + " ]\n", " for files in batch(radar_files, n=12):\n", " bag = db.from_sequence(files, npartitions=len(files)).map(accessor_wrapper)\n", " ls_dtree = bag.compute()\n", @@ -891,11 +887,10 @@ "source": [ "%%time\n", "dt_radar = open_datatree(\n", - " # zarr_store, \n", - " \"../zarr/Guaviare_V2.zarr/\", \n", - "\n", - " engine='zarr',\n", - " chunks={}\n", + " # zarr_store,\n", + " \"../zarr/Guaviare_V2.zarr/\",\n", + " engine=\"zarr\",\n", + " chunks={},\n", ")" ] }, @@ -1421,7 +1416,7 @@ } ], "source": [ - "dt_radar['sweep_0']" + "dt_radar[\"sweep_0\"]" ] }, { @@ -2014,7 +2009,7 @@ } ], "source": [ - "ds_05 = dt_radar['sweep_0'].ds\n", + "ds_05 = dt_radar[\"sweep_0\"].ds\n", "display(ds_05)" ] }, @@ -2186,7 +2181,7 @@ " y=\"y\",\n", " cmap=\"ChaseSpectral\",\n", " clabel=\"Horizontal Reflectivity (dBZ)\",\n", - " title=f'Carimagua Radar',\n", + " title=\"Carimagua Radar\",\n", " clim=(-20, 60),\n", " height=540,\n", " rasterize=True,\n", @@ -2753,7 +2748,7 @@ } ], "source": [ - "ds_sw9 = dt_radar['sweep_9'].ds\n", + "ds_sw9 = dt_radar[\"sweep_9\"].ds\n", "display(ds_sw9)" ] }, @@ -2774,7 +2769,7 @@ ], "source": [ "%%time\n", - "qvp = 10 * np.log10((10 ** (ds_sw9.DBZH / 10)).mean('azimuth'))" + "qvp = 10 * np.log10((10 ** (ds_sw9.DBZH / 10)).mean(\"azimuth\"))" ] }, { @@ -2784,7 +2779,12 @@ "metadata": {}, "outputs": [], "source": [ - "qvp = qvp.assign_coords(range=(qvp.range.values * np.sin(ds_sw9.sweep_fixed_angle.mean(skipna=True).values * np.pi / 180. )))" + "qvp = qvp.assign_coords(\n", + " range=(\n", + " qvp.range.values\n", + " * np.sin(ds_sw9.sweep_fixed_angle.mean(skipna=True).values * np.pi / 180.0)\n", + " )\n", + ")" ] }, { @@ -2815,7 +2815,9 @@ } ], "source": [ - "qvp.sel(range=slice(0, 12000)).plot(x='vcp_time', y='range', cmap='ChaseSpectral', vmin=-10, vmax=50)" + "qvp.sel(range=slice(0, 12000)).plot(\n", + " x=\"vcp_time\", y=\"range\", cmap=\"ChaseSpectral\", vmin=-10, vmax=50\n", + ")" ] }, { diff --git a/notebooks/NexRad2Zarr.ipynb b/notebooks/NexRad2Zarr.ipynb index 9e25ea4..291eaae 100644 --- a/notebooks/NexRad2Zarr.ipynb +++ b/notebooks/NexRad2Zarr.ipynb @@ -634,10 +634,10 @@ } ], "source": [ - "import xarray as xr\n", "import fsspec\n", - "from raw2zarr.dtree_builder import append_parallel\n", - "import hvplot.xarray" + "import xarray as xr\n", + "\n", + "from raw2zarr.dtree_builder import append_parallel" ] }, { @@ -789,7 +789,7 @@ } ], "source": [ - "append_parallel?" + "?append_parallel" ] }, { @@ -885,11 +885,7 @@ ], "source": [ "%%time\n", - "dt_radar = xr.open_datatree(\n", - " zarr_store, \n", - " engine='zarr',\n", - " chunks={}\n", - ")" + "dt_radar = xr.open_datatree(zarr_store, engine=\"zarr\", chunks={})" ] }, { @@ -34022,7 +34018,7 @@ } ], "source": [ - "ds_lowest = dt_radar['VCP-12/sweep_0'].ds\n", + "ds_lowest = dt_radar[\"VCP-12/sweep_0\"].ds\n", "display(ds_lowest)" ] }, @@ -34070,7 +34066,9 @@ } ], "source": [ - "ds_lowest.isel(vcp_time=1).DBZH.plot(x=\"x\", y=\"y\", cmap=\"ChaseSpectral\", vmin=-10, vmax=70)" + "ds_lowest.isel(vcp_time=1).DBZH.plot(\n", + " x=\"x\", y=\"y\", cmap=\"ChaseSpectral\", vmin=-10, vmax=70\n", + ")" ] }, { @@ -34194,7 +34192,7 @@ " y=\"y\",\n", " cmap=\"ChaseSpectral\",\n", " clabel=\"Horizontal Reflectivity (dBZ)\",\n", - " title=f'Carimagua Radar',\n", + " title=\"Carimagua Radar\",\n", " clim=(-20, 60),\n", " height=540,\n", " rasterize=True,\n", diff --git a/notebooks/Poster.ipynb b/notebooks/Poster.ipynb index b452927..1836559 100644 --- a/notebooks/Poster.ipynb +++ b/notebooks/Poster.ipynb @@ -7,12 +7,12 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", "import cartopy.crs as ccrs\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", "import xradar as xd\n", - "from xarray.backends.api import open_datatree\n", - "from dask.distributed import Client, LocalCluster" + "from dask.distributed import LocalCluster\n", + "from xarray.backends.api import open_datatree" ] }, { @@ -22,7 +22,7 @@ "metadata": {}, "outputs": [], "source": [ - "cluster = LocalCluster() " + "cluster = LocalCluster()" ] }, { @@ -42,9 +42,7 @@ "metadata": {}, "outputs": [], "source": [ - "radar_dtree = open_datatree(path_datatree, \n", - " engine='zarr', \n", - " chunks={})" + "radar_dtree = open_datatree(path_datatree, engine=\"zarr\", chunks={})" ] }, { @@ -25757,7 +25755,7 @@ } ], "source": [ - "print(f'radar datatree size: {radar_dtree.nbytes / (1024 ** 3)} GB')" + "print(f\"radar datatree size: {radar_dtree.nbytes / (1024 ** 3)} GB\")" ] }, { @@ -25775,7 +25773,7 @@ "metadata": {}, "outputs": [], "source": [ - "ds_20deg = radar_dtree['sweep_9'].ds" + "ds_20deg = radar_dtree[\"sweep_9\"].ds" ] }, { @@ -25795,10 +25793,14 @@ ], "source": [ "%%time\n", - "qvp = 10 * np.log10((10 ** (ds_20deg.DBZH / 10)).mean('azimuth'))\n", - "qvp = qvp.assign_coords(range=(qvp.range.values / 1000 * \n", - " np.sin(ds_20deg.sweep_fixed_angle.mean(skipna=True).values \n", - " * np.pi / 180. )))" + "qvp = 10 * np.log10((10 ** (ds_20deg.DBZH / 10)).mean(\"azimuth\"))\n", + "qvp = qvp.assign_coords(\n", + " range=(\n", + " qvp.range.values\n", + " / 1000\n", + " * np.sin(ds_20deg.sweep_fixed_angle.mean(skipna=True).values * np.pi / 180.0)\n", + " )\n", + ")" ] }, { @@ -25828,18 +25830,15 @@ ], "source": [ "%%time\n", - "fig, ax = plt.subplots(figsize=(6,4), dpi=150)\n", - "qvp.sel(range=slice(0, 12)).sel(vcp_time='2022-06-04').plot(x='vcp_time', \n", - " y='range', \n", - " cmap='ChaseSpectral', \n", - " vmin=-10, \n", - " vmax=50, \n", - " ax=ax)\n", + "fig, ax = plt.subplots(figsize=(6, 4), dpi=150)\n", + "qvp.sel(range=slice(0, 12)).sel(vcp_time=\"2022-06-04\").plot(\n", + " x=\"vcp_time\", y=\"range\", cmap=\"ChaseSpectral\", vmin=-10, vmax=50, ax=ax\n", + ")\n", "ax.set_ylabel(r\"$Height \\ [km]$\")\n", - "ax.set_xlabel(r\"$Time \\ [UTC]$\");\n", - "ax.set_title(r\"$QVP \\ Guaviare \\ radar$\");\n", + "ax.set_xlabel(r\"$Time \\ [UTC]$\")\n", + "ax.set_title(r\"$QVP \\ Guaviare \\ radar$\")\n", "fig.tight_layout()\n", - "plt.savefig('../results/Guaviare_QVP.svg', bbox_inches='tight')" + "plt.savefig(\"../results/Guaviare_QVP.svg\", bbox_inches=\"tight\")" ] }, { @@ -25857,7 +25856,7 @@ "metadata": {}, "outputs": [], "source": [ - "ds_05deg = radar_dtree['sweep_0'].ds" + "ds_05deg = radar_dtree[\"sweep_0\"].ds" ] }, { @@ -25869,8 +25868,8 @@ "source": [ "def rain_rate(z, a=200, b=1.6, dt=5):\n", " \"\"\"\n", - " Function that computes rain rate from the Marshall & Palmer (1948) relationship. \n", - " This function also converts rain rates into rain depth by accumulating it at a \n", + " Function that computes rain rate from the Marshall & Palmer (1948) relationship.\n", + " This function also converts rain rates into rain depth by accumulating it at a\n", " given time delta\n", " \"\"\"\n", " z_lin = 10 ** (z / 10)\n", @@ -25894,7 +25893,7 @@ ], "source": [ "%%time\n", - "rr_depth = rain_rate(z=ds_05deg.DBZH, a=250, b=1.2).sum('vcp_time').load()" + "rr_depth = rain_rate(z=ds_05deg.DBZH, a=250, b=1.2).sum(\"vcp_time\").load()" ] }, { @@ -25935,20 +25934,24 @@ "proj_crs = xd.georeference.get_crs(ds_05deg)\n", "data_crs = ccrs.Projection(proj_crs)\n", "\n", - "fig, ax = plt.subplots(subplot_kw={'projection': ccrs.PlateCarree()}, dpi=150)\n", - "rr_depth.plot(x='x', y='y', cmap='nipy_spectral',\n", - " cbar_kwargs={'label': r\"$Rain \\ depth \\ [mm]$\", \n", - " 'pad':0.025, \n", - " }, \n", - " ax=ax, \n", - " transform=data_crs, \n", - " vmin=0, \n", - " vmax=300,\n", - " )\n", + "fig, ax = plt.subplots(subplot_kw={\"projection\": ccrs.PlateCarree()}, dpi=150)\n", + "rr_depth.plot(\n", + " x=\"x\",\n", + " y=\"y\",\n", + " cmap=\"nipy_spectral\",\n", + " cbar_kwargs={\n", + " \"label\": r\"$Rain \\ depth \\ [mm]$\",\n", + " \"pad\": 0.025,\n", + " },\n", + " ax=ax,\n", + " transform=data_crs,\n", + " vmin=0,\n", + " vmax=300,\n", + ")\n", "gl = ax.gridlines(draw_labels=True, ls=\"--\", lw=0.5)\n", - "gl.xlabel_style = {'size': 8}\n", - "gl.ylabel_style = {'size': 8}\n", - "ax.set_title(r'$Guaviare \\ Radar \\ QPE$')\n", + "gl.xlabel_style = {\"size\": 8}\n", + "gl.ylabel_style = {\"size\": 8}\n", + "ax.set_title(r\"$Guaviare \\ Radar \\ QPE$\")\n", "gl.top_labels = False\n", "gl.right_labels = False" ] diff --git a/notebooks/RYZHKOV-QVP.ipynb b/notebooks/RYZHKOV-QVP.ipynb index 4dc199f..501173e 100644 --- a/notebooks/RYZHKOV-QVP.ipynb +++ b/notebooks/RYZHKOV-QVP.ipynb @@ -649,17 +649,16 @@ } ], "source": [ - "import os\n", "import gzip\n", + "import os\n", "import tempfile\n", - "import xarray as xr\n", - "import cmweather\n", + "\n", + "import fsspec\n", + "import matplotlib.pyplot as plt\n", "import numpy as np\n", - "import xradar\n", "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import hvplot.xarray\n", - "import fsspec\n", + "import xarray as xr\n", + "import xradar\n", "from dask.distributed import Client, LocalCluster" ] }, @@ -791,7 +790,9 @@ "metadata": {}, "outputs": [], "source": [ - "ds_qvp = dtree[\"/VCP-12/sweep_16\"].ds.sel(vcp_time=slice(\"2011-05-20 10:00\",\"2011-05-20 12:00\"))" + "ds_qvp = dtree[\"/VCP-12/sweep_16\"].ds.sel(\n", + " vcp_time=slice(\"2011-05-20 10:00\", \"2011-05-20 12:00\")\n", + ")" ] }, { @@ -801,16 +802,16 @@ "metadata": {}, "outputs": [], "source": [ - "def compute_qvp(ds: xr.Dataset, var=\"DBZH\")-> xr.DataArray:\n", + "def compute_qvp(ds: xr.Dataset, var=\"DBZH\") -> xr.DataArray:\n", " \"\"\"\n", " Computes a Quasi-Vertical Profile (QVP) from a radar time-series dataset.\n", - " \n", + "\n", " This function averages the specified variable over the azimuthal dimension\n", " to produce a QVP. If the variable is in dBZ (a logarithmic scale), it converts\n", - " the values to linear units before averaging and then converts the result \n", + " the values to linear units before averaging and then converts the result\n", " back to dBZ.\n", " \"\"\"\n", - " units: str = ds[var].attrs['units']\n", + " units: str = ds[var].attrs[\"units\"]\n", " if units.startswith(\"dB\"):\n", " qvp = 10 ** (ds[var] / 10)\n", " qvp = qvp.mean(\"azimuth\", skipna=True)\n", @@ -820,10 +821,15 @@ " qvp = qvp.mean(\"azimuth\", skipna=True)\n", "\n", " # computing heigth dimension\n", - " qvp = qvp.assign_coords({\"range\":(qvp.range.values * \n", - " np.sin(\n", - " ds.sweep_fixed_angle.mean(skipna=True).values * \n", - " np.pi / 180.)) /1000})\n", + " qvp = qvp.assign_coords(\n", + " {\n", + " \"range\": (\n", + " qvp.range.values\n", + " * np.sin(ds.sweep_fixed_angle.mean(skipna=True).values * np.pi / 180.0)\n", + " )\n", + " / 1000\n", + " }\n", + " )\n", "\n", " qvp = qvp.rename(f\"qvp_{var}\")\n", " qvp = qvp.rename({\"range\": \"height\"})\n", @@ -847,7 +853,9 @@ ], "source": [ "%%time\n", - "ds_qvp = dtree[\"/VCP-12/sweep_16\"].ds.sel(vcp_time=slice(\"2011-05-20 09:45\",\"2011-05-20 12:15\"))" + "ds_qvp = dtree[\"/VCP-12/sweep_16\"].ds.sel(\n", + " vcp_time=slice(\"2011-05-20 09:45\", \"2011-05-20 12:15\")\n", + ")" ] }, { @@ -918,16 +926,18 @@ " levels=np.arange(-10, 50, 10), # Contour lines every 10 units\n", " ax=axs[0][0],\n", ")\n", - "axs[0][0].clabel(contour_lines, fmt='%d', inline=True, fontsize=8)\n", + "axs[0][0].clabel(contour_lines, fmt=\"%d\", inline=True, fontsize=8)\n", "\n", "axs[0][0].set_title(r\"$Z$\")\n", "axs[0][0].set_xlabel(\"\")\n", "axs[0][0].set_ylabel(r\"$Height \\ [km]$\")\n", - "axs[0][0].set_ylim(0,7)\n", + "axs[0][0].set_ylim(0, 7)\n", "\n", - "cbar = plt.colorbar(cf, ax=axs[0][0], \n", - " label=r\"$Reflectivity \\ [dBZ]$\", \n", - " )\n", + "cbar = plt.colorbar(\n", + " cf,\n", + " ax=axs[0][0],\n", + " label=r\"$Reflectivity \\ [dBZ]$\",\n", + ")\n", "## ZDR plot\n", "cf1 = qvp_zdr.plot.contourf(\n", " x=\"vcp_time\",\n", @@ -945,15 +955,17 @@ " levels=np.arange(-10, 50, 10), # Contour lines every 10 units\n", " ax=axs[0][1],\n", ")\n", - "axs[0][1].clabel(contour_lines, fmt='%d', inline=True, fontsize=8) \n", + "axs[0][1].clabel(contour_lines, fmt=\"%d\", inline=True, fontsize=8)\n", "\n", "axs[0][1].set_title(r\"$Z_{DR}$\")\n", "axs[0][1].set_xlabel(\"\")\n", "axs[0][1].set_ylabel(r\"\")\n", "\n", - "cbar = plt.colorbar(cf1, ax=axs[0][1], \n", - " label=r\"$Diff. \\ Reflectivity \\ [dB]$\", \n", - " )\n", + "cbar = plt.colorbar(\n", + " cf1,\n", + " ax=axs[0][1],\n", + " label=r\"$Diff. \\ Reflectivity \\ [dB]$\",\n", + ")\n", "\n", "### RHOHV plot\n", "cf2 = qvp_rhohv.plot.contourf(\n", @@ -972,15 +984,17 @@ " levels=np.arange(-10, 50, 10), # Contour lines every 10 units\n", " ax=axs[1][0],\n", ")\n", - "axs[1][0].clabel(contour_lines, fmt='%d', inline=True, fontsize=8)\n", + "axs[1][0].clabel(contour_lines, fmt=\"%d\", inline=True, fontsize=8)\n", "\n", "axs[1][0].set_title(r\"$\\rho _{HV}$\")\n", "axs[1][0].set_ylabel(r\"$Height \\ [km]$\")\n", "axs[1][0].set_xlabel(r\"$Time \\ [UTC]$\")\n", "\n", - "cbar = plt.colorbar(cf2, ax=axs[1][0], \n", - " label=r\"$Cross-Correlation \\ Coef.$\", \n", - " )\n", + "cbar = plt.colorbar(\n", + " cf2,\n", + " ax=axs[1][0],\n", + " label=r\"$Cross-Correlation \\ Coef.$\",\n", + ")\n", "\n", "### PHIDP\n", "cf3 = qvp_phidp.plot.contourf(\n", @@ -990,7 +1004,6 @@ " ax=axs[1][1],\n", " levels=np.arange(0, 110, 5),\n", " add_colorbar=False,\n", - "\n", ")\n", "\n", "contour_lines = qvp_ref.plot.contour(\n", @@ -1000,17 +1013,19 @@ " levels=np.arange(-10, 50, 10), # Contour lines every 10 units\n", " ax=axs[1][1],\n", ")\n", - "axs[1][1].clabel(contour_lines, fmt='%d', inline=True, fontsize=8)\n", + "axs[1][1].clabel(contour_lines, fmt=\"%d\", inline=True, fontsize=8)\n", "\n", "axs[1][1].set_title(r\"$\\theta _{DP}$\")\n", "axs[1][1].set_xlabel(r\"$Time \\ [UTC]$\")\n", "axs[1][1].set_ylabel(r\"\")\n", "\n", - "cbar = plt.colorbar(cf3, ax=axs[1][1], \n", - " label=r\"$Differential \\ Phase \\ [deg]$\", \n", - " )\n", + "cbar = plt.colorbar(\n", + " cf3,\n", + " ax=axs[1][1],\n", + " label=r\"$Differential \\ Phase \\ [deg]$\",\n", + ")\n", "\n", - "fig.tight_layout()\n" + "fig.tight_layout()" ] }, { @@ -1050,7 +1065,7 @@ "\n", " # Step 3: Use xradar to open the temporary file\n", " try:\n", - " data_tree = xradar.io.open_nexradlevel2_datatree(temp_file_path) \n", + " data_tree = xradar.io.open_nexradlevel2_datatree(temp_file_path)\n", " finally:\n", " # Step 4: Clean up the temporary files\n", " os.remove(local_file)\n", @@ -1141,6 +1156,7 @@ "source": [ "import dask.bag as db\n", "\n", + "\n", "def data_extraction(radar_file):\n", " dtree = nexrad_donwload(radar_file)\n", " time_start = pd.to_datetime(dtree.time_coverage_start.item())\n", @@ -1250,16 +1266,18 @@ " levels=np.arange(-10, 50, 10), # Contour lines every 10 units\n", " ax=axs[0][0],\n", ")\n", - "axs[0][0].clabel(contour_lines, fmt='%d', inline=True, fontsize=8)\n", + "axs[0][0].clabel(contour_lines, fmt=\"%d\", inline=True, fontsize=8)\n", "\n", "axs[0][0].set_title(r\"$Z$\")\n", "axs[0][0].set_xlabel(\"\")\n", "axs[0][0].set_ylabel(r\"$Height \\ [km]$\")\n", - "axs[0][0].set_ylim(0,7)\n", + "axs[0][0].set_ylim(0, 7)\n", "\n", - "cbar = plt.colorbar(cf, ax=axs[0][0], \n", - " label=r\"$Reflectivity \\ [dBZ]$\", \n", - " )\n", + "cbar = plt.colorbar(\n", + " cf,\n", + " ax=axs[0][0],\n", + " label=r\"$Reflectivity \\ [dBZ]$\",\n", + ")\n", "## ZDR plot\n", "cf1 = qvp_trad_zdr.plot.contourf(\n", " x=\"vcp_time\",\n", @@ -1277,15 +1295,17 @@ " levels=np.arange(-10, 50, 10), # Contour lines every 10 units\n", " ax=axs[0][1],\n", ")\n", - "axs[0][1].clabel(contour_lines, fmt='%d', inline=True, fontsize=8) \n", + "axs[0][1].clabel(contour_lines, fmt=\"%d\", inline=True, fontsize=8)\n", "\n", "axs[0][1].set_title(r\"$Z_{DR}$\")\n", "axs[0][1].set_xlabel(\"\")\n", "axs[0][1].set_ylabel(r\"\")\n", "\n", - "cbar = plt.colorbar(cf1, ax=axs[0][1], \n", - " label=r\"$Diff. \\ Reflectivity \\ [dB]$\", \n", - " )\n", + "cbar = plt.colorbar(\n", + " cf1,\n", + " ax=axs[0][1],\n", + " label=r\"$Diff. \\ Reflectivity \\ [dB]$\",\n", + ")\n", "\n", "### RHOHV plot\n", "cf2 = qvp_trad_rhohv.plot.contourf(\n", @@ -1304,15 +1324,17 @@ " levels=np.arange(-10, 50, 10), # Contour lines every 10 units\n", " ax=axs[1][0],\n", ")\n", - "axs[1][0].clabel(contour_lines, fmt='%d', inline=True, fontsize=8)\n", + "axs[1][0].clabel(contour_lines, fmt=\"%d\", inline=True, fontsize=8)\n", "\n", "axs[1][0].set_title(r\"$\\rho _{HV}$\")\n", "axs[1][0].set_ylabel(r\"$Height \\ [km]$\")\n", "axs[1][0].set_xlabel(r\"$Time \\ [UTC]$\")\n", "\n", - "cbar = plt.colorbar(cf2, ax=axs[1][0], \n", - " label=r\"$Cross-Correlation \\ Coef.$\", \n", - " )\n", + "cbar = plt.colorbar(\n", + " cf2,\n", + " ax=axs[1][0],\n", + " label=r\"$Cross-Correlation \\ Coef.$\",\n", + ")\n", "\n", "### PHIDP\n", "cf3 = qvp_trad_phidp.plot.contourf(\n", @@ -1322,7 +1344,6 @@ " ax=axs[1][1],\n", " levels=np.arange(0, 110, 5),\n", " add_colorbar=False,\n", - "\n", ")\n", "\n", "contour_lines = qvp_ref.plot.contour(\n", @@ -1332,17 +1353,19 @@ " levels=np.arange(-10, 50, 10), # Contour lines every 10 units\n", " ax=axs[1][1],\n", ")\n", - "axs[1][1].clabel(contour_lines, fmt='%d', inline=True, fontsize=8)\n", + "axs[1][1].clabel(contour_lines, fmt=\"%d\", inline=True, fontsize=8)\n", "\n", "axs[1][1].set_title(r\"$\\theta _{DP}$\")\n", "axs[1][1].set_xlabel(r\"$Time \\ [UTC]$\")\n", "axs[1][1].set_ylabel(r\"\")\n", "\n", - "cbar = plt.colorbar(cf3, ax=axs[1][1], \n", - " label=r\"$Differential \\ Phase \\ [deg]$\", \n", - " )\n", + "cbar = plt.colorbar(\n", + " cf3,\n", + " ax=axs[1][1],\n", + " label=r\"$Differential \\ Phase \\ [deg]$\",\n", + ")\n", "\n", - "fig.tight_layout()\n" + "fig.tight_layout()" ] }, { diff --git a/notebooks/dtree_args.ipynb b/notebooks/dtree_args.ipynb new file mode 100644 index 0000000..2ab2e19 --- /dev/null +++ b/notebooks/dtree_args.ipynb @@ -0,0 +1,1131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "4571f1ba-bd01-489a-860c-558be63e2fb1", + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ebafe38b-0b60-439a-b89b-a6240cad9728", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2024.10.1.dev59+g700191b9'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b3742bd6-e0c9-45bb-9a25-4fc293d632a8", + "metadata": {}, + "outputs": [], + "source": [ + "path = \"../zarr/KVNX.zarr\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4d965cc1-2540-40a3-b981-6a8edf838497", + "metadata": {}, + "outputs": [], + "source": [ + "dt = xr.open_datatree(path, engine=\"zarr\", chunks={})" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3978a759-b958-45f3-9ae9-6eae2a1608af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['VCP-11', 'VCP-12', 'VCP-212', 'VCP-32']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(dt.children)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b3990d30-99cf-4329-a9a1-6f4557da5c11", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/VCP-212',\n", + " '/VCP-212/georeferencing_correction',\n", + " '/VCP-212/radar_calibration',\n", + " '/VCP-212/radar_parameters',\n", + " '/VCP-212/sweep_0',\n", + " '/VCP-212/sweep_1',\n", + " '/VCP-212/sweep_10',\n", + " '/VCP-212/sweep_11',\n", + " '/VCP-212/sweep_12',\n", + " '/VCP-212/sweep_13',\n", + " '/VCP-212/sweep_14',\n", + " '/VCP-212/sweep_15',\n", + " '/VCP-212/sweep_16',\n", + " '/VCP-212/sweep_2',\n", + " '/VCP-212/sweep_3',\n", + " '/VCP-212/sweep_4',\n", + " '/VCP-212/sweep_5',\n", + " '/VCP-212/sweep_6',\n", + " '/VCP-212/sweep_7',\n", + " '/VCP-212/sweep_8',\n", + " '/VCP-212/sweep_9']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(dt[\"VCP-212\"].groups)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ae32cb8a-bb9c-46b8-96b8-88eed6c0c21e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'DBZH' (vcp_time: 59, azimuth: 720, range: 1832)> Size: 623MB\n",
+       "dask.array<open_dataset-DBZH, shape=(59, 720, 1832), dtype=float64, chunksize=(1, 180, 458), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * vcp_time   (vcp_time) datetime64[ns] 472B 2011-05-20T19:19:02 ... 2011-05...\n",
+       "    altitude   int64 8B ...\n",
+       "  * azimuth    (azimuth) float64 6kB 0.25 0.75 1.25 1.75 ... 358.8 359.2 359.8\n",
+       "    crs_wkt    int64 8B ...\n",
+       "    elevation  (azimuth) float64 6kB dask.array<chunksize=(720,), meta=np.ndarray>\n",
+       "    latitude   float64 8B ...\n",
+       "    longitude  float64 8B ...\n",
+       "  * range      (range) float32 7kB 2.125e+03 2.375e+03 ... 4.596e+05 4.599e+05\n",
+       "    time       (azimuth) datetime64[ns] 6kB dask.array<chunksize=(720,), meta=np.ndarray>\n",
+       "    x          (azimuth, range) float64 11MB dask.array<chunksize=(180, 458), meta=np.ndarray>\n",
+       "    y          (azimuth, range) float64 11MB dask.array<chunksize=(180, 458), meta=np.ndarray>\n",
+       "    z          (azimuth, range) float64 11MB dask.array<chunksize=(180, 458), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    long_name:      Equivalent reflectivity factor H\n",
+       "    standard_name:  radar_equivalent_reflectivity_factor_h\n",
+       "    units:          dBZ
" + ], + "text/plain": [ + " Size: 623MB\n", + "dask.array\n", + "Coordinates:\n", + " * vcp_time (vcp_time) datetime64[ns] 472B 2011-05-20T19:19:02 ... 2011-05...\n", + " altitude int64 8B ...\n", + " * azimuth (azimuth) float64 6kB 0.25 0.75 1.25 1.75 ... 358.8 359.2 359.8\n", + " crs_wkt int64 8B ...\n", + " elevation (azimuth) float64 6kB dask.array\n", + " latitude float64 8B ...\n", + " longitude float64 8B ...\n", + " * range (range) float32 7kB 2.125e+03 2.375e+03 ... 4.596e+05 4.599e+05\n", + " time (azimuth) datetime64[ns] 6kB dask.array\n", + " x (azimuth, range) float64 11MB dask.array\n", + " y (azimuth, range) float64 11MB dask.array\n", + " z (azimuth, range) float64 11MB dask.array\n", + "Attributes:\n", + " long_name: Equivalent reflectivity factor H\n", + " standard_name: radar_equivalent_reflectivity_factor_h\n", + " units: dBZ" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt[\"VCP-212/sweep_0\"].DBZH" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9f2b2b59-3150-4c0d-88ac-9c82f82fd5bd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34;42m../zarr/KVNX.zarr/\u001b[0m\n", + "├── \u001b[34;42mVCP-11\u001b[0m\n", + "├── \u001b[34;42mVCP-12\u001b[0m\n", + "├── \u001b[34;42mVCP-212\u001b[0m\n", + "└── \u001b[34;42mVCP-32\u001b[0m\n", + "\n", + "5 directories\n" + ] + } + ], + "source": [ + "! tree -d -L 1 ../zarr/KVNX.zarr/" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1205d440-b2c9-48be-9961-0dd6bd742dfc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34;42m../zarr/KVNX.zarr/VCP-12\u001b[0m\n", + "├── \u001b[34;42maltitude\u001b[0m\n", + "├── \u001b[34;42mgeoreferencing_correction\u001b[0m\n", + "├── \u001b[34;42minstrument_type\u001b[0m\n", + "├── \u001b[34;42mlatitude\u001b[0m\n", + "├── \u001b[34;42mlongitude\u001b[0m\n", + "├── \u001b[34;42mplatform_type\u001b[0m\n", + "├── \u001b[34;42mradar_calibration\u001b[0m\n", + "├── \u001b[34;42mradar_parameters\u001b[0m\n", + "├── \u001b[34;42msweep_0\u001b[0m\n", + "├── \u001b[34;42msweep_1\u001b[0m\n", + "├── \u001b[34;42msweep_10\u001b[0m\n", + "├── \u001b[34;42msweep_11\u001b[0m\n", + "├── \u001b[34;42msweep_12\u001b[0m\n", + "├── \u001b[34;42msweep_13\u001b[0m\n", + "├── \u001b[34;42msweep_14\u001b[0m\n", + "├── \u001b[34;42msweep_15\u001b[0m\n", + "├── \u001b[34;42msweep_16\u001b[0m\n", + "├── \u001b[34;42msweep_2\u001b[0m\n", + "├── \u001b[34;42msweep_3\u001b[0m\n", + "├── \u001b[34;42msweep_4\u001b[0m\n", + "├── \u001b[34;42msweep_5\u001b[0m\n", + "├── \u001b[34;42msweep_6\u001b[0m\n", + "├── \u001b[34;42msweep_7\u001b[0m\n", + "├── \u001b[34;42msweep_8\u001b[0m\n", + "├── \u001b[34;42msweep_9\u001b[0m\n", + "├── \u001b[34;42mtime_coverage_end\u001b[0m\n", + "├── \u001b[34;42mtime_coverage_start\u001b[0m\n", + "├── \u001b[34;42mvcp_time\u001b[0m\n", + "└── \u001b[34;42mvolume_number\u001b[0m\n", + "\n", + "30 directories\n" + ] + } + ], + "source": [ + "! tree -d -L 1 ../zarr/KVNX.zarr/VCP-12" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/radar.mp4 b/notebooks/radar.mp4 new file mode 100644 index 0000000..84a365a Binary files /dev/null and b/notebooks/radar.mp4 differ diff --git a/notebooks/radar_dtree.ipynb b/notebooks/radar_dtree.ipynb new file mode 100644 index 0000000..53ae1e3 --- /dev/null +++ b/notebooks/radar_dtree.ipynb @@ -0,0 +1,555 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7ee62b77-744e-4fb9-b060-f9b77a385bca", + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "from xarray.backends.api import open_datatree" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5332900e-f6c6-4271-9089-365a50467fd5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2024.9.1.dev23+g52f13d44'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "40b970e6-4fbf-4495-afe2-9feaf06400f6", + "metadata": {}, + "outputs": [], + "source": [ + "path = \"/media/alfonso/drive/Alfonso/python/raw2zarr/zarr/Guaviare_V2.zarr\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a6867fc2-5c17-4f33-9eb2-5b171c96c41c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 3.95 s, sys: 654 ms, total: 4.61 s\n", + "Wall time: 5.99 s\n" + ] + } + ], + "source": [ + "%%time\n", + "dt = open_datatree(path, engine=\"zarr\", consolidated=\"True\", chunks={})" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ee19ec44-4dd8-42a9-a0b5-b3655a814a60", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "size in GB 105.6224237754941\n" + ] + } + ], + "source": [ + "print(f\"size in GB {dt.nbytes / 1024**3}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0bce8cf2-5ec8-44af-81ac-062d0c061c33", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'vcp_time' (vcp_time: 847)> Size: 7kB\n",
+       "array(['2022-06-01T00:00:11.780999936', '2022-06-01T00:05:21.983000064',\n",
+       "       '2022-06-01T00:10:31.363000064', ..., '2022-06-04T23:35:11.759000064',\n",
+       "       '2022-06-04T23:40:16.840999936', '2022-06-04T23:45:33.212000000'],\n",
+       "      dtype='datetime64[ns]')\n",
+       "Coordinates:\n",
+       "    altitude   float64 8B ...\n",
+       "    crs_wkt    int64 8B ...\n",
+       "    latitude   float64 8B ...\n",
+       "    longitude  float64 8B ...\n",
+       "  * vcp_time   (vcp_time) datetime64[ns] 7kB 2022-06-01T00:00:11.780999936 .....
" + ], + "text/plain": [ + " Size: 7kB\n", + "array(['2022-06-01T00:00:11.780999936', '2022-06-01T00:05:21.983000064',\n", + " '2022-06-01T00:10:31.363000064', ..., '2022-06-04T23:35:11.759000064',\n", + " '2022-06-04T23:40:16.840999936', '2022-06-04T23:45:33.212000000'],\n", + " dtype='datetime64[ns]')\n", + "Coordinates:\n", + " altitude float64 8B ...\n", + " crs_wkt int64 8B ...\n", + " latitude float64 8B ...\n", + " longitude float64 8B ...\n", + " * vcp_time (vcp_time) datetime64[ns] 7kB 2022-06-01T00:00:11.780999936 ....." + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt[\"sweep_0\"].to_dataset().vcp_time # .sel(vcp_time=\"2022-06-01\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f167978a-cb26-48c7-9f08-bb0bb1cee53e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 212a138..e4d41a6 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,43 @@ description = "Package for convertir Sigmet radar files into zarr format" version = "2025.0.0.1" requires-python = ">=3.9" +[tool.pytest.ini_options] +testpaths = ["tests"] + [tool.setuptools.packages.find] include = ['raw2zarr'] exclude = ["data", "notebooks"] + +[tool.black] +target-version = ["py39"] +line-length = 88 + +[tool.ruff] +target-version = "py39" +builtins = ["ellipsis"] +exclude = [ + ".eggs", + "doc", +] + + +[tool.ruff.lint] +# E402: module level import not at top of file +# E501: line too long - let black worry about that +# E731: do not assign a lambda expression, use a def +ignore = [ + "E402", + "E501", + "E731", +] +select = [ + # Pyflakes + "F", + # Pycodestyle + "E", + "W", + # isort + "I", + # Pyupgrade + "UP", +] diff --git a/raw2zarr/__init__.py b/raw2zarr/__init__.py index 5b50f31..7c18318 100644 --- a/raw2zarr/__init__.py +++ b/raw2zarr/__init__.py @@ -8,15 +8,16 @@ __author__ = """Alfonso Ladino""" __email__ = "alfonso8@illinois.edu" +__version__ = "2025.1.0." from .dtree_builder import ( - datatree_builder, - append_sequential, append_parallel, + append_sequential, + datatree_builder, process_file, ) -from .dtree_io import prepare2read, load_radar_data, _load_file -from .utils import ensure_dimension, fix_angle, batch, dtree_encoding +from .dtree_io import _load_file, load_radar_data, prepare2read +from .utils import batch, dtree_encoding, ensure_dimension, fix_angle from .zarr_writer import dtree2zarr __all__ = [ @@ -28,4 +29,8 @@ "fix_angle", "batch", "dtree_encoding", + "process_file", + "prepare2read", + "_load_file", + "dtree2zarr", ] diff --git a/raw2zarr/create_dataset.py b/raw2zarr/create_dataset.py deleted file mode 100644 index 83a8b62..0000000 --- a/raw2zarr/create_dataset.py +++ /dev/null @@ -1,65 +0,0 @@ -from datetime import datetime - -from sigmet2zarr.utils import ( - create_query, - check_if_exist, - timer_func, - batch, - data_accessor, -) -from sigmet2zarr.task2zarr import prepare2append, dt2zarr2 -import fsspec -import xradar as xd -import dask.bag as db -from dask.distributed import Client, LocalCluster - - -def accessor_wrapper(filename): - return prepare2append( - xd.io.open_iris_datatree(data_accessor(filename)), - append_dim="vcp_time", - radar_name="GUA", - ) - - -@timer_func -def radar_convert(): - cluster = LocalCluster(dashboard_address="127.0.0.1:8785") - client = Client(cluster) - radar_name = "Guaviare" - v = 2 - consolidated = False if v == 3 else True - zarr_store = f"../zarr/{radar_name}_test.zarr" - year, months, days = 2022, range(6, 7), range(1, 5) # Guaviare - # year, months, days = 2022, range(3, 4), range(3, 4) - for month in months: - for day in days: - date_query = datetime(year=year, month=month, day=day) - query = create_query(date=date_query, radar_site=radar_name) - str_bucket = "s3://s3-radaresideam/" - fs = fsspec.filesystem("s3", anon=True) - x - radar_files = [ - f"s3://{i}" for i in sorted(fs.glob(f"{str_bucket}{query}*")) - ][:30] - for files in batch(radar_files, n=12): - bag = db.from_sequence(files, npartitions=len(files)).map( - accessor_wrapper - ) - ls_dtree = bag.compute() - for dtree in ls_dtree: - dt2zarr2( - dtree, - zarr_store=zarr_store, - zarr_version=v, - append_dim="vcp_time", - consolidated=consolidated, - ) - - -def main(): - radar_convert() - - -if __name__ == "__main__": - main() diff --git a/raw2zarr/dtree_builder.py b/raw2zarr/dtree_builder.py index 872cb4a..45e8959 100644 --- a/raw2zarr/dtree_builder.py +++ b/raw2zarr/dtree_builder.py @@ -1,28 +1,27 @@ from __future__ import annotations -from typing import Iterable, List, Union import os +from collections.abc import Iterable -import xarray as xr import pandas as pd -from requests.utils import parse_dict_header +import xarray as xr from xarray import DataTree from xarray.backends.common import _normalize_path # Relative imports from .dtree_io import load_radar_data from .utils import ( - ensure_dimension, - fix_angle, - dtree_encoding, batch, check_adaptative_scannig, + dtree_encoding, + ensure_dimension, + fix_angle, ) from .zarr_writer import dtree2zarr def datatree_builder( - filename_or_obj: Union[str, os.PathLike, Iterable[Union[str, os.PathLike]]], + filename_or_obj: str | os.PathLike | Iterable[str | os.PathLike], engine: str = "iris", append_dim: str = "vcp_time", ) -> DataTree: @@ -229,10 +228,11 @@ def append_parallel( ) """ + import gc from functools import partial + from dask import bag as db from dask.distributed import Client, LocalCluster - import gc cluster = LocalCluster(dashboard_address="127.0.0.1:8785", memory_limit="10GB") client = Client(cluster) @@ -243,7 +243,7 @@ def append_parallel( for radar_files_batch in batch(radar_files, n=batch_size): bag = db.from_sequence(radar_files_batch, npartitions=batch_size).map(pf) - ls_dtree: List[DataTree] = bag.compute() + ls_dtree: list[DataTree] = bag.compute() for dtree in ls_dtree: if dtree: dtree2zarr( diff --git a/raw2zarr/dtree_io.py b/raw2zarr/dtree_io.py index af155fc..69ab59d 100644 --- a/raw2zarr/dtree_io.py +++ b/raw2zarr/dtree_io.py @@ -1,13 +1,14 @@ from __future__ import annotations -import os + import gzip -import tempfile import logging +import os +import tempfile +from typing import Callable -import xradar as xd import fsspec +import xradar as xd from s3fs.core import S3File -from typing import Callable, Dict from xarray import DataTree # Relative imports @@ -17,7 +18,7 @@ SUPPORTED_ENGINES = {"iris", "nexradlevel2"} -ENGINE_REGISTRY: Dict[str, Callable] = {} +ENGINE_REGISTRY: dict[str, Callable] = {} def iris_loader(file: str | bytes | S3File) -> DataTree: @@ -101,9 +102,10 @@ def _decompress(gz_file: str) -> DataTree: DataTree The loaded radar data. """ - with gzip.open(gz_file, "rb") as gz, tempfile.NamedTemporaryFile( - delete=False - ) as temp_file: + with ( + gzip.open(gz_file, "rb") as gz, + tempfile.NamedTemporaryFile(delete=False) as temp_file, + ): temp_file.write(gz.read()) temp_file_path = temp_file.name return temp_file_path diff --git a/raw2zarr/main.py b/raw2zarr/main.py index 65c3c59..3938132 100644 --- a/raw2zarr/main.py +++ b/raw2zarr/main.py @@ -1,8 +1,9 @@ -import fsspec from datetime import datetime -from raw2zarr.dtree_builder import append_sequential, append_parallel -from raw2zarr.utils import timer_func, create_query +import fsspec + +from raw2zarr.dtree_builder import append_sequential +from raw2zarr.utils import create_query, timer_func def get_radar_files(engine): @@ -22,27 +23,41 @@ def get_radar_files(engine): return radar_files, zs, "iris" elif engine == "nexradlevel2": # NEXRAD - zs = "../zarr/nexrad2.zarr" - query = f"2023/06/29/KILX/KILX" + radar = "KVNX" + zs = f"../zarr/{radar}_sail.zarr" + query = f"2012/01/29/{radar}/{radar}" str_bucket = "s3://noaa-nexrad-level2/" radar_files = [f"s3://{i}" for i in sorted(fs.glob(f"{str_bucket}{query}*"))] - radar_files = radar_files[110:200] + radar_files = radar_files return radar_files, zs, "nexradlevel2" @timer_func def main(): + # IRIS Colombia radar_files, zs, engine = get_radar_files("iris") # NEXRAD - # radar_files, zs, engine = get_radar_files('nexradlevel2') - + radar_files, zs, engine = get_radar_files("nexradlevel2") + # radar_fil es = [ + # # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_154426_V06', + # # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_154815_V06', + # # # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_155154_V06', + # # # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_155533_V06', + # # # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_155912_V06', + # # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_155912_V06_MDM', + # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_160251_V06', + # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_160643_V06', + # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_161058_V06', + # 's3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_161526_V06' ### SAIL mode enabled + # ] zarr_format = 2 append_dim = "vcp_time" - # append_sequential(radar_files, zarr_store=zs, append_dim=append_dim, zarr_format=zarr_format) - append_parallel( - radar_files, + # append_parallel( + append_sequential( + radar_files, # [1:4], + # ["s3://noaa-nexrad-level2/2023/06/29/KILX/KILX20230629_124851_V06"], zarr_store=zs, append_dim=append_dim, zarr_format=zarr_format, diff --git a/raw2zarr/task2zarr.py b/raw2zarr/task2zarr.py deleted file mode 100644 index 7956720..0000000 --- a/raw2zarr/task2zarr.py +++ /dev/null @@ -1,199 +0,0 @@ -import zarr -import xradar as xd -import numpy as np -from xarray import full_like, Dataset, DataTree -from zarr.errors import ContainsGroupError -from raw2zarr.utils import ( - data_accessor, - fix_angle, - write_file_radar, - load_toml, - time_encoding, - exp_dim -) - - -def _get_root(dt: DataTree): - groups = [ - i for i in list(dt.groups) if not i.startswith("/sweep") if i not in ["/"] - ] - root = DataTree(data=dt.root.ds, name="root") - for group in groups: - DataTree(data=dt[group].ds, name=group[1:], parent=root) - return root - - -def _fix_sn(ds: Dataset, sw_num: dict[float, int]) -> dict: - sn: float = float(ds["sweep_fixed_angle"].values) - nsn: int = sw_num[sn] - new_sn = full_like(ds.sweep_number, nsn) - new_ds = ds.copy(deep=True) - new_ds["sweep_number"] = new_sn - return new_ds - - -def prepare2append(dt: DataTree, append_dim: str, radar_name: str = "GUA") -> DataTree: - """ - Converts SIGMET radar files into a DataTree structure and prepares it for appending along a specified dimension. - - This function processes a given DataTree of radar data, organizes it by sweep angles, and prepares it for appending - along the specified dimension. It uses configuration files to map radar sweep angles and numbers, and georeferences - the data before appending. - - Parameters - ---------- - dt : DataTree - The DataTree object containing radar data to be processed. - append_dim : str - The dimension along which the data will be appended (e.g., time, elevation). - radar_name : str, optional - The radar name to identify the correct configuration (default is "GUA"). - - Returns - ------- - DataTree - A new DataTree object with all sweeps processed and ready for appending along the specified dimension. - - Notes - ----- - - The function expects a configuration file in TOML format located at "../config/radar.toml", containing - the necessary radar sweep angle and sweep number information. - - Each sweep in the DataTree is georeferenced, and its sweep number is corrected before being organized - into the final DataTree structure. - - Examples - -------- - >>> radar_data = prepare2append(my_datatree, append_dim="time", radar_name="GUA") - >>> # radar_data is now prepared for appending along the time dimension - """ - elev: np.array = np.array( - load_toml("../config/radar.toml")[radar_name]["elevations"] - ) - sw_num: np.array = np.array( - load_toml("../config/radar.toml")[radar_name]["sweep_number"] - ) - swps: dict[float, str] = {j: f"sweep_{idx}" for idx, j in enumerate(elev)} - sw_fix: dict[float, int] = {j: sw_num[idx] for idx, j in enumerate(elev)} - - tree = { - node.path: node.to_dataset() - for node in dt.subtree - if not node.path.startswith("/sweep") - } - tree.update( - { - swps[float(node.sweep_fixed_angle.values)]: fix_angle( - _fix_sn(node, sw_num=sw_fix) - ) - .to_dataset() - .xradar.georeference() - for node in dt.subtree - if node.path.startswith("/sweep") - } - ) - tree = exp_dim(tree, append_dim=append_dim) - return DataTree.from_dict(tree) - - - -def dt2zarr2( - dt: DataTree, - zarr_store: str, - zarr_version: int, - append_dim: str, - consolidated: bool, -) -> None: - """ - Functions to save xradar datatree using zarr format - @param consolidated: Xarray consolidated metadata. Default True - @param append_dim: dimension name where data will be appended. e.g. 'vcp_time' - @param mode: Xarray.to_zarr mode. Options are "w", "w-", "a", "a-", r+", None - @param zarr_version: data can be store in zarr format using version 2 or 3. Default V=2 - @param zarr_store: path to zarr store - @param dt: xradar datatree - @return: None - """ - st: zarr.DirectoryStore = ( - zarr.DirectoryStoreV3(zarr_store) - if zarr_version == 3 - else zarr.DirectoryStore(zarr_store) - ) - - for node in dt.subtree: - ds = node.to_dataset() - group_path = node.path - if group_path.startswith("/sweep"): - encoding = time_encoding(ds, append_dim) - else: - encoding = {} - try: - ds.to_zarr( - store=st, - group=group_path, - mode="w-", - encoding=encoding, - consolidated=consolidated, - ) - except ContainsGroupError: - try: - ds.to_zarr( - store=st, - group=group_path, - mode="a-", - consolidated=consolidated, - append_dim="vcp_time", - ) - except ValueError: - continue - - -def raw2zarr( - file: str, - zarr_store: str, - zarr_version: int = 2, - elevation: list[float] = None, - append_dim: str = "vcp_time", - mode: str = "a", - consolidated: bool = True, - p2c: str = "../results", -) -> None: - """ - Main function to convert sigmet radar files into xradar datatree and save them using zarr format - @param consolidated: Xarray consolidated metadata. Default True - @param p2c: path to write a file where each radar filename will be written once is processed. - @param mode: Xarray.to_zarr mode. Options are "w", "w-", "a", "a-", r+", None - @param append_dim: dimension name where data will be appended. e.g. 'vcp_time' - @param elevation: list of elevation to be converted into zarr. - E.g. [0.5, 1.0, 3]. If None all sweeps within the radar object will be considered - @param zarr_version: data can be store in zarr format using version 2 or 3. Default V=2 - @param zarr_store: path to zarr store - @param file: radar file path - @return: None - """ - dt: DataTree = xd.io.open_iris_datatree(data_accessor(file)) - dtree = prepare2append(dt, append_dim=append_dim) - elevations = [ - np.round(np.median(dtree.children[i].elevation.data), 1) - for i in list(dtree.children) - if i not in ["radar_parameters"] - ] - if not elevation: - dt2zarr2( - dt=dtree, - zarr_store=zarr_store, - zarr_version=zarr_version, - mode=mode, - consolidated=consolidated, - append_dim=append_dim, - ) - write_file_radar(file, p2c) - elif elevation in elevations: - dt2zarr2( - dt=dtree, - zarr_store=zarr_store, - zarr_version=zarr_version, - mode=mode, - consolidated=consolidated, - append_dim=append_dim, - ) - write_file_radar(file, p2c) diff --git a/raw2zarr/utils.py b/raw2zarr/utils.py index 3c3a73a..1a3b7fb 100644 --- a/raw2zarr/utils.py +++ b/raw2zarr/utils.py @@ -1,23 +1,22 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +import bz2 +import gzip import os +from collections.abc import Iterator +from datetime import datetime, timezone +from time import time +from typing import Any -import xarray as xr -import xradar as xd import fsspec import numpy as np -from datetime import datetime, timezone import pandas as pd import tomllib -from time import time -from collections.abc import Iterator -from typing import Any, List +import xarray as xr +import xradar as xd from xarray import DataTree -import gzip -import bz2 -def batch(iterable: List[Any], n: int = 1) -> Iterator[List[Any]]: +def batch(iterable: list[Any], n: int = 1) -> Iterator[list[Any]]: """ Splits a list into consecutive chunks of size `n`. @@ -47,9 +46,9 @@ def batch(iterable: List[Any], n: int = 1) -> Iterator[List[Any]]: >>> list(batch(['a', 'b', 'c', 'd'], n=3)) [['a', 'b', 'c'], ['d']] """ - l = len(iterable) - for ndx in range(0, l, n): - yield iterable[ndx : min(ndx + n, l)] + length = len(iterable) + for ndx in range(0, length, n): + yield iterable[ndx : min(ndx + n, length)] def timer_func(func): @@ -158,7 +157,7 @@ def check_if_exist(file: str, path: str = "../results") -> bool: file_path = f"{path}" file_name = f"{file_path}/{file.split('/')[-2]}_files.txt" try: - with open(file_name, "r", newline="\n") as txt_file: + with open(file_name, newline="\n") as txt_file: lines = txt_file.readlines() txt_file.close() _file = [i for i in lines if i.replace("\n", "") == file] @@ -171,7 +170,7 @@ def check_if_exist(file: str, path: str = "../results") -> bool: return False -def write_file_radar(file: str, path: str = f"../results") -> None: +def write_file_radar(file: str, path: str = "../results") -> None: """ Write a new line with the radar filename converted. This is intended to create a checklist to avoid file reprocessing diff --git a/raw2zarr/zarr_writer.py b/raw2zarr/zarr_writer.py index e69de29..0d9f927 100644 --- a/raw2zarr/zarr_writer.py +++ b/raw2zarr/zarr_writer.py @@ -0,0 +1,76 @@ +from collections.abc import Mapping, MutableMapping +from os import PathLike +from typing import Any, Literal + +from xarray import DataTree +from xarray.core.types import ZarrWriteModes + + +def dtree2zarr( + dt: DataTree, + store: MutableMapping | str | PathLike[str], + mode: ZarrWriteModes = "w-", + encoding: Mapping[str, Any] | None = None, + consolidated: bool = True, + group: str | None = None, + write_inherited_coords: bool = False, + compute: Literal[True] = True, + **kwargs, +): + """This function creates an appropriate datastore for writing a datatree + to a zarr store. + + See `DataTree.to_zarr` for full API docs. + """ + + from zarr import consolidate_metadata + + if group is not None: + raise NotImplementedError( + "specifying a root group for the tree has not been implemented" + ) + + if not compute: + raise NotImplementedError("compute=False has not been implemented yet") + + if encoding is None: + encoding = {} + + # In the future, we may want to expand this check to insure all the provided encoding + # options are valid. For now, this simply checks that all provided encoding keys are + # groups in the datatree. + if set(encoding) - set(dt.groups): + raise ValueError( + f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}" + ) + append_dim = kwargs.pop("append_dim", None) + for node in dt.subtree: + at_root = node is dt + if node.is_empty | node.is_root: + continue + ds = node.to_dataset(inherit=write_inherited_coords or at_root) + group_path = None if at_root else "/" + node.relative_to(dt) + try: + ds.to_zarr( + store, + group=group_path, + mode=mode, + consolidated=False, + append_dim=append_dim, + **kwargs, + ) + except ValueError: + ds.to_zarr( + store, + group=group_path, + mode="a-", + encoding=encoding.get(node.path), + consolidated=False, + **kwargs, + ) + + if "w" in mode: + mode = "a" + + if consolidated: + consolidate_metadata(store) diff --git a/setup.py b/setup.py index e1ab6a3..29ac38b 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup setup( name="raw2zarr", diff --git a/test/__init__.py b/tests/__init__.py similarity index 78% rename from test/__init__.py rename to tests/__init__.py index f4a2416..09460a5 100644 --- a/test/__init__.py +++ b/tests/__init__.py @@ -2,4 +2,4 @@ # Copyright (c) 2025, developed by Alfonso Ladino-Rincon. # Distributed under the MIT License. See LICENSE for more info. -"""Unit test package for raw2zarr.""" +"""Unit tests package for raw2zarr.""" diff --git a/test/conftest.py b/tests/conftest.py similarity index 95% rename from test/conftest.py rename to tests/conftest.py index 77f09be..6fe7e94 100644 --- a/test/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,9 @@ -import pytest -import fsspec import gzip import tempfile +import fsspec +import pytest + @pytest.fixture(scope="session") def nexrad_local_str_file(tmp_path_factory): @@ -14,9 +15,10 @@ def nexrad_local_str_file(tmp_path_factory): with fsspec.open(aws_url, anon=True) as s3_file: with open(local_path, "wb") as local_file: local_file.write(s3_file.read()) - with gzip.open(str(local_path), "rb") as gz, tempfile.NamedTemporaryFile( - delete=False - ) as temp_file: + with ( + gzip.open(str(local_path), "rb") as gz, + tempfile.NamedTemporaryFile(delete=False) as temp_file, + ): temp_file.write(gz.read()) temp_file_path = temp_file.name return temp_file_path diff --git a/test/tests_dtree_io.py b/tests/test_dtree_io.py similarity index 95% rename from test/tests_dtree_io.py rename to tests/test_dtree_io.py index 5b0a142..5be8c21 100644 --- a/test/tests_dtree_io.py +++ b/tests/test_dtree_io.py @@ -1,7 +1,8 @@ import pytest -from raw2zarr.dtree_io import iris_loader, nexradlevel2_loader from xarray import DataTree +from raw2zarr.dtree_io import nexradlevel2_loader + @pytest.mark.parametrize( "nexradlevel2_files", diff --git a/upload.sh b/upload.sh new file mode 100644 index 0000000..71eba40 --- /dev/null +++ b/upload.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Set your source directory and destination bucket +SOURCE_DIR="/media/alfonso/drive/Alfonso/python/raw2zarr/zarr/Guaviare_test.zarr" +DEST_BUCKET="sj://dtree-zarr/Guaviare.zarr" + +# Function to recursively upload files +upload_files() { + for file in "$1"/*; do + if [ -d "$file" ]; then + # If it's a directory, recursively upload + upload_files "$file" + else + # If it's a file, upload it + uplink cp "$file" "$DEST_BUCKET/${file#$SOURCE_DIR/}" + fi + done +} + +# Call the function to start uploading +upload_files "$SOURCE_DIR"