Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
pagecp authored Nov 6, 2020
1 parent 933489b commit 39f10ea
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 1 deletion.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
# subsetnc
# subsetnc

Example of execution:

mkdir results
python subsetnc.py examples/input_files.txt 30 90 345 30 20160101 20171231
python subsetnc.py examples/input_files_notime.txt 30 90 345 30
4 changes: 4 additions & 0 deletions examples/input_files.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
http://crd-esgf-drc.ec.gc.ca/thredds/dodsC/esgD_dataroot/AR6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r1i1p1f1/day/psl/gn/v20190429/psl_day_CanESM5_ssp585_r1i1p1f1_gn_20150101-21001231.nc
http://crd-esgf-drc.ec.gc.ca/thredds/dodsC/esgD_dataroot/AR6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r1i1p1f1/day/ua/gn/v20190429/ua_day_CanESM5_ssp585_r1i1p1f1_gn_20150101-20201231.nc
http://crd-esgf-drc.ec.gc.ca/thredds/dodsC/esgD_dataroot/AR6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r1i1p1f1/day/va/gn/v20190429/va_day_CanESM5_ssp585_r1i1p1f1_gn_20150101-20201231.nc
http://crd-esgf-drc.ec.gc.ca/thredds/dodsC/esgD_dataroot/AR6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r1i1p1f1/day/zg/gn/v20190429/zg_day_CanESM5_ssp585_r1i1p1f1_gn_20150101-20201231.nc
2 changes: 2 additions & 0 deletions examples/input_files_notime.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
http://crd-esgf-drc.ec.gc.ca/thredds/dodsC/esgD_dataroot/AR6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r1i1p1f1/fx/sftlf/gn/v20190429/sftlf_fx_CanESM5_ssp585_r1i1p1f1_gn.nc
http://crd-esgf-drc.ec.gc.ca/thredds/dodsC/esgD_dataroot/AR6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp585/r1i1p1f1/fx/orog/gn/v20190429/orog_fx_CanESM5_ssp585_r1i1p1f1_gn.nc
162 changes: 162 additions & 0 deletions subsetnc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import shutil
import sys
import os
import cftime
import numpy as np
import pandas as pd
import xarray as xr
import datetime

# ---- Command-line arguments -------------------------------------------------
# Usage: python subsetnc.py <input_file> <minlat> <maxlat> <minlon> <maxlon>
#                           [<start YYYYMMDD> <end YYYYMMDD>]
# <input_file> lists one netCDF URL/path per line.
input_file = sys.argv[1]
minlat = float(sys.argv[2])
maxlat = float(sys.argv[3])
minlon = float(sys.argv[4])
maxlon = float(sys.argv[5])

# Both start AND end must be present (original `> 6` test crashed with an
# IndexError when only the start date was given).
if len(sys.argv) > 7:
    period_start_time = sys.argv[6]
    period_end_time = sys.argv[7]

    # Split the YYYYMMDD strings into parts; keep both the ISO-style string
    # (used by xarray's time slicing) and a datetime (used for comparisons).
    year_start = period_start_time[0:4]
    month_start = period_start_time[4:6]
    day_start = period_start_time[6:8]
    starttime = year_start + "-" + month_start + "-" + day_start
    start = datetime.datetime(int(year_start), int(month_start), int(day_start))

    year_end = period_end_time[0:4]
    month_end = period_end_time[4:6]
    day_end = period_end_time[6:8]
    endtime = year_end + "-" + month_end + "-" + day_end
    end = datetime.datetime(int(year_end), int(month_end), int(day_end))
else:
    # Sentinel value meaning "no time subsetting requested".
    period_start_time = -1
    period_end_time = -1

# Read the list of dataset URLs/paths (one per line); context manager closes
# the handle (the original left the file object open).
with open(input_file, "r") as f:
    allfiles = f.readlines()

# All subset files are written under ./results
os.makedirs('results', exist_ok=True)

# ---- Per-file subsetting loop ----------------------------------------------
# For each input dataset: optionally subset in time, subset in lat/lon, write
# to results/, and (for windows wrapping the 0/360 meridian) rewrite the local
# file with reordered longitudes.
files = []  # paths of time-subset outputs (kept for a possible later merge)
for f in allfiles:

    f = f.rstrip()
    print(f)
    fb = f.rsplit('/', 1)[-1]  # basename of the URL/path
    # Remove the ".nc" suffix. The original used fb.strip('.nc'), which strips
    # the CHARACTER SET {., n, c} and mangles names ending in n/c (e.g.
    # "..._gn.nc" -> "..._g").
    fbs = fb[:-3] if fb.endswith('.nc') else fb

    process = True

    if period_start_time == -1:
        # No time subsetting: plain open, simple output name.
        dset = xr.open_dataset(f, mask_and_scale=False, decode_coords=True)
        outf = fbs + "_subset.nc"
    else:
        # Prefer a chunked (dask) open; fall back to an unchunked open on any
        # failure (best effort, e.g. when dask is unavailable).
        try:
            dset = xr.open_dataset(f, chunks={'time': '100MB'},
                                   mask_and_scale=False, decode_coords=True,
                                   decode_times=True, use_cftime=True)
        except Exception:
            dset = xr.open_dataset(f, mask_and_scale=False,
                                   decode_coords=True, decode_times=True,
                                   use_cftime=True)

        # First and last timestamps actually present in the file. The original
        # used [-1:] (a length-1 slice) and passed DataArrays straight to
        # datetime.datetime(), which raises; index scalars and cast to int.
        startf = datetime.datetime(int(dset.time.dt.year[0]),
                                   int(dset.time.dt.month[0]),
                                   int(dset.time.dt.day[0]))
        endf = datetime.datetime(int(dset.time.dt.year[-1]),
                                 int(dset.time.dt.month[-1]),
                                 int(dset.time.dt.day[-1]))

        # Interval-overlap test. The original only checked whether one of the
        # requested endpoints fell inside the file, and so wrongly skipped
        # files entirely CONTAINED in the requested period.
        if end < startf or start > endf:
            process = False
        else:
            # Effective window = intersection of the request and the file's
            # coverage; used only to label the output file.
            file_start = max(start, startf)
            file_end = min(end, endf)
            outf = (fbs + "_subset_"
                    + file_start.strftime("%Y%m%d") + "-"
                    + file_end.strftime("%Y%m%d") + ".nc")

    # _NCProperties cannot be written back by the netcdf4 engine; drop it if
    # present (pop replaces the original bare try/except del).
    dset.attrs.pop('_NCProperties', None)

    if process:
        # A window that wraps the 0/360 meridian (minlon > maxlon or a
        # negative minlon) cannot be sliced directly; subset lat/time now and
        # handle longitude after the file is local (see below).
        if minlon > maxlon or minlon < 0:
            if period_start_time == -1:
                dset = dset.sel(lat=slice(minlat, maxlat))
            else:
                dset = dset.sel(time=slice(starttime, endtime),
                                lat=slice(minlat, maxlat))
        else:
            if period_start_time == -1:
                dset = dset.sel(lon=slice(minlon, maxlon),
                                lat=slice(minlat, maxlat))
            else:
                dset = dset.sel(time=slice(starttime, endtime),
                                lon=slice(minlon, maxlon),
                                lat=slice(minlat, maxlat))

        print("Saving to: " + "results/" + outf)
        # Suppress _FillValue on coordinate variables (lat*/lon*/time*). One
        # fresh dict per key — dict.fromkeys shared a single mutable dict.
        enc = {k: {'_FillValue': None} for k in dset.dims
               if k.startswith(('lat', 'lon', 'time'))}

        if period_start_time == -1:
            dset.to_netcdf(path="results/" + outf, mode='w', format='NETCDF4',
                           engine='netcdf4', encoding=enc)
        else:
            files.append("results/" + outf)
            dset.to_netcdf(path="results/" + outf, mode='w', format='NETCDF4',
                           unlimited_dims='time', engine='netcdf4',
                           encoding=enc)
            # Remember the time units so the longitude-reorder rewrite below
            # re-encodes time identically.
            tunits = dset.time.encoding['units']
    else:
        print("Not processing file because time range is outside time period requested.")

    dset.close()
    del dset

    # Reorder longitudes if needed, and subset longitudes in that specific
    # case differently (done on the local file for reasonable performance).
    if process and (minlon > maxlon or minlon < 0):
        print("Subsetting for non-contiguous longitude")
        if period_start_time == -1:
            dsetl = xr.open_dataset("results/" + outf, mask_and_scale=False,
                                    decode_coords=True)
        else:
            try:
                dsetl = xr.open_dataset("results/" + outf,
                                        chunks={'time': '100MB'},
                                        mask_and_scale=False,
                                        decode_coords=True,
                                        decode_times=True, use_cftime=True)
            except Exception:
                dsetl = xr.open_dataset("results/" + outf,
                                        mask_and_scale=False,
                                        decode_coords=True,
                                        decode_times=True, use_cftime=True)
        saveattrs = dsetl.lon.attrs
        # Convert longitudes [0, 360) -> [-180, 180) and roll half the axis so
        # they ascend again.
        dsetl = (dsetl
                 .assign_coords(lon=(((dsetl.lon + 180) % 360) - 180))
                 .roll(lon=(dsetl.dims['lon'] // 2), roll_coords=True))
        # NOTE(review): this mutates the script-level bounds, so later files
        # see the already-converted values; idempotent thanks to the >= 180
        # guard, but intentional-looking — confirm before refactoring.
        if minlon >= 180:
            minlon = minlon - 360
        if maxlon >= 180:
            maxlon = maxlon - 360
        dsetl = dsetl.sel(lon=slice(minlon, maxlon))
        dsetl.lon.attrs = saveattrs
        if period_start_time == -1:
            dsetl.to_netcdf(path="results/tmp" + outf, mode='w',
                            format='NETCDF4', engine='netcdf4', encoding=enc)
        else:
            dsetl.time.encoding['units'] = tunits
            dsetl.to_netcdf(path="results/tmp" + outf, mode='w',
                            format='NETCDF4', unlimited_dims='time',
                            engine='netcdf4', encoding=enc)
        dsetl.close()
        del dsetl
        # Atomically replace the original subset with the reordered version.
        os.rename("results/tmp" + outf, "results/" + outf)

# Combine all files into one
#try:
# dsmerged = xr.open_mfdataset(files, chunks={'time': '100MB'}, mask_and_scale=False, decode_coords=True, combine='by_coords')
#except:
# dsmerged = xr.open_mfdataset(files, mask_and_scale=False, decode_coords=True, combine='by_coords')
#print("Merging files into: "+"results/"+outfilenc)
#print(files)
#dsmerged.to_netcdf(path="results/"+outfilenc, mode='w', format='NETCDF4', unlimited_dims='time')

0 comments on commit 39f10ea

Please sign in to comment.