-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathiterate_call_ATL08_h5ToShp.py
135 lines (101 loc) · 4.96 KB
/
iterate_call_ATL08_h5ToShp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# -*- coding: utf-8 -*-
"""
iterate_call_ATL08_h5ToShp.py
Created on Wed Mar 18 22:06:12 2020
Iterate through directory and call
ATL08_h5ToShp.py
Added 5/11 - ACTUALLY this is happening in ATL08_h5ToShp.py:
Use update GDB function in 3DSI_zonalStats.py to create big GDB
If GDB does not work, write to GPKG then convert to GDB
NEW 5/15 -
Adding argparse, and adding option to run in parallel (default False)
utilizing all of the CPUs on nodes by running ATL08_h5ToShp.py
with GNU parallel
Also moving logging to h5ToShp.py so overwriting not an issue
Adding parameter in h5ToShp.py to update an output GDB or not
If not running in parallel, supply output GDB
If running in parallel, do not supply output GDB
Will instead build list of output shapefiles after parallel is
done running here and update gdb using list
NEW 5/27 -
Adding option to specify na or eu. The former is default, and will be
used in output naming, and sent to ATL08 h5 to shp code
"""
import os, sys
import platform
import time
import argparse
#import glob
from FeatureClass import FeatureClass
# Set variables that should more or less stay the same (do not depend on input):
# Path of the script that main() launches once per input .h5 file, either
# directly via GNU parallel or as 'python <runScript> ...' in serial mode.
runScript = '/home/mwooten3/code/HRSI/ATL08_h5ToShp.py'
def main(args):
    """Run the ATL08 h5-to-shp script over a slice of the continent's file list.

    args is a dict (built from argparse in this file) with the keys:
      'range'     -- string "START-END"; 1-based, inclusive line numbers of
                     the .h5 list file to process (e.g. "1-20")
      'parallel'  -- truthy to launch all files through GNU parallel and then
                     add the finished shapefiles to the node-specific GDB;
                     falsy to call the script one file at a time, passing the
                     GDB to each call
      'continent' -- 'na' or 'eu'; used in the list/output paths and passed
                     on to the called script

    Raises RuntimeError if the continent is not 'na'/'eu' or if the range is
    not two integers START-END with 1 <= START <= END.
    """

    # Unpack arguments
    listRange = args['range']
    runPar = args['parallel']
    cont = args['continent'] # eu or na (default na)

    # Check inputs first, before any continent-dependent path is opened, so a
    # bad argument gives the intended message instead of a file-not-found error
    # 1. Check that continent is na or eu
    if cont not in ('na', 'eu'):
        raise RuntimeError("Continent must be 'na' or 'eu'")

    # 2. Check that range is supplied correctly (two integers, 1-based)
    try:
        S, E = listRange.split('-')
        start, end = int(S), int(E)
    except (AttributeError, ValueError):
        raise RuntimeError("Range must be supplied like: 1-20")
    if start < 1 or end < start:
        # start=0 would turn into a slice from -1 and silently pick the wrong files
        raise RuntimeError("Range must be supplied like: 1-20")

    # Set variables that should more or less stay the same (but depend on input)
    fileList = '/att/gpfsfs/briskfs01/ppl/mwooten3/3DSI/ATL08/ls_ATL08_{}_v3.txt'.format(cont)
    flightShpDir = '/att/gpfsfs/briskfs01/ppl/mwooten3/3DSI/ATL08/{}/flight_shps'.format(cont)
    outGdb = '/att/gpfsfs/briskfs01/ppl/mwooten3/3DSI/ATL08/{}/ATL08_{}_v3__{}.gdb'.format(cont, cont, platform.node())

    print("\nBEGIN: {}".format(time.strftime("%m-%d-%y %I:%M:%S")))

    with open(fileList, 'r') as listFile:
        allFiles = [line.strip('\r\n') for line in listFile]

    # Get list using range. Slice by line number first, then drop blank lines
    # so an empty entry is never handed to the called script as an input
    h5Files = [f for f in allFiles[start - 1:end] if f.strip()]

    # Get list of output shapefiles using input h5 files
    shps = [os.path.join(flightShpDir,
                         os.path.basename(h5).replace('.h5', '.shp'))
            for h5 in h5Files]

    # Exploring possible duplicates issue (kept for reference, not executed):
    #   import collections
    #   print [item for item, count in collections.Counter(h5Files).items() if count > 1]
    #   sys.exit()

    # Run in parallel
    if runPar:

        # Prepare inputs for parallel call:
        # NOTE(review): the awk pattern matches the "Socket(s):" line of lscpu,
        # so this is the socket count, not the core count -- confirm intent.
        call = "lscpu | awk '/^Socket.s.:/ {sockets=$NF} END {print sockets}'"
        try:
            ncpu = int(os.popen(call).read().strip()) - 1 # count minus 1
        except ValueError:
            ncpu = 1 # lscpu/awk gave no usable number
        # Never pass 0 (or less): GNU parallel treats -j 0 as "as many as possible"
        ncpu = max(ncpu, 1)

        parList = ' '.join(h5Files)

        print("\nProcessing {} .h5 files in parallel...\n".format(len(h5Files)))

        # Do not supply output GDB, do supply continent
        # {1} and {2} are GNU parallel placeholders for the two ::: input sources
        parCall = '{} -i '.format(runScript) + '{1} -continent {2}'
        cmd = "parallel --progress -j {} --delay 1 '{}' ::: {} ::: {}".format(ncpu, parCall, parList, cont)
        os.system(cmd)

        # And update node-specific GDB
        print("\n\nCreating {} with completed shapefiles ({})...".format(outGdb, time.strftime("%m-%d-%y %I:%M:%S")))
        for shp in shps:
            # Only shapefiles that were actually written get added
            if os.path.isfile(shp):
                fc = FeatureClass(shp)
                fc.addToFeatureClass(outGdb)

    # Do not run in parallel
    else:
        for c, h5 in enumerate(h5Files, 1):

            print("\nProcessing {} of {}...".format(c, len(h5Files)))

            # Call script one at a time and supply node-specific output GDB and continent
            cmd = 'python {} -i {} -gdb {} -continent {}'.format(runScript, h5, outGdb, cont)
            os.system(cmd)

    print("\nEND: {}\n".format(time.strftime("%m-%d-%y %I:%M:%S")))
if __name__ == "__main__":

    # Command-line interface: a required list range, an optional parallel
    # switch, and an optional continent code that falls back to 'na'
    cli = argparse.ArgumentParser()

    cli.add_argument(
        "-r", "--range",
        required=True,
        type=str,
        help="Range for stack iteration (i.e. 1-20)"
    )
    cli.add_argument(
        "-par", "--parallel",
        help="Run in parallel",
        action='store_true'
    )
    cli.add_argument(
        "-continent", "--continent",
        default='na',
        required=False,
        type=str,
        help="Continent (na or eu)"
    )

    # main() reads its options from a plain dict keyed by option name
    args = vars(cli.parse_args())
    main(args)