Skip to content

Commit

Permalink
change QC test success storage system
Browse files Browse the repository at this point in the history
- The summary logs from the script are now optional
- The QC status files are no longer stored in the "logs_and_status_files" folder; they are now written to a separate directory called "QC", which sits at the same level of the hierarchy as the directories for the different JOANNE product levels
  • Loading branch information
Geet-George committed May 10, 2021
1 parent 30c6ace commit 1c5faa0
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 115 deletions.
135 changes: 89 additions & 46 deletions joanne/Level_2/QC.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys
import warnings
from importlib import reload
import argparse

import matplotlib.pyplot as plt
import numpy as np
Expand All @@ -25,65 +26,107 @@
)

# %%
###------ Platform Name ------###
###------ Command-line options ------###
def _str_to_bool(value):
    """Convert a command-line token such as "True" or "False" to a bool.

    argparse hands option values over as strings, and any non-empty string
    (including "False") is truthy, so the value has to be interpreted
    explicitly before it is used in an ``if``.

    Raises
    ------
    argparse.ArgumentTypeError
        If the token is not a recognisable boolean spelling.
    """
    if isinstance(value, bool):
        return value

    token = value.strip().lower()
    if token in {"true", "t", "yes", "y", "1"}:
        return True
    if token in {"false", "f", "no", "n", "0"}:
        return False

    raise argparse.ArgumentTypeError(
        f"expected a boolean value (True/False), got {value!r}"
    )


parser = argparse.ArgumentParser(
    description="This script takes .QC or .PQC files processed by ASPEN and runs the JOANNE QC tests on them. It outputs a status file that provides the success of the provided soundings in the JOANNE QC tests. There is an additional option to print out the sonde IDs which failed to detect a launch, along with a summary of the soundings' performance in the QC test by setting the argument 'logs' to True"
)

parser.add_argument(
    "-l",
    "--logs",
    help="Set this as True if a summary of launch detect failures is to be printed along with a summary of soundings' performance in JOANNE QC tests. This is set as False by default.",
    # "--logs True" / "--logs False" are parsed into real booleans; a bare
    # "--logs" (no value) is treated as True; omitting the option gives False.
    type=_str_to_bool,
    nargs="?",
    const=True,
    default=False,
)

# This module is parsed at import time and is also written as "# %%" cells.
# Interactive kernels and other launchers put their own options in sys.argv,
# so unknown arguments are ignored here instead of aborting the whole script.
args, _unrecognised_args = parser.parse_known_args()

# %%

# Default locations, given as absolute paths; each is used as the default
# value of the same-named parameter of the functions defined in this script.
logs_directory = "/Users/geet/Documents/JOANNE/Data/Level_2/logs_and_stats/"
# directory to store logs and stats
data_directory = "/Users/geet/Documents/JOANNE/Data/"
# root directory of the JOANNE data tree
save_directory = "/Users/geet/Documents/JOANNE/Data/QC/"
# directory handed to f2.get_status_ds_for_platform as ``save_dir``
# %%


def run_qc(
    logs_directory=logs_directory,
    data_directory=data_directory,
    save_directory=save_directory,
):
    """Run the JOANNE QC tests for the P3 and HALO platforms.

    For each platform, ``f2.get_status_ds_for_platform`` is called with
    ``save_dir=save_directory``. The returned dataset is not used here; the
    call is made for the status file it leaves in ``save_directory``.

    Parameters
    ----------
    logs_directory : str
        Accepted for interface compatibility; not used by this function.
    data_directory : str
        Accepted for interface compatibility; not used by this function.
    save_directory : str
        Directory passed on as ``save_dir`` for the per-platform status file.

    Returns
    -------
    None
    """
    ###------ Platform Name ------###
    for Platform in ["P3", "HALO"]:
        print(f"{Platform} running now...")

        # Only the side effect matters here, so the result is not bound.
        f2.get_status_ds_for_platform(Platform, save_dir=save_directory)

    print("JOANNE QC tests finished running")

print(f"{Platform} running now...")

directory = "/Users/geet/Documents/JOANNE/Data/Level_1/" + Platform + "/"
# directory where all sonde files are present
def create_QC_summary_logs(
    logs_directory=logs_directory,
    data_directory=data_directory,
    save_directory=save_directory,
):
    """Write a text summary of the QC results for the P3 and HALO platforms.

    For each platform the status dataset is obtained from
    ``f2.get_status_ds_for_platform``. The number of sondes rated GOOD, UGLY
    and BAD is counted for ``sat_test``, ``low_test`` and the overall
    ``qc_flag``, and the counts are written to
    ``{logs_directory}summary_logs_{Platform}_v{joanne.__version__}.txt``,
    overwriting any existing file of that name.

    Parameters
    ----------
    logs_directory : str
        Directory (with trailing separator) in which the summary is written.
    data_directory : str
        Accepted for interface compatibility; not used by this function.
    save_directory : str
        Directory passed on as ``save_dir`` when fetching the status dataset.

    Returns
    -------
    None
    """
    separator = "----------------------------------------------\n"

    for Platform in ["P3", "HALO"]:

        status_ds = f2.get_status_ds_for_platform(Platform, save_dir=save_directory)

        status_dict = {}

        for x in ["sat", "low", "qc_flag"]:
            # The two individual tests are stored as "<name>_test"; the
            # overall flag is stored under its own name.
            variable = x if x == "qc_flag" else f"{x}_test"

            for y in ["GOOD", "UGLY", "BAD"]:
                status_dict[f"{y}_{x}"] = len(
                    status_ds.where(status_ds[variable] == y, drop=True).launch_time
                )

        # The context manager closes the file even if a write fails.
        with open(
            f"{logs_directory}summary_logs_{Platform}_v{joanne.__version__}.txt",
            "w",
        ) as file:
            for test in ["sat", "low"]:
                file.write(separator)
                file.write(
                    f"As per the {test}_test tests,\n{status_dict[f'GOOD_{test}']} are good sondes,\n"
                )
                file.write(
                    f"{status_dict[f'BAD_{test}']} are bad sondes\nand {status_dict[f'UGLY_{test}']} are ugly sondes.\n"
                )

            file.write(separator)
            file.write(f"There are a total of {len(status_ds.launch_time)} sondes\n")
            file.write(f"out of which {status_dict['GOOD_qc_flag']} are good sondes,\n")
            file.write(
                f"{status_dict['BAD_qc_flag']} are bad sondes\nand {status_dict['UGLY_qc_flag']} are ugly sondes that can be salvaged with some effort.\n"
            )

    print("JOANNE QC summary logs finished printing")


if __name__ == "__main__":
run_qc()
if args.logs:
create_QC_summary_logs()
# %%
162 changes: 93 additions & 69 deletions joanne/Level_2/fn_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import glob
import sys
import warnings

import os
from importlib import reload

# import matplotlib.pyplot as plt
Expand Down Expand Up @@ -34,7 +34,7 @@ def get_all_sondes_list(Platform):
a_dir = "/Users/geet/Documents/JOANNE/Data/Level_0/" + Platform + "/All_A_files/"
# directory where all the A files are present

logs_directory = "/Users/geet/Documents/JOANNE/Data/Level_2/logs_and_stats/"
logs_directory = "/Users/geet/Documents/JOANNE/Data/QC/"
# directory to store logs and stats

sonde_paths = sorted(glob.glob(directory + "*QC.nc"))
Expand Down Expand Up @@ -84,7 +84,7 @@ def get_var_count_sums(list_nc):
return list_of_variables, s_time, s_t, s_rh, s_p, s_z, s_u, s_v, s_alt


def get_ld_flag_from_a_files(a_dir, a_files, logs_directory, Platform, logs=True):
def get_ld_flag_from_a_files(a_dir, a_files, logs_directory, Platform, logs=False):

a_filepaths = []
# list to store individual file paths for all A files
Expand All @@ -95,11 +95,12 @@ def get_ld_flag_from_a_files(a_dir, a_files, logs_directory, Platform, logs=True
ld_FLAG = np.full(len(a_files), np.nan)
# array to store ld_FLAG values

# create and start writing a log file which will store sonde info about sondes with failed launch detection
file = open(
f"{logs_directory}no_launch_detect_logs_{Platform}_v{joanne.__version__}.txt",
"w",
)
if logs:
# create and start writing a log file which will store sonde info about sondes with failed launch detection
file = open(
f"{logs_directory}no_launch_detect_logs_{Platform}_v{joanne.__version__}.txt",
"w",
)

g = 0
# counter of failed sondes
Expand Down Expand Up @@ -144,21 +145,25 @@ def get_ld_flag_from_a_files(a_dir, a_files, logs_directory, Platform, logs=True
if a == 0: # if value is 0, then the launch detection failed
ld_FLAG[id_] = False
g += 1
for line in lines:
if "Sonde ID/Type/Rev" in line:
# storing the sonde ID information and relevant details to the log file we created
file.write(line)
if "START Time:" in line:
# storing the sonde start time to the log file we created
file.write(line)
# line breaker in our log file as a break between two file records
file.write("------------------------------------------\n")
break
if logs:
for line in lines:
if "Sonde ID/Type/Rev" in line:
# storing the sonde ID information and relevant details to the log file we created
file.write(line)
if "START Time:" in line:
# storing the sonde start time to the log file we created
file.write(line)
# line breaker in our log file as a break between two file records
file.write(
"------------------------------------------\n"
)
break
else:
ld_FLAG[id_] = True

file.write(f"In total, there were {g} sondes that didn't detect a launch.\n")
# writing summary of failed sondes to the log file
if logs:
file.write(f"In total, there were {g} sondes that didn't detect a launch.\n")
# writing summary of failed sondes to the log file

return ld_FLAG

Expand Down Expand Up @@ -759,7 +764,7 @@ def rename_vars(ds):
return ds.rename(rename_dict)


def get_status_ds_for_platform(Platform):
def get_status_ds_for_platform(Platform, save_dir):

(
sonde_ds,
Expand All @@ -771,60 +776,79 @@ def get_status_ds_for_platform(Platform):
sonde_paths,
) = get_all_sondes_list(Platform)

# Retrieving all non NaN index sums in to a list for all sondes
list_nc = list(map(get_total_non_nan_indices, sonde_ds))
if os.path.exists(save_dir):
pass
else:
os.makedirs(save_dir)

launch_time = [None] * len(sonde_ds)
to_save_ds_filename = (
f"{save_dir}Status_of_sondes_{Platform}_v{joanne.__version__}.nc"
)

for i in range(len(sonde_ds)):
launch_time[i] = sonde_ds[i].launch_time.values
if os.path.exists(to_save_ds_filename):

(
list_of_variables,
s_time,
s_t,
s_rh,
s_p,
s_z,
s_u,
s_v,
s_alt,
) = get_var_count_sums(list_nc)

ld_FLAG = get_ld_flag_from_a_files(a_dir, a_files, logs_directory, Platform)

status_ds = init_status_ds(
list_of_variables,
s_time,
s_t,
s_rh,
s_p,
s_z,
s_u,
s_v,
s_alt,
ld_FLAG,
file_time,
)
print(f"Status file for {Platform} of the current version exists.")

status_ds, ind_flag_vars = add_ind_flags_to_statusds(status_ds, list_of_variables)
status_ds, srf_flag_vars = add_srf_flags_to_statusds(status_ds, sonde_paths)
status_ds, ind_FLAG = get_the_ind_FLAG_to_statusds(status_ds, ind_flag_vars)
status_ds, srf_FLAG = get_the_srf_FLAG_to_statusds(status_ds, srf_flag_vars)
status_ds = get_the_FLAG(status_ds, ind_FLAG, srf_FLAG)
status_ds["launch_time"] = (["time"], pd.DatetimeIndex(launch_time))
status_ds = add_sonde_id_to_status_ds(Platform, sonde_ds, status_ds)

to_save_ds = (
status_ds.swap_dims({"time": "sonde_id"}).reset_coords("time", drop=True)
# .sortby("launch_time")
)
to_save_ds = xr.open_dataset(to_save_ds_filename)

to_save_ds = rename_vars(to_save_ds)
else:

to_save_ds.to_netcdf(
f"{logs_directory}Status_of_sondes_{Platform}_v{joanne.__version__}.nc"
)
# Retrieving all non NaN index sums in to a list for all sondes
list_nc = list(map(get_total_non_nan_indices, sonde_ds))

launch_time = [None] * len(sonde_ds)

for i in range(len(sonde_ds)):
launch_time[i] = sonde_ds[i].launch_time.values

(
list_of_variables,
s_time,
s_t,
s_rh,
s_p,
s_z,
s_u,
s_v,
s_alt,
) = get_var_count_sums(list_nc)

ld_FLAG = get_ld_flag_from_a_files(a_dir, a_files, logs_directory, Platform)

status_ds = init_status_ds(
list_of_variables,
s_time,
s_t,
s_rh,
s_p,
s_z,
s_u,
s_v,
s_alt,
ld_FLAG,
file_time,
)

status_ds, ind_flag_vars = add_ind_flags_to_statusds(
status_ds, list_of_variables
)
status_ds, srf_flag_vars = add_srf_flags_to_statusds(status_ds, sonde_paths)
status_ds, ind_FLAG = get_the_ind_FLAG_to_statusds(status_ds, ind_flag_vars)
status_ds, srf_FLAG = get_the_srf_FLAG_to_statusds(status_ds, srf_flag_vars)
status_ds = get_the_FLAG(status_ds, ind_FLAG, srf_FLAG)
status_ds["launch_time"] = (["time"], pd.DatetimeIndex(launch_time))
status_ds = add_sonde_id_to_status_ds(Platform, sonde_ds, status_ds)

to_save_ds = (
status_ds.swap_dims({"time": "sonde_id"}).reset_coords("time", drop=True)
# .sortby("launch_time")
)

to_save_ds = rename_vars(to_save_ds)

to_save_ds.to_netcdf(
f"{save_dir}Status_of_sondes_{Platform}_v{joanne.__version__}.nc"
)

return to_save_ds

Expand Down

0 comments on commit 1c5faa0

Please sign in to comment.