Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

I/O: Split OutputManager creation from setup #3005

Merged
merged 10 commits into from
Oct 18, 2024
135 changes: 90 additions & 45 deletions components/eamxx/src/control/atmosphere_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,56 +677,44 @@ void AtmosphereDriver::create_fields()
m_atm_logger->info("[EAMxx] create_fields ... done!");
}

void AtmosphereDriver::initialize_output_managers () {
m_atm_logger->info("[EAMxx] initialize_output_managers ...");
void AtmosphereDriver::create_output_managers () {
m_atm_logger->info("[EAMxx] create_output_managers ...");
start_timer("EAMxx::init");
start_timer("EAMxx::initialize_output_managers");
start_timer("EAMxx::create_output_managers");

check_ad_status (s_comm_set | s_params_set | s_grids_created | s_fields_created);
check_ad_status (s_comm_set | s_params_set | s_ts_inited);

auto& io_params = m_atm_params.sublist("Scorpio");

// IMPORTANT: create model restart OutputManager first! This OM will be in charge
// of creating rpointer.atm, while other OM's will simply append to it.
// If this assumption is not verified, we must always append to rpointer, which
// can make the rpointer file a bit confusing.

// Check for model restart output
ekat::ParameterList checkpoint_params;
checkpoint_params.set("frequency_units",std::string("never"));
checkpoint_params.set("Frequency",-1);

// Create model restart OutputManager first. This OM will be in charge
// of creating rpointer.atm, while other OM's will simply append to it.
// If this assumption is not verified, we must always append to rpointer, which
// can make the rpointer file a bit confusing.
if (io_params.isSublist("model_restart")) {
auto restart_pl = io_params.sublist("model_restart");
restart_pl.set<std::string>("Averaging Type","Instant");
restart_pl.sublist("provenance") = m_atm_params.sublist("provenance");
auto& om = m_output_managers.emplace_back();
if (fvphyshack) {
// Don't save CGLL fields from ICs to the restart file.
std::map<std::string,field_mgr_ptr> fms;
for (auto& it : m_field_mgrs) {
if (it.first == "Physics GLL") continue;
fms[it.first] = it.second;
}
om.set_logger(m_atm_logger);
om.setup(m_atm_comm,restart_pl, fms,m_grids_manager,m_run_t0,m_case_t0,true);
} else {
om.set_logger(m_atm_logger);
om.setup(m_atm_comm,restart_pl,m_field_mgrs,m_grids_manager,m_run_t0,m_case_t0,true);
}
om.set_logger(m_atm_logger);
for (const auto& it : m_atm_process_group->get_restart_extra_data()) {
om.add_global(it.first,it.second);
}
// Create model restart manager
auto params = io_params.sublist("model_restart");
params.set<std::string>("Averaging Type","Instant");
params.sublist("provenance") = m_atm_params.sublist("provenance");

m_restart_output_manager = std::make_shared<OutputManager>();
m_restart_output_manager->initialize(m_atm_comm,
params,
m_run_t0,
m_case_t0,
/*is_model_restart_output*/ true);

// Store the "Output Control" pl of the model restart as the "Checkpoint Control" for all other output streams
checkpoint_params.set<std::string>("frequency_units",restart_pl.sublist("output_control").get<std::string>("frequency_units"));
checkpoint_params.set("Frequency",restart_pl.sublist("output_control").get<int>("Frequency"));
checkpoint_params.set<std::string>("frequency_units",params.sublist("output_control").get<std::string>("frequency_units"));
checkpoint_params.set("Frequency",params.sublist("output_control").get<int>("Frequency"));
}

// Build one manager per output yaml file
// Create one output manager per output yaml file
using vos_t = std::vector<std::string>;
const auto& output_yaml_files = io_params.get<vos_t>("output_yaml_files",vos_t{});
int om_tally = 0;
for (const auto& fname : output_yaml_files) {
ekat::ParameterList params;
ekat::parse_yaml_file(fname,params);
Expand All @@ -737,15 +725,59 @@ void AtmosphereDriver::initialize_output_managers () {

// Check if the filename prefix for this file has already been set. If not, use the simulation casename.
if (not params.isParameter("filename_prefix")) {
params.set<std::string>("filename_prefix",m_casename+".scream.h"+std::to_string(om_tally));
om_tally++;
params.set<std::string>("filename_prefix",m_casename+".scream.h");
}
params.sublist("provenance") = m_atm_params.sublist("provenance");
// Add a new output manager
m_output_managers.emplace_back();
auto& om = m_output_managers.back();

auto& om = m_output_managers.emplace_back();
om.initialize(m_atm_comm,
params,
m_run_t0,
m_case_t0,
/*is_model_restart_output*/ false);
}

m_ad_status |= s_output_created;

stop_timer("EAMxx::create_output_managers");
stop_timer("EAMxx::init");
m_atm_logger->info("[EAMxx] create_output_managers ... done!");
}

void AtmosphereDriver::initialize_output_managers () {
m_atm_logger->info("[EAMxx] initialize_output_managers ...");
start_timer("EAMxx::init");
start_timer("EAMxx::initialize_output_managers");

check_ad_status (s_output_created | s_grids_created | s_fields_created);

// Check for model restart output manager and setup if it exists.
if (m_restart_output_manager) {
if (fvphyshack) {
// Don't save CGLL fields from ICs to the restart file.
std::map<std::string,field_mgr_ptr> fms;
for (auto& it : m_field_mgrs) {
if (it.first == "Physics GLL") continue;
fms[it.first] = it.second;
}
m_restart_output_manager->setup(fms, m_grids_manager);
} else {
m_restart_output_manager->setup(m_field_mgrs,m_grids_manager);
}
m_restart_output_manager->set_logger(m_atm_logger);
for (const auto& it : m_atm_process_group->get_restart_extra_data()) {
m_restart_output_manager->add_global(it.first,it.second);
}
}

// Setup output managers
for (auto& om : m_output_managers) {
EKAT_REQUIRE_MSG(not om.is_restart(),
"Error! No restart output should be in m_output_managers. Model restart "
"output should be setup in m_restart_output_manager./n");

om.set_logger(m_atm_logger);
om.setup(m_atm_comm,params,m_field_mgrs,m_grids_manager,m_run_t0,m_case_t0,false);
om.setup(m_field_mgrs,m_grids_manager);
}

m_ad_status |= s_output_inited;
Expand Down Expand Up @@ -1574,6 +1606,8 @@ initialize (const ekat::Comm& atm_comm,

init_time_stamps (run_t0, case_t0);

create_output_managers ();

create_atm_processes ();

create_grids ();
Expand All @@ -1595,10 +1629,10 @@ void AtmosphereDriver::run (const int dt) {
start_timer("EAMxx::run");

// DEBUG option: Check if user has set the run to fail at a specific timestep.
auto& debug = m_atm_params.sublist("driver_debug_options");
auto fail_step = debug.get<int>("force_crash_nsteps",-1);
if (fail_step==m_current_ts.get_num_steps()) {
std::abort();
auto& debug = m_atm_params.sublist("driver_debug_options");
auto fail_step = debug.get<int>("force_crash_nsteps",-1);
if (fail_step==m_current_ts.get_num_steps()) {
std::abort();
}

// Make sure the end of the time step is after the current start_time
Expand All @@ -1622,6 +1656,7 @@ void AtmosphereDriver::run (const int dt) {
// that quantity at the beginning of the timestep. Or they may need to store
// the timestamp at the beginning of the timestep, so that we can compute
// dt at the end.
if (m_restart_output_manager) m_restart_output_manager->init_timestep(m_current_ts, dt);
for (auto& it : m_output_managers) {
it.init_timestep(m_current_ts,dt);
}
Expand All @@ -1648,6 +1683,7 @@ void AtmosphereDriver::run (const int dt) {

// Update output streams
m_atm_logger->debug("[EAMxx::run] running output managers...");
if (m_restart_output_manager) m_restart_output_manager->run(m_current_ts);
for (auto& out_mgr : m_output_managers) {
out_mgr.run(m_current_ts);
}
Expand Down Expand Up @@ -1678,6 +1714,10 @@ void AtmosphereDriver::finalize ( /* inputs? */ ) {
m_atm_logger->info("[EAMxx] Finalize ...");

// Finalize and destroy output streams, make sure files are closed
if (m_restart_output_manager) {
m_restart_output_manager->finalize();
m_restart_output_manager = nullptr;
}
for (auto& out_mgr : m_output_managers) {
out_mgr.finalize();
}
Expand Down Expand Up @@ -1801,6 +1841,11 @@ void AtmosphereDriver::report_res_dep_memory_footprint () const {
// Atm buffer
my_dev_mem_usage += m_memory_buffer->allocated_bytes();
// Output
if (m_restart_output_manager) {
const auto om_footprint = m_restart_output_manager->res_dep_memory_footprint();
my_dev_mem_usage += om_footprint;
my_host_mem_usage += om_footprint;
}
for (const auto& om : m_output_managers) {
const auto om_footprint = om.res_dep_memory_footprint ();
my_dev_mem_usage += om_footprint;
Expand Down
5 changes: 5 additions & 0 deletions components/eamxx/src/control/atmosphere_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ class AtmosphereDriver
// Load initial conditions for atm inputs
void initialize_fields ();

// Create output managers
void create_output_managers ();

// Initialie I/O structures for output
void initialize_output_managers ();

Expand Down Expand Up @@ -204,6 +207,7 @@ class AtmosphereDriver

ekat::ParameterList m_atm_params;

std::shared_ptr<OutputManager> m_restart_output_manager;
std::list<OutputManager> m_output_managers;

std::shared_ptr<ATMBufferManager> m_memory_buffer;
Expand Down Expand Up @@ -239,6 +243,7 @@ class AtmosphereDriver
static constexpr int s_fields_inited = 256;
static constexpr int s_procs_inited = 512;
static constexpr int s_ts_inited = 1024;
static constexpr int s_output_created = 2048;

// Lazy version to ensure s_atm_inited & flag is true for every flag,
// even if someone adds new flags later on
Expand Down
3 changes: 2 additions & 1 deletion components/eamxx/src/dynamics/homme/tests/dyn_grid_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ TEST_CASE("dyn_grid_io")
out_params.set<std::string>("Floating Point Precision","real");

OutputManager output;
output.setup (comm, out_params, fm_dyn, gm, t0, t0, false);
output.initialize(comm, out_params, t0, false);
output.setup (fm_dyn, gm);
output.run(t0);
output.finalize();

Expand Down
32 changes: 16 additions & 16 deletions components/eamxx/src/mct_coupling/atm_comp_mct.F90
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,13 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename )
call string_f2c(yaml_fname,yaml_fname_c)
call string_f2c(calendar,calendar_c)
call string_f2c(trim(atm_log_fname),atm_log_fname_c)
call string_f2c(trim(caseid),caseid_c)
call string_f2c(trim(hostname),hostname_c)
call string_f2c(trim(username),username_c)
call scream_create_atm_instance (mpicom_atm, ATM_ID, yaml_fname_c, atm_log_fname_c, &
INT(cur_ymd,kind=C_INT), INT(cur_tod,kind=C_INT), &
INT(case_start_ymd,kind=C_INT), INT(case_start_tod,kind=C_INT), &
calendar_c)
calendar_c, caseid_c, hostname_c, username_c)


! Init MCT gsMap
Expand Down Expand Up @@ -269,10 +272,7 @@ subroutine atm_init_mct( EClock, cdata, x2a, a2x, NLFilename )
c_loc(export_constant_multiple), c_loc(do_export_during_init), &
num_cpl_exports, num_scream_exports, export_field_size)

call string_f2c(trim(caseid),caseid_c)
call string_f2c(trim(username),username_c)
call string_f2c(trim(hostname),hostname_c)
call scream_init_atm (caseid_c,hostname_c,username_c)
call scream_init_atm ()
#ifdef HAVE_MOAB
! data should be set now inside moab from import and export fields
! do we import and export or just export at init stage ?
Expand Down Expand Up @@ -308,7 +308,7 @@ subroutine atm_run_mct(EClock, cdata, x2a, a2x)
#ifdef MOABCOMP
use mct_mod
use seq_comm_mct, only : num_moab_exports
#endif
#endif

integer :: ent_type
#ifdef MOABCOMP
Expand All @@ -324,7 +324,7 @@ subroutine atm_run_mct(EClock, cdata, x2a, a2x)

type(ESMF_Clock) ,intent(inout) :: EClock ! clock
type(seq_cdata) ,intent(inout) :: cdata
type(mct_aVect) ,intent(inout) :: x2a ! driver -> atmosphere
type(mct_aVect) ,intent(inout) :: x2a ! driver -> atmosphere
type(mct_aVect) ,intent(inout) :: a2x ! atmosphere -> driver

!--- local ---
Expand Down Expand Up @@ -403,7 +403,7 @@ subroutine atm_final_mct(EClock, cdata, x2a, a2x)
! !INPUT/OUTPUT PARAMETERS:
type(ESMF_Clock) ,intent(inout) :: EClock ! clock
type(seq_cdata) ,intent(inout) :: cdata
type(mct_aVect) ,intent(inout) :: x2a ! driver -> atmosphere
type(mct_aVect) ,intent(inout) :: x2a ! driver -> atmosphere
type(mct_aVect) ,intent(inout) :: a2x ! atmosphere -> driver

!----------------------------------------------------------------------------
Expand Down Expand Up @@ -437,7 +437,7 @@ subroutine atm_Set_gsMap_mct( mpicom_atm, ATMID, GSMap_atm )
! Build the atmosphere grid numbering for MCT
! NOTE: Numbering scheme is: West to East and South to North
! starting at south pole. Should be the same as what's used in SCRIP

! Determine global seg map
num_local_cols = scream_get_num_local_cols()
num_global_cols = scream_get_num_global_cols()
Expand Down Expand Up @@ -466,7 +466,7 @@ subroutine atm_domain_mct( lsize, gsMap_atm, dom_atm )
!
integer , intent(in) :: lsize
type(mct_gsMap), intent(in) :: gsMap_atm
type(mct_ggrid), intent(inout):: dom_atm
type(mct_ggrid), intent(inout):: dom_atm
!
! Local Variables
!
Expand All @@ -488,20 +488,20 @@ subroutine atm_domain_mct( lsize, gsMap_atm, dom_atm )

! Fill in correct values for domain components
call scream_get_cols_latlon(c_loc(data1),c_loc(data2))
call mct_gGrid_importRAttr(dom_atm,"lat",data1,lsize)
call mct_gGrid_importRAttr(dom_atm,"lon",data2,lsize)
call mct_gGrid_importRAttr(dom_atm,"lat",data1,lsize)
call mct_gGrid_importRAttr(dom_atm,"lon",data2,lsize)

call scream_get_cols_area(c_loc(data1))
call mct_gGrid_importRAttr(dom_atm,"area",data1,lsize)
call mct_gGrid_importRAttr(dom_atm,"area",data1,lsize)

! Mask and frac are both exactly 1
data1 = 1.0
call mct_gGrid_importRAttr(dom_atm,"mask",data1,lsize)
call mct_gGrid_importRAttr(dom_atm,"frac",data1,lsize)
call mct_gGrid_importRAttr(dom_atm,"mask",data1,lsize)
call mct_gGrid_importRAttr(dom_atm,"frac",data1,lsize)

! Aream is computed by mct, so give invalid initial value
data1 = -9999.0_R8
call mct_gGrid_importRAttr(dom_atm,"aream",data1,lsize)
call mct_gGrid_importRAttr(dom_atm,"aream",data1,lsize)
end subroutine atm_domain_mct

#ifdef HAVE_MOAB
Expand Down
14 changes: 7 additions & 7 deletions components/eamxx/src/mct_coupling/scream_cxx_f90_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,10 @@ void scream_create_atm_instance (const MPI_Fint f_comm, const int atm_id,
const int run_start_tod,
const int case_start_ymd,
const int case_start_tod,
const char* calendar_name)
const char* calendar_name,
const char* caseid,
const char* hostname,
const char* username)
{
using namespace scream;
using namespace scream::control;
Expand Down Expand Up @@ -171,6 +174,8 @@ void scream_create_atm_instance (const MPI_Fint f_comm, const int atm_id,
ad.set_params(scream_params);
ad.init_scorpio(atm_id);
ad.init_time_stamps(run_t0,case_t0);
ad.set_provenance_data (caseid,hostname,username);
ad.create_output_managers ();
ad.create_atm_processes ();
ad.create_grids ();
ad.create_fields ();
Expand Down Expand Up @@ -239,9 +244,7 @@ void scream_init_hip_atm () {
}
#endif

void scream_init_atm (const char* caseid,
const char* hostname,
const char* username)
void scream_init_atm ()
{
using namespace scream;
using namespace scream::control;
Expand All @@ -250,9 +253,6 @@ void scream_init_atm (const char* caseid,
// Get the ad, then complete initialization
auto& ad = get_ad_nonconst();

// Set provenance info in the driver (will be added to the output files)
ad.set_provenance_data (caseid,hostname,username);

// Init all fields, atm processes, and output streams
ad.initialize_fields ();
ad.initialize_atm_procs ();
Expand Down
Loading