Skip to content

Commit

Permalink
Copying logs file (foundation-model-stack#113)
Browse files (browse the repository at this point in the history)
* Copy logs file

Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com>

* Remove unnecessary import

Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com>

---------

Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com>
  • Loading branch information
tharapalanivel authored Apr 9, 2024
1 parent 7b7effd commit 115eacf
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
9 changes: 5 additions & 4 deletions build/launch_training.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,7 +21,6 @@
import os
import tempfile
import shutil
import glob

# First Party
import logging
Expand Down Expand Up @@ -107,9 +106,11 @@ def main():
dirs_exist_ok=True,
)

# copy over any loss logs
for file in glob.glob(f"{training_args.output_dir}/*loss.jsonl"):
shutil.copy(file, original_output_dir)
# copy over logs
shutil.copy(
os.path.join(training_args.output_dir, sft_trainer.TRAINING_LOGS_FILENAME),
original_output_dir,
)


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion tuning/sft_trainer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,6 +48,8 @@
# Local
from tuning.aim_loader import get_aimstack_callback

TRAINING_LOGS_FILENAME = "training_logs.jsonl"


class FileLoggingCallback(TrainerCallback):
"""Exports metrics, e.g., training loss to a file in the checkpoint directory."""
Expand All @@ -64,7 +66,7 @@ def on_log(self, args, state, control, logs=None, **kwargs):
if not state.is_world_process_zero:
return

log_file_path = os.path.join(args.output_dir, "training_logs.jsonl")
log_file_path = os.path.join(args.output_dir, TRAINING_LOGS_FILENAME)
if logs is not None and "loss" in logs and "epoch" in logs:
self._track_loss("loss", "training_loss", log_file_path, logs, state)
elif logs is not None and "eval_loss" in logs and "epoch" in logs:
Expand Down

0 comments on commit 115eacf

Please sign in to comment.