diff --git a/src/aind_data_schema/base.py b/src/aind_data_schema/base.py
index bd938bd2c..4db7bb65d 100644
--- a/src/aind_data_schema/base.py
+++ b/src/aind_data_schema/base.py
@@ -2,6 +2,7 @@
 
 import json
 import re
+import logging
 from pathlib import Path
 from typing import Any, Generic, Optional, TypeVar, get_args
 
@@ -23,6 +24,9 @@
 from aind_data_schema_models.brain_atlas import CCFStructure
 
 
+MAX_FILE_SIZE = 500 * 1024  # 500KB
+
+
 def _coerce_naive_datetime(v: Any, handler: ValidatorFunctionWrapHandler) -> AwareDatetime:
     """Validator to wrap around AwareDatetime to set a default timezone as user's locale"""
     try:
@@ -178,3 +182,9 @@ def write_standard_file(
 
+        # Serialize once so the text written and the text measured are the same string
+        json_str = self.model_dump_json(indent=3)
         with open(filename, "w") as f:
-            f.write(self.model_dump_json(indent=3))
+            f.write(json_str)
+
+        # Check that size doesn't exceed the maximum
+        if len(json_str) > MAX_FILE_SIZE:
+            logging.warning(f"File size exceeds {MAX_FILE_SIZE / 1024} KB: {filename}")
diff --git a/tests/test_base.py b/tests/test_base.py
index 9aa22a0c2..968630288 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -9,7 +9,14 @@
 from pydantic import ValidationError, create_model, SkipValidation
 from typing import Literal
 
-from aind_data_schema.base import AindGeneric, AwareDatetimeWithDefault, is_dict_corrupt, AindModel, AindCoreModel
+from aind_data_schema.base import (
+    AindGeneric,
+    AwareDatetimeWithDefault,
+    is_dict_corrupt,
+    AindModel,
+    AindCoreModel,
+    MAX_FILE_SIZE,
+)
 from aind_data_schema.core.subject import Subject
 from aind_data_schema_models.brain_atlas import CCFStructure
 
@@ -151,6 +158,20 @@ class Modelv2(AindCoreModel):
         # this is to ensure you can't get a bumped schema_version without passing validation
         self.assertRaises(ValidationError, lambda: Modelv1(**v2_from_v1.model_dump()))
 
+    @patch("builtins.open", new_callable=mock_open)
+    @patch("logging.warning")
+    def test_write_standard_file_size_warning(self, mock_logging_warning: MagicMock, mock_open: MagicMock):
+        """Tests that a warning is logged if the file size exceeds MAX_FILE_SIZE"""
+
+        s = Subject.model_construct()
+        s.subject_id = "s" * (MAX_FILE_SIZE + 1000)
+        s.write_standard_file(output_directory=Path("dir"), suffix=".foo.bar")
+
+        mock_open.assert_has_calls([call(Path("dir/subject.foo.bar"), "w")])
+        mock_logging_warning.assert_called_once_with(
+            f"File size exceeds {MAX_FILE_SIZE / 1024} KB: dir/subject.foo.bar"
+        )
+
 
 if __name__ == "__main__":
     unittest.main()