From e0c363e1bf62d873d7c59d48af4f17409661375d Mon Sep 17 00:00:00 2001 From: Samrudhi Sharma Date: Tue, 19 Mar 2024 17:32:17 -0700 Subject: [PATCH] move accelerate to utils --- src/sagemaker/serve/builder/model_builder.py | 50 ++++----------- .../serve/utils/hardware_detector.py | 37 +++++++++++ .../serve/builder/test_model_builder.py | 63 ++----------------- 3 files changed, 54 insertions(+), 96 deletions(-) diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index ca50e82a99..1f6ceb1ffe 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -20,7 +20,7 @@ from pathlib import Path -from accelerate.commands.estimate import estimate_command_parser, gather_data + from sagemaker import Session from sagemaker.model import Model from sagemaker.base_predictor import PredictorBase @@ -43,7 +43,11 @@ from sagemaker.serve.utils import task from sagemaker.serve.utils.exceptions import TaskNotFoundException from sagemaker.serve.utils.predictors import _get_local_mode_predictor -from sagemaker.serve.utils.hardware_detector import _get_gpu_info, _get_gpu_info_fallback +from sagemaker.serve.utils.hardware_detector import ( + _get_gpu_info, + _get_gpu_info_fallback, + _total_inference_model_size_mib, +) from sagemaker.serve.detector.image_detector import ( auto_detect_container, _detect_framework_and_version, @@ -70,13 +74,6 @@ ModelServer.DJL_SERVING, } -MIB_CONVERSION_FACTOR = 0.00000095367431640625 -MEMORY_BUFFER_MULTIPLIER = 1.2 # 20% buffer -VERSION_DETECTION_ERROR = ( - "Please install accelerate and transformers for HuggingFace (HF) model " - "size calculations e.g. pip install 'sagemaker[huggingface]'" -) - # pylint: disable=attribute-defined-outside-init, disable=E1101 @dataclass @@ -723,32 +720,6 @@ def _schema_builder_init(self, model_task: str): except ValueError: raise TaskNotFoundException(f"Schema builder for {model_task} could not be found.") - def _total_inference_model_size_mib(self): - """Calculates the model size from HF accelerate - - This function gets the model size from accelerate. It also adds a - padding and converts to size MiB. When performing inference, expect - to add up to an additional 20% to the given model size as found by EleutherAI. - """ - try: - dtypes = self.env_vars.get("dtypes", "float32") - parser = estimate_command_parser() - args = parser.parse_args([self.model, "--dtypes", dtypes]) - - output = gather_data( - args - ) # "dtype", "Largest Layer", "Total Size Bytes", "Training using Adam" - except ImportError as e: - logger.warning(VERSION_DETECTION_ERROR) - raise e - - if output is None: - raise ValueError(f"Could not get Model size for {self.model}") - - total_memory_size_mib = MEMORY_BUFFER_MULTIPLIER * output[0][2] * MIB_CONVERSION_FACTOR - logger.info("Total memory size MIB: %s", total_memory_size_mib) - return total_memory_size_mib - def _can_fit_on_single_gpu(self) -> Type[bool]: """Check if model can fit on a single GPU @@ -756,10 +727,15 @@ def _can_fit_on_single_gpu(self) -> Type[bool]: """ try: single_gpu_size_mib = self._try_fetch_gpu_info() - if self._total_inference_model_size_mib() <= single_gpu_size_mib: + if ( + _total_inference_model_size_mib(self.model, self.env_vars.get("dtypes", "float32")) + <= single_gpu_size_mib + ): logger.info( "Total inference model size MIB %s, single GPU size for instance MIB %s", - self._total_inference_model_size_mib(), + _total_inference_model_size_mib( + self.model, self.env_vars.get("dtypes", "float32") + ), single_gpu_size_mib, ) return True diff --git a/src/sagemaker/serve/utils/hardware_detector.py b/src/sagemaker/serve/utils/hardware_detector.py index 632149dc8f..a59b6b1829 100644 --- a/src/sagemaker/serve/utils/hardware_detector.py +++ b/src/sagemaker/serve/utils/hardware_detector.py @@ -18,12 +18,22 @@ from botocore.exceptions import ClientError +from accelerate.commands.estimate import estimate_command_parser, gather_data from sagemaker import Session +from sagemaker.model import Model from sagemaker import instance_types_gpu_info logger = logging.getLogger(__name__) +MIB_CONVERSION_FACTOR = 0.00000095367431640625 +MEMORY_BUFFER_MULTIPLIER = 1.2 # 20% buffer +VERSION_DETECTION_ERROR = ( + "Please install accelerate and transformers for HuggingFace (HF) model " + "size calculations e.g. pip install 'sagemaker[huggingface]'" +) + + def _get_gpu_info(instance_type: str, session: Session) -> Tuple[int, int]: """Get GPU info for the provided instance @@ -108,3 +118,30 @@ def _format_instance_type(instance_type: str) -> str: ec2_instance = ".".join(split_instance) return ec2_instance + + +def _total_inference_model_size_mib(model: Model, dtype: str) -> int: + """Calculates the model size from HF accelerate + + This function gets the model size from accelerate. It also adds a + padding and converts to size MiB. When performing inference, expect + to add up to an additional 20% to the given model size as found by EleutherAI. + """ + try: + dtypes = dtype + parser = estimate_command_parser() + args = parser.parse_args([model, "--dtypes", dtypes]) + + output = gather_data( + args + ) # "dtype", "Largest Layer", "Total Size Bytes", "Training using Adam" + except ImportError as e: + logger.warning(VERSION_DETECTION_ERROR) + raise e + + if output is None: + raise ValueError(f"Could not get Model size for {model}") + + total_memory_size_mib = MEMORY_BUFFER_MULTIPLIER * output[0][2] * MIB_CONVERSION_FACTOR + logger.info("Total memory size MIB: %s", total_memory_size_mib) + return total_memory_size_mib diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py index 3b60d13dfb..37acacea94 100644 --- a/tests/unit/sagemaker/serve/builder/test_model_builder.py +++ b/tests/unit/sagemaker/serve/builder/test_model_builder.py @@ -1205,7 +1205,7 @@ def test_build_for_transformers_happy_case( @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers") @patch("sagemaker.serve.builder.model_builder.ModelBuilder._try_fetch_gpu_info") - @patch("sagemaker.serve.builder.model_builder.ModelBuilder._total_inference_model_size_mib") + @patch("sagemaker.serve.builder.model_builder._total_inference_model_size_mib") @patch("sagemaker.image_uris.retrieve") @patch("sagemaker.djl_inference.model.urllib") @patch("sagemaker.djl_inference.model.json") @@ -1248,7 +1248,7 @@ def test_build_for_transformers_happy_case_with_values( @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_djl", Mock()) @patch("sagemaker.serve.builder.model_builder._get_gpu_info") - @patch("sagemaker.serve.builder.model_builder.ModelBuilder._total_inference_model_size_mib") + @patch("sagemaker.serve.builder.model_builder._total_inference_model_size_mib") @patch("sagemaker.image_uris.retrieve") @patch("sagemaker.djl_inference.model.urllib") @patch("sagemaker.djl_inference.model.json") @@ -1293,7 +1293,7 @@ def test_build_for_transformers_happy_case_with_valid_gpu_info( @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers", Mock()) @patch("sagemaker.serve.builder.model_builder._get_gpu_info") @patch("sagemaker.serve.builder.model_builder._get_gpu_info_fallback") - @patch("sagemaker.serve.builder.model_builder.ModelBuilder._total_inference_model_size_mib") + @patch("sagemaker.serve.builder.model_builder._total_inference_model_size_mib") @patch("sagemaker.image_uris.retrieve") @patch("sagemaker.djl_inference.model.urllib") @patch("sagemaker.djl_inference.model.json") @@ -1342,61 +1342,6 @@ def test_build_for_transformers_happy_case_with_valid_gpu_fallback( ) self.assertEqual(model_builder._can_fit_on_single_gpu(), True) - @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers", Mock()) - @patch("sagemaker.serve.builder.model_builder.estimate_command_parser") - @patch("sagemaker.serve.builder.model_builder.gather_data") - @patch("sagemaker.image_uris.retrieve") - @patch("sagemaker.djl_inference.model.urllib") - @patch("sagemaker.djl_inference.model.json") - @patch("sagemaker.huggingface.llm_utils.urllib") - @patch("sagemaker.huggingface.llm_utils.json") - @patch("sagemaker.model_uris.retrieve") - @patch("sagemaker.serve.builder.model_builder._ServeSettings") - def test_build_for_transformers_happy_case_hugging_face_responses( - self, - mock_serveSettings, - mock_model_uris_retrieve, - mock_llm_utils_json, - mock_llm_utils_urllib, - mock_model_json, - mock_model_urllib, - mock_image_uris_retrieve, - mock_gather_data, - mock_parser, - ): - mock_setting_object = mock_serveSettings.return_value - mock_setting_object.role_arn = mock_role_arn - mock_setting_object.s3_model_data_url = mock_s3_model_data_url - - mock_model_uris_retrieve.side_effect = KeyError - mock_llm_utils_json.load.return_value = {"pipeline_tag": "text-classification"} - mock_llm_utils_urllib.request.Request.side_effect = Mock() - - mock_model_json.load.return_value = {"some": "config"} - mock_model_urllib.request.Request.side_effect = Mock() - mock_image_uris_retrieve.return_value = "https://some-image-uri" - - mock_parser.return_value = Mock() - mock_gather_data.return_value = [[1, 1, 1, 1]] - product = MIB_CONVERSION_FACTOR * 1 * MEMORY_BUFFER_MULTIPLIER - - model_builder = ModelBuilder( - model="stable-diffusion", - sagemaker_session=mock_session, - instance_type=mock_instance_type, - ) - self.assertEqual(model_builder._total_inference_model_size_mib(), product) - - mock_parser.return_value = Mock() - mock_gather_data.return_value = None - model_builder = ModelBuilder( - model="stable-diffusion", - sagemaker_session=mock_session, - instance_type=mock_instance_type, - ) - with self.assertRaises(ValueError) as _: - model_builder._total_inference_model_size_mib() - @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_djl") @patch("sagemaker.serve.builder.model_builder.ModelBuilder._can_fit_on_single_gpu") @patch("sagemaker.image_uris.retrieve") @@ -1556,7 +1501,7 @@ def test_try_fetch_gpu_info_throws( self.assertEqual(model_builder._can_fit_on_single_gpu(), False) @patch("sagemaker.serve.builder.model_builder.ModelBuilder._build_for_transformers", Mock()) - @patch("sagemaker.serve.builder.model_builder.ModelBuilder._total_inference_model_size_mib") + @patch("sagemaker.serve.builder.model_builder._total_inference_model_size_mib") @patch("sagemaker.image_uris.retrieve") @patch("sagemaker.djl_inference.model.urllib") @patch("sagemaker.djl_inference.model.json")