From 6a737925b1a6aabfa3b7052c30ba87bf17be1801 Mon Sep 17 00:00:00 2001 From: David McCloskey Date: Fri, 17 Jan 2025 12:18:30 -0600 Subject: [PATCH 1/2] Move the model_sampling to CPU since it is a very small amount of work --- comfy/sd.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/comfy/sd.py b/comfy/sd.py index d7e89f726e2..d0911908f39 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -923,6 +923,8 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c if inital_load_device != torch.device("cpu"): logging.info("loaded diffusion model directly to GPU") model_management.load_models_gpu([model_patcher], force_full_load=True) + # damcclos: move the model_sampling back to the CPU. The work needed for this is not worth the gpu. + model_patcher.model.model_sampling.to(torch.device("cpu")) return (model_patcher, clip, vae, clipvision) From f258af8c01df950491aafa102a20fe2eae3f1ddf Mon Sep 17 00:00:00 2001 From: David McCloskey Date: Fri, 17 Jan 2025 15:16:49 -0600 Subject: [PATCH 2/2] Adding command line option and moving model_sampling to the cpu --- comfy/cli_args.py | 1 + comfy/model_management.py | 5 +++++ comfy/sd.py | 6 ++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 812798bf88f..6ef948a6cf4 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -73,6 +73,7 @@ def __call__(self, parser, namespace, values, option_string=None): fpvae_group.add_argument("--bf16-vae", action="store_true", help="Run the VAE in bf16.") parser.add_argument("--cpu-vae", action="store_true", help="Run the VAE on the CPU.") +parser.add_argument("--cpu-model-sampling", action="store_true", help="Run the model sampling on the CPU.") fpte_group = parser.add_mutually_exclusive_group() fpte_group.add_argument("--fp8_e4m3fn-text-enc", action="store_true", help="Store text encoder weights in fp8 (e4m3fn variant).") diff --git a/comfy/model_management.py b/comfy/model_management.py index 
f6dfc18b02b..d7b17b04e2c 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -766,6 +766,11 @@ def vae_device(): return torch.device("cpu") return get_torch_device() +def model_sampling_device(): + if args.cpu_model_sampling: + return torch.device("cpu") + return get_torch_device() + def vae_offload_device(): if args.gpu_only: return get_torch_device() diff --git a/comfy/sd.py b/comfy/sd.py index d0911908f39..11f4db41ba9 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -923,8 +923,10 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c if inital_load_device != torch.device("cpu"): logging.info("loaded diffusion model directly to GPU") model_management.load_models_gpu([model_patcher], force_full_load=True) - # damcclos: move the model_sampling back to the CPU. The work needed for this is not worth the gpu. - model_patcher.model.model_sampling.to(torch.device("cpu")) + # damcclos: if model_sampling_device() resolves to the CPU (e.g. --cpu-model-sampling), move the model_sampling back to the CPU. The work needed for this is not worth the GPU. + model_sampling_device = model_management.model_sampling_device() + if model_sampling_device == torch.device("cpu"): + model_patcher.model.model_sampling.to(model_sampling_device) return (model_patcher, clip, vae, clipvision)