Update repositories & migrate nvidia-pstate to other repo
sasha0552 authored Mar 5, 2024
1 parent 921272e commit 689e4e7
Showing 15 changed files with 45 additions and 156 deletions.
2 changes: 1 addition & 1 deletion airootfs/home/tori/ComfyUI
Submodule ComfyUI updated 51 files
+47 −2 .ci/update_windows/update.py
+7 −1 .ci/update_windows/update_comfyui.bat
+0 −3 .ci/update_windows/update_comfyui_and_python_dependencies.bat
+0 −11 .ci/update_windows_cu118/update_comfyui_and_python_dependencies.bat
+0 −71 .github/workflows/windows_release_cu118_dependencies.yml
+0 −37 .github/workflows/windows_release_cu118_dependencies_2.yml
+0 −79 .github/workflows/windows_release_cu118_package.yml
+3 −4 .github/workflows/windows_release_dependencies.yml
+3 −3 .github/workflows/windows_release_nightly_pytorch.yml
+1 −1 .github/workflows/windows_release_package.yml
+1 −1 README.md
+7 −1 comfy/clip_model.py
+2 −2 comfy/controlnet.py
+6 −2 comfy/diffusers_convert.py
+9 −28 comfy/extra_samplers/uni_pc.py
+27 −25 comfy/gligen.py
+57 −0 comfy/latent_formats.py
+1 −1 comfy/ldm/cascade/common.py
+95 −0 comfy/ldm/cascade/stage_c_coder.py
+15 −3 comfy/ldm/modules/attention.py
+27 −24 comfy/ldm/modules/diffusionmodules/openaimodel.py
+12 −10 comfy/ldm/modules/diffusionmodules/util.py
+10 −0 comfy/lora.py
+10 −7 comfy/model_base.py
+19 −4 comfy/model_detection.py
+8 −5 comfy/model_management.py
+7 −5 comfy/model_patcher.py
+26 −9 comfy/model_sampling.py
+11 −21 comfy/samplers.py
+62 −19 comfy/sd.py
+28 −24 comfy/sd1_clip.py
+6 −6 comfy/sdxl_clip.py
+59 −20 comfy/supported_models.py
+4 −2 comfy/supported_models_base.py
+19 −2 comfy/utils.py
+1 −269 comfy_extras/nodes_canny.py
+42 −0 comfy_extras/nodes_differential_diffusion.py
+19 −0 comfy_extras/nodes_mask.py
+45 −3 comfy_extras/nodes_model_advanced.py
+49 −0 comfy_extras/nodes_morphology.py
+1 −1 comfy_extras/nodes_perpneg.py
+37 −2 comfy_extras/nodes_stable_cascade.py
+7 −1 cuda_malloc.py
+3 −0 custom_nodes/example_node.py.example
+10 −4 execution.py
+7 −0 main.py
+35 −0 new_updater.py
+21 −18 nodes.py
+1 −0 requirements.txt
+2 −2 server.py
+2 −2 web/scripts/pnginfo.js
2 changes: 1 addition & 1 deletion airootfs/home/tori/SillyTavern-Extras
2 changes: 1 addition & 1 deletion airootfs/home/tori/automatic
2 changes: 1 addition & 1 deletion airootfs/home/tori/axolotl
2 changes: 1 addition & 1 deletion airootfs/home/tori/koboldcpp
Submodule koboldcpp updated 133 files
2 changes: 1 addition & 1 deletion airootfs/home/tori/llama.cpp
Submodule llama.cpp updated 111 files
2 changes: 1 addition & 1 deletion airootfs/home/tori/vllm
Submodule vllm updated 144 files
@@ -4,25 +4,23 @@
import subprocess
from functools import lru_cache
import installer
-+from pstates import enter_pstate_high, enter_pstate_low
++from nvidia_pstate import set_pstate_high, set_pstate_low


commandline_args = os.environ.get('COMMANDLINE_ARGS', "")
@@ -175,6 +176,7 @@ def start_server(immediate=True, server=None):
debug_install = installer.log.debug if os.environ.get('SD_INSTALL_DEBUG', None) is not None else lambda *args, **kwargs: None
@@ -260,4 +261,5 @@ def main():


if __name__ == "__main__":
-+ enter_pstate_low()
installer.ensure_base_requirements()
init_args() # setup argparser and default folders
installer.args = args
++ set_pstate_low()
main()
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -3,6 +3,7 @@ import sys
import time
import datetime
from modules.errors import log
-+from pstates import enter_pstate_high, enter_pstate_low
++from nvidia_pstate import set_pstate_high, set_pstate_low


class State:
@@ -31,7 +29,7 @@
if self.debug_output:
log.debug(f'State begin: {self.job}')
+ if self.job != "load":
-+ enter_pstate_high()
++ set_pstate_high()
modules.devices.torch_gc()

def end(self, api=None):
@@ -40,7 +38,7 @@
if self.debug_output:
log.debug(f'State end: {self.job} time={time.time() - self.time_start:.2f}')
+ if self.job != "load":
-+ enter_pstate_low()
++ set_pstate_low()
self.job = ""
self.job_count = 0
self.job_no = 0
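The two files patched above follow one pattern: the launcher drops to a low P-state at startup, and the state tracker raises it only while a non-"load" job is running, dropping it again when the job ends. A minimal sketch of that pattern, assuming only the set_pstate_high/set_pstate_low helpers the patch imports from the pip-installed nvidia_pstate package (run_job and work are illustrative names, not part of the commit):

from nvidia_pstate import set_pstate_high, set_pstate_low

def run_job(job, work):
    """Run work() in a high P-state unless the job is a model load."""
    if job != "load":          # mirror the patch: loads stay in the low state
        set_pstate_high()
    try:
        return work()          # the actual GPU work
    finally:
        if job != "load":
            set_pstate_low()   # always fall back to the power-saving state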
@@ -4,31 +4,31 @@
import argparse
import json, sys, http.server, time, asyncio, socket, threading
from concurrent.futures import ThreadPoolExecutor
-+from pstates import enter_pstate_high, enter_pstate_low
++from nvidia_pstate import set_pstate_high, set_pstate_low

sampler_order_max = 7
stop_token_max = 16
-@@ -339,6 +340,7 @@ def load_model(model_filename):
+@@ -380,6 +381,7 @@ def load_model(model_filename):
return ret

def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}):
-+ enter_pstate_high()
++ set_pstate_high()
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
inputs = generation_inputs()
outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
-@@ -438,6 +440,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
inputs.prompt = prompt.encode("UTF-8")
+@@ -478,6 +480,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
sindex = outstr.find(trim_str)
if sindex != -1 and trim_str!="":
outstr = outstr[:sindex]
-+ enter_pstate_low()
++ set_pstate_low()
return outstr

def utfprint(str):
-@@ -2337,6 +2340,7 @@ def sanitize_string(input_string):

+@@ -2545,6 +2548,7 @@ def sanitize_string(input_string):
return sanitized_string

def main(launch_args,start_server=True):
-+ enter_pstate_low()
- global args, friendlymodelname
++ set_pstate_low()
+ global args, friendlymodelname, friendlysdmodelname, fullsdmodelpath
args = launch_args
embedded_kailite = None
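The koboldcpp patch applies the same idea around generate(): raise the P-state on entry, drop it before returning, and hold a low state while main() starts up. Where a project has several such entry points, the pair of calls can be factored into a context manager; this is a hypothetical refactor, not code from the commit, and it assumes only the same two nvidia_pstate helpers:

from contextlib import contextmanager

from nvidia_pstate import set_pstate_high, set_pstate_low

@contextmanager
def high_pstate():
    """Raise the GPU P-state for the duration of a block, then drop it again."""
    set_pstate_high()
    try:
        yield
    finally:
        set_pstate_low()

# hypothetical usage:
# with high_pstate():
#     outstr = generate(prompt)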
@@ -1,17 +1,26 @@
--- a/examples/server/oai.hpp
+++ b/examples/server/oai.hpp
-@@ -35,7 +35,7 @@ inline static json oaicompat_completion_params_parse(
+@@ -33,7 +33,7 @@ inline static json oaicompat_completion_params_parse(
llama_sampling_params default_sparams;
llama_params["model"] = json_value(body, "model", std::string("unknown"));
llama_params["prompt"] = formatted_prompt;
llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
- llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
+ llama_params["cache_prompt"] = json_value(body, "cache_prompt", true);
llama_params["temperature"] = json_value(body, "temperature", 0.0);
llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
llama_params["top_p"] = json_value(body, "top_p", 1.0);
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -522,7 +522,7 @@ struct llama_server_context
@@ -68,7 +68,7 @@ enum slot_command {

struct slot_params {
bool stream = true;
- bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
+ bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt

uint32_t seed = -1; // RNG seed
int32_t n_keep = 0; // number of tokens to keep from initial prompt
@@ -552,7 +552,7 @@ struct llama_server_context
}

slot->params.stream = json_value(data, "stream", false);
@@ -20,14 +29,3 @@
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -93,7 +93,7 @@ enum slot_command
struct slot_params
{
bool stream = true;
- bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
+ bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt

uint32_t seed = -1; // RNG seed
int32_t n_keep = 0; // number of tokens to keep from initial prompt
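Both llama.cpp hunks flip the default of cache_prompt from false to true, so the patched server keeps the processed prompt between requests instead of re-evaluating it. Clients can still opt out per request; a hedged example against a locally running server (the URL, port, and prompt are illustrative, not part of the commit):

import json
import urllib.request

# With this patch the server defaults to cache_prompt = true, so omitting the
# field caches; set it to False to force full prompt reprocessing.
body = json.dumps({
    "prompt": "Hello",
    "n_predict": 32,
    "cache_prompt": False,
}).encode()

req = urllib.request.Request(
    "http://127.0.0.1:8080/completion",   # assumed local llama.cpp server
    data=body,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["content"])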
@@ -6,7 +6,7 @@
# Supported NVIDIA GPU architectures.
-NVIDIA_SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
+NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx942", "gfx1100"}
ROCM_SUPPORTED_ARCHS = {"gfx908", "gfx90a", "gfx942", "gfx1100"}
# SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)

@@ -222,9 +222,9 @@ if _is_cuda() and not compute_capabilities:
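The vllm patch adds Pascal (6.0/6.1) to the supported NVIDIA architectures. A quick check of whether a local NVIDIA GPU falls inside that set, assuming a CUDA-enabled PyTorch install (the set literal is copied from the patched setup.py; everything else is illustrative):

import torch

NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    cap = f"{major}.{minor}"
    print(f"compute capability {cap}:",
          "supported" if cap in NVIDIA_SUPPORTED_ARCHS else "not supported")
else:
    print("no CUDA device visible")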
@@ -0,0 +1,6 @@
#!/bin/bash
set -eu

{% if CUDA %}
pip3 install --break-system-packages nvidia-pstate
{% endif %}
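The {% if CUDA %} / {% endif %} guard means this new script is a template: the pip install of nvidia-pstate is only emitted for CUDA builds. A hypothetical illustration of how such a guard renders, assuming Jinja2-style templating (the commit does not show which engine or build step actually consumes the file):

from jinja2 import Template

template = Template("""#!/bin/bash
set -eu

{% if CUDA %}
pip3 install --break-system-packages nvidia-pstate
{% endif %}
""")

print(template.render(CUDA=True))    # CUDA image: the pip3 install line is kept
print(template.render(CUDA=False))   # other images: the guarded block is dropped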
17 changes: 0 additions & 17 deletions airootfs/usr/lib/python3.11/pstates.py

This file was deleted.

96 changes: 0 additions & 96 deletions airootfs/usr/local/bin/nvidia-pstate

This file was deleted.
