Skip to content

Commit

Permalink
Update repositories & self-hosted runner fixes (#25)
Browse files Browse the repository at this point in the history
CI:

* Some self-hosted runner fixes

Misc:

* Update repositories
  • Loading branch information
sasha0552 authored Feb 17, 2024
1 parent 45ed817 commit 921272e
Show file tree
Hide file tree
Showing 10 changed files with 24 additions and 12 deletions.
8 changes: 7 additions & 1 deletion .ci/template/sh-build-iso.yml.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,18 @@ jobs:
steps:
- name: Start virtual machine
run: |
# Do nothing for now
# FIXME
exit 0

build:
needs: vm-start
runs-on: self-hosted

steps:
- name: Remove previous artifacts
run: |
sudo rm -fr out/

- name: Checkout repository
uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -70,6 +74,7 @@ jobs:
path: out/

vm-shutdown:
if: "!cancelled()"
needs: build
runs-on: self-hosted

Expand All @@ -80,6 +85,7 @@ jobs:
sudo shutdown 1

vm-shutdown-wait:
if: "!cancelled()"
needs: vm-shutdown
runs-on: ubuntu-latest

Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/sh-build-iso-rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,18 @@ jobs:
steps:
- name: Start virtual machine
run: |
# Do nothing for now
# FIXME
exit 0
build:
needs: vm-start
runs-on: self-hosted

steps:
- name: Remove previous artifacts
run: |
sudo rm -fr out/
- name: Checkout repository
uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -68,6 +72,7 @@ jobs:
path: out/

vm-shutdown:
if: "!cancelled()"
needs: build
runs-on: self-hosted

Expand All @@ -78,6 +83,7 @@ jobs:
sudo shutdown 1
vm-shutdown-wait:
if: "!cancelled()"
needs: vm-shutdown
runs-on: ubuntu-latest

Expand Down
2 changes: 1 addition & 1 deletion airootfs/home/tori/axolotl
2 changes: 1 addition & 1 deletion airootfs/home/tori/koboldcpp
Submodule koboldcpp updated 155 files
2 changes: 1 addition & 1 deletion airootfs/home/tori/llama.cpp
Submodule llama.cpp updated 69 files
+2 −0 .gitignore
+212 −183 CMakeLists.txt
+8 −0 Makefile
+1 −1 README.md
+50 −0 ci/run.sh
+53 −26 common/common.cpp
+2 −2 common/common.h
+1 −1 common/sampling.cpp
+7 −7 common/sampling.h
+101 −39 convert-hf-to-gguf.py
+21 −16 convert.py
+1 −0 examples/CMakeLists.txt
+2 −1 examples/batched-bench/batched-bench.cpp
+1 −1 examples/batched.swift/Sources/main.swift
+2 −1 examples/batched/batched.cpp
+2 −1 examples/beam-search/beam-search.cpp
+108 −37 examples/embedding/embedding.cpp
+3 −3 examples/finetune/README.md
+121 −121 examples/finetune/finetune.cpp
+2 −1 examples/imatrix/imatrix.cpp
+2 −1 examples/infill/infill.cpp
+1 −2 examples/llama-bench/llama-bench.cpp
+2 −2 examples/llama.android/app/src/main/cpp/llama-android.cpp
+1 −1 examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
+48 −4 examples/llava/README.md
+642 −136 examples/llava/clip.cpp
+28 −21 examples/llava/clip.h
+59 −7 examples/llava/convert-image-encoder-to-gguf.py
+26 −3 examples/llava/llava-cli.cpp
+167 −0 examples/llava/llava-surgery-v2.py
+280 −17 examples/llava/llava.cpp
+0 −2 examples/llava/llava.h
+2 −1 examples/lookahead/lookahead.cpp
+2 −1 examples/lookup/lookup.cpp
+5 −1 examples/main/README.md
+2 −1 examples/main/main.cpp
+2 −1 examples/parallel/parallel.cpp
+2 −1 examples/passkey/passkey.cpp
+2 −1 examples/perplexity/perplexity.cpp
+1 −1 examples/quantize/quantize.cpp
+9 −0 examples/server/README.md
+82 −32 examples/server/server.cpp
+2 −1 examples/simple/simple.cpp
+2 −1 examples/speculative/speculative.cpp
+1 −1 examples/tokenize/tokenize.cpp
+27 −27 examples/train-text-from-scratch/train-text-from-scratch.cpp
+8 −0 ggml-backend.c
+1 −0 ggml-cuda.cu
+18 −18 ggml-quants.c
+48 −29 ggml-vulkan.cpp
+67 −13 ggml.c
+11 −1 ggml.h
+45 −0 gguf-py/examples/reader.py
+47 −20 gguf-py/gguf/constants.py
+10 −0 gguf-py/gguf/gguf_writer.py
+10 −2 gguf-py/gguf/tensor_mapping.py
+1 −5 gguf-py/gguf/vocab.py
+287 −102 llama.cpp
+15 −1 llama.h
+37 −0 scripts/compare-commits.sh
+107 −0 scripts/hf.sh
+1 −1 tests/test-autorelease.cpp
+2 −3 tests/test-backend-ops.cpp
+1 −1 tests/test-model-load-cancel.cpp
+1 −1 tests/test-tokenizer-0-falcon.cpp
+1 −1 tests/test-tokenizer-0-llama.cpp
+38 −37 tests/test-tokenizer-1-bpe.cpp
+30 −23 tests/test-tokenizer-1-llama.cpp
+42 −30 unicode.h
2 changes: 1 addition & 1 deletion airootfs/home/tori/vllm
Submodule vllm updated 43 files
+10 −4 .buildkite/run-benchmarks.sh
+7 −0 .buildkite/test-pipeline.yaml
+3 −1 .buildkite/test-template.j2
+5 −16 Dockerfile
+3 −3 Dockerfile.rocm
+284 −0 benchmarks/backend_request_func.py
+258 −120 benchmarks/benchmark_serving.py
+1 −1 benchmarks/launch_tgi_server.sh
+2 −0 docs/source/conf.py
+1 −0 docs/source/index.rst
+2 −5 docs/source/models/supported_models.rst
+1 −0 docs/source/quantization/fp8_e5m2_kv_cache.rst
+1 −0 requirements.txt
+5 −0 tests/lora/conftest.py
+47 −35 tests/lora/test_lora_manager.py
+53 −0 tests/lora/test_mixtral.py
+18 −0 tests/test_regression.py
+1 −1 vllm/__init__.py
+4 −1 vllm/engine/llm_engine.py
+31 −65 vllm/lora/models.py
+164 −170 vllm/lora/punica.py
+11 −10 vllm/lora/worker_manager.py
+3 −1 vllm/model_executor/layers/triton_kernel/prefix_prefill.py
+1 −1 vllm/model_executor/model_loader.py
+3 −4 vllm/model_executor/models/__init__.py
+0 −342 vllm/model_executor/models/aquila.py
+5 −1 vllm/model_executor/models/decilm.py
+0 −299 vllm/model_executor/models/internlm.py
+36 −8 vllm/model_executor/models/llama.py
+26 −1 vllm/model_executor/models/mistral.py
+46 −6 vllm/model_executor/models/mixtral.py
+0 −330 vllm/model_executor/models/yi.py
+9 −4 vllm/model_executor/parallel_utils/communication_op.py
+130 −0 vllm/model_executor/parallel_utils/cupy_utils.py
+4 −0 vllm/model_executor/parallel_utils/custom_all_reduce.py
+37 −0 vllm/model_executor/parallel_utils/parallel_state.py
+5 −2 vllm/test_utils.py
+0 −2 vllm/transformers_utils/config.py
+0 −4 vllm/transformers_utils/configs/__init__.py
+0 −69 vllm/transformers_utils/configs/aquila.py
+0 −64 vllm/transformers_utils/configs/yi.py
+61 −16 vllm/worker/model_runner.py
+31 −3 vllm/worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@

sampler_order_max = 7
stop_token_max = 16
@@ -337,6 +338,7 @@ def load_model(model_filename):
@@ -339,6 +340,7 @@ def load_model(model_filename):
return ret

def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}):
+ enter_pstate_high()
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
inputs = generation_inputs()
outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
@@ -436,6 +438,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
@@ -438,6 +440,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
sindex = outstr.find(trim_str)
if sindex != -1 and trim_str!="":
outstr = outstr[:sindex]
+ enter_pstate_low()
return outstr

def utfprint(str):
@@ -2332,6 +2335,7 @@ def sanitize_string(input_string):
@@ -2337,6 +2340,7 @@ def sanitize_string(input_string):
return sanitized_string

def main(launch_args,start_server=True):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
llama_params["top_p"] = json_value(body, "top_p", 1.0);
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -526,7 +526,7 @@ struct llama_server_context
@@ -522,7 +522,7 @@ struct llama_server_context
}

slot->params.stream = json_value(data, "stream", false);
Expand Down

0 comments on commit 921272e

Please sign in to comment.