Skip to content

Commit

Permalink
Update repositories & self-hosted runner fixes (#25)
Browse files Browse the repository at this point in the history
CI:

* Some self-hosted runner fixes

Misc:

* Update repositories
  • Loading branch information
sasha0552 authored Feb 17, 2024
1 parent 45ed817 commit 921272e
Show file tree
Hide file tree
Showing 10 changed files with 24 additions and 12 deletions.
8 changes: 7 additions & 1 deletion .ci/template/sh-build-iso.yml.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,18 @@ jobs:
steps:
- name: Start virtual machine
run: |
# Do nothing for now
# FIXME
exit 0

build:
needs: vm-start
runs-on: self-hosted

steps:
- name: Remove previous artifacts
run: |
sudo rm -fr out/

- name: Checkout repository
uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -70,6 +74,7 @@ jobs:
path: out/

vm-shutdown:
if: "!cancelled()"
needs: build
runs-on: self-hosted

Expand All @@ -80,6 +85,7 @@ jobs:
sudo shutdown 1

vm-shutdown-wait:
if: "!cancelled()"
needs: vm-shutdown
runs-on: ubuntu-latest

Expand Down
8 changes: 7 additions & 1 deletion .github/workflows/sh-build-iso-rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,18 @@ jobs:
steps:
- name: Start virtual machine
run: |
# Do nothing for now
# FIXME
exit 0
build:
needs: vm-start
runs-on: self-hosted

steps:
- name: Remove previous artifacts
run: |
sudo rm -fr out/
- name: Checkout repository
uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -68,6 +72,7 @@ jobs:
path: out/

vm-shutdown:
if: "!cancelled()"
needs: build
runs-on: self-hosted

Expand All @@ -78,6 +83,7 @@ jobs:
sudo shutdown 1
vm-shutdown-wait:
if: "!cancelled()"
needs: vm-shutdown
runs-on: ubuntu-latest

Expand Down
2 changes: 1 addition & 1 deletion airootfs/home/tori/axolotl
2 changes: 1 addition & 1 deletion airootfs/home/tori/koboldcpp
Submodule koboldcpp updated 155 files
2 changes: 1 addition & 1 deletion airootfs/home/tori/llama.cpp
Submodule llama.cpp updated 69 files
+2 −0 .gitignore
+212 −183 CMakeLists.txt
+8 −0 Makefile
+1 −1 README.md
+50 −0 ci/run.sh
+53 −26 common/common.cpp
+2 −2 common/common.h
+1 −1 common/sampling.cpp
+7 −7 common/sampling.h
+101 −39 convert-hf-to-gguf.py
+21 −16 convert.py
+1 −0 examples/CMakeLists.txt
+2 −1 examples/batched-bench/batched-bench.cpp
+1 −1 examples/batched.swift/Sources/main.swift
+2 −1 examples/batched/batched.cpp
+2 −1 examples/beam-search/beam-search.cpp
+108 −37 examples/embedding/embedding.cpp
+3 −3 examples/finetune/README.md
+121 −121 examples/finetune/finetune.cpp
+2 −1 examples/imatrix/imatrix.cpp
+2 −1 examples/infill/infill.cpp
+1 −2 examples/llama-bench/llama-bench.cpp
+2 −2 examples/llama.android/app/src/main/cpp/llama-android.cpp
+1 −1 examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
+48 −4 examples/llava/README.md
+642 −136 examples/llava/clip.cpp
+28 −21 examples/llava/clip.h
+59 −7 examples/llava/convert-image-encoder-to-gguf.py
+26 −3 examples/llava/llava-cli.cpp
+167 −0 examples/llava/llava-surgery-v2.py
+280 −17 examples/llava/llava.cpp
+0 −2 examples/llava/llava.h
+2 −1 examples/lookahead/lookahead.cpp
+2 −1 examples/lookup/lookup.cpp
+5 −1 examples/main/README.md
+2 −1 examples/main/main.cpp
+2 −1 examples/parallel/parallel.cpp
+2 −1 examples/passkey/passkey.cpp
+2 −1 examples/perplexity/perplexity.cpp
+1 −1 examples/quantize/quantize.cpp
+9 −0 examples/server/README.md
+82 −32 examples/server/server.cpp
+2 −1 examples/simple/simple.cpp
+2 −1 examples/speculative/speculative.cpp
+1 −1 examples/tokenize/tokenize.cpp
+27 −27 examples/train-text-from-scratch/train-text-from-scratch.cpp
+8 −0 ggml-backend.c
+1 −0 ggml-cuda.cu
+18 −18 ggml-quants.c
+48 −29 ggml-vulkan.cpp
+67 −13 ggml.c
+11 −1 ggml.h
+45 −0 gguf-py/examples/reader.py
+47 −20 gguf-py/gguf/constants.py
+10 −0 gguf-py/gguf/gguf_writer.py
+10 −2 gguf-py/gguf/tensor_mapping.py
+1 −5 gguf-py/gguf/vocab.py
+287 −102 llama.cpp
+15 −1 llama.h
+37 −0 scripts/compare-commits.sh
+107 −0 scripts/hf.sh
+1 −1 tests/test-autorelease.cpp
+2 −3 tests/test-backend-ops.cpp
+1 −1 tests/test-model-load-cancel.cpp
+1 −1 tests/test-tokenizer-0-falcon.cpp
+1 −1 tests/test-tokenizer-0-llama.cpp
+38 −37 tests/test-tokenizer-1-bpe.cpp
+30 −23 tests/test-tokenizer-1-llama.cpp
+42 −30 unicode.h
2 changes: 1 addition & 1 deletion airootfs/home/tori/vllm
Submodule vllm updated 43 files
+10 −4 .buildkite/run-benchmarks.sh
+7 −0 .buildkite/test-pipeline.yaml
+3 −1 .buildkite/test-template.j2
+5 −16 Dockerfile
+3 −3 Dockerfile.rocm
+284 −0 benchmarks/backend_request_func.py
+258 −120 benchmarks/benchmark_serving.py
+1 −1 benchmarks/launch_tgi_server.sh
+2 −0 docs/source/conf.py
+1 −0 docs/source/index.rst
+2 −5 docs/source/models/supported_models.rst
+1 −0 docs/source/quantization/fp8_e5m2_kv_cache.rst
+1 −0 requirements.txt
+5 −0 tests/lora/conftest.py
+47 −35 tests/lora/test_lora_manager.py
+53 −0 tests/lora/test_mixtral.py
+18 −0 tests/test_regression.py
+1 −1 vllm/__init__.py
+4 −1 vllm/engine/llm_engine.py
+31 −65 vllm/lora/models.py
+164 −170 vllm/lora/punica.py
+11 −10 vllm/lora/worker_manager.py
+3 −1 vllm/model_executor/layers/triton_kernel/prefix_prefill.py
+1 −1 vllm/model_executor/model_loader.py
+3 −4 vllm/model_executor/models/__init__.py
+0 −342 vllm/model_executor/models/aquila.py
+5 −1 vllm/model_executor/models/decilm.py
+0 −299 vllm/model_executor/models/internlm.py
+36 −8 vllm/model_executor/models/llama.py
+26 −1 vllm/model_executor/models/mistral.py
+46 −6 vllm/model_executor/models/mixtral.py
+0 −330 vllm/model_executor/models/yi.py
+9 −4 vllm/model_executor/parallel_utils/communication_op.py
+130 −0 vllm/model_executor/parallel_utils/cupy_utils.py
+4 −0 vllm/model_executor/parallel_utils/custom_all_reduce.py
+37 −0 vllm/model_executor/parallel_utils/parallel_state.py
+5 −2 vllm/test_utils.py
+0 −2 vllm/transformers_utils/config.py
+0 −4 vllm/transformers_utils/configs/__init__.py
+0 −69 vllm/transformers_utils/configs/aquila.py
+0 −64 vllm/transformers_utils/configs/yi.py
+61 −16 vllm/worker/model_runner.py
+31 −3 vllm/worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@

sampler_order_max = 7
stop_token_max = 16
@@ -337,6 +338,7 @@ def load_model(model_filename):
@@ -339,6 +340,7 @@ def load_model(model_filename):
return ret

def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}):
+ enter_pstate_high()
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
inputs = generation_inputs()
outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
@@ -436,6 +438,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
@@ -438,6 +440,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
sindex = outstr.find(trim_str)
if sindex != -1 and trim_str!="":
outstr = outstr[:sindex]
+ enter_pstate_low()
return outstr

def utfprint(str):
@@ -2332,6 +2335,7 @@ def sanitize_string(input_string):
@@ -2337,6 +2340,7 @@ def sanitize_string(input_string):
return sanitized_string

def main(launch_args,start_server=True):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
llama_params["top_p"] = json_value(body, "top_p", 1.0);
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -526,7 +526,7 @@ struct llama_server_context
@@ -522,7 +522,7 @@ struct llama_server_context
}

slot->params.stream = json_value(data, "stream", false);
Expand Down

0 comments on commit 921272e

Please sign in to comment.