diff --git a/scripts/envs/model-server b/scripts/envs/model-server
index 07ed07f1..e89f858c 100644
--- a/scripts/envs/model-server
+++ b/scripts/envs/model-server
@@ -3,21 +3,21 @@
 # env var used only for this sample you can list it here
 export APP_NAME="model-server"
 export APP_DISPLAY_NAME="Model Server, No Application"
-export APP_DESC="Deploy a granite-3.0 8b model with a vLLM server. While no application is configured, this model server can be utilized in other Software Templates, like a Chatbot Application for instance."
-export APP_SUMMARY="A granite-3.0 8b model server deployment."
+export APP_DESC="Deploy a granite-3.1 8b model with a vLLM server. While no application is configured, this model server can be utilized in other Software Templates, like a Chatbot Application for instance."
+export APP_SUMMARY="A granite-3.1 8b model server deployment."
 export APP_TAGS='["ai", "vllm", "modelserver"]'
 
-# https://github.com/redhat-ai-dev/developer-images/tree/main/model-servers/vllm/0.6.4
-export VLLM_CONTAINER="quay.io/redhat-ai-dev/vllm-openai-ubi9:v0.6.4"
+# https://github.com/redhat-ai-dev/developer-images/tree/main/model-servers/vllm/0.6.6
+export VLLM_CONTAINER="quay.io/redhat-ai-dev/vllm-openai-ubi9:v0.6.6"
 export VLLM_DESC="A high throughput, memory efficient inference and serving engine with GPU support for LLMs in OpenShift"
 export VLLM_SRC="https://github.com/rh-aiservices-bu/llm-on-openshift/tree/main/llm-servers/vllm/gpu"
 
-# https://huggingface.co/ibm-granite/granite-3.0-8b-instruct
-export LLM_MODEL_NAME="ibm-granite/granite-3.0-8b-instruct"
+# https://huggingface.co/ibm-granite/granite-3.1-8b-instruct
+export LLM_MODEL_NAME="ibm-granite/granite-3.1-8b-instruct"
 export LLM_MAX_MODEL_LEN=4096
 export LLM_MODEL_CLASSIFICATION="Text Generation"
 export LLM_MODEL_LICENSE="Apache-2.0"
-export LLM_MODEL_SRC="https://huggingface.co/ibm-granite/granite-3.0-8b-instruct"
+export LLM_MODEL_SRC="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct"
 
 #model configuration
 export SUPPORT_LLM=true
diff --git a/skeleton/source-repo/.tekton/docker-pull-request.yaml b/skeleton/source-repo/.tekton/docker-pull-request.yaml
index 5f43a364..87929040 100644
--- a/skeleton/source-repo/.tekton/docker-pull-request.yaml
+++ b/skeleton/source-repo/.tekton/docker-pull-request.yaml
@@ -6,13 +6,12 @@ metadata:
     pipelinesascode.tekton.dev/on-event: "[pull_request]"
     pipelinesascode.tekton.dev/on-target-branch: "[${{ values.defaultBranch }}]"
     pipelinesascode.tekton.dev/max-keep-runs: "2"
-    pipelinesascode.tekton.dev/pipeline: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/pipelines/docker-build-ai-rhdh.yaml"
+    pipelinesascode.tekton.dev/pipeline: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/pipelines/docker-build-ai-rhdh-pull-request.yaml"
     pipelinesascode.tekton.dev/task-0: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/init.yaml"
     pipelinesascode.tekton.dev/task-1: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/git-clone.yaml"
     pipelinesascode.tekton.dev/task-2: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/buildah-ai-rhdh.yaml"
-    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/update-deployment.yaml"
-    pipelinesascode.tekton.dev/task-4: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/show-sbom-rhdh.yaml"
-    pipelinesascode.tekton.dev/task-5: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/summary.yaml"
+    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/show-sbom-rhdh.yaml"
+    pipelinesascode.tekton.dev/task-4: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/summary.yaml"
   labels:
     argocd/app-name: ${{ values.name }}
     janus-idp.io/tekton: ${{ values.name }}
@@ -33,12 +32,8 @@ spec:
       value: ${{ values.buildContext }}
     - name: revision
      value: '{{revision}}'
-    - name: event-type
-      value: '{{event_type}}'
-    - name: gitops-auth-secret-name
-      value: ${{ values.gitopsSecretName }}
   pipelineRef:
-    name: docker-build-ai-rhdh
+    name: docker-build-ai-rhdh-pull-request
   workspaces:
     - name: git-auth
       secret:
diff --git a/skeleton/source-repo/.tekton/docker-push.yaml b/skeleton/source-repo/.tekton/docker-push.yaml
index b6f15e41..105c0b24 100644
--- a/skeleton/source-repo/.tekton/docker-push.yaml
+++ b/skeleton/source-repo/.tekton/docker-push.yaml
@@ -6,11 +6,11 @@ metadata:
     pipelinesascode.tekton.dev/on-event: "[push]"
     pipelinesascode.tekton.dev/on-target-branch: "[${{ values.defaultBranch }}]"
     pipelinesascode.tekton.dev/max-keep-runs: "2"
-    pipelinesascode.tekton.dev/pipeline: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/pipelines/docker-build-ai-rhdh.yaml"
+    pipelinesascode.tekton.dev/pipeline: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/pipelines/docker-build-ai-rhdh-push-gitops.yaml"
     pipelinesascode.tekton.dev/task-0: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/init.yaml"
     pipelinesascode.tekton.dev/task-1: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/git-clone.yaml"
     pipelinesascode.tekton.dev/task-2: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/buildah-ai-rhdh.yaml"
-    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/update-deployment.yaml"
+    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/update-deployment-gitops.yaml"
     pipelinesascode.tekton.dev/task-4: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/show-sbom-rhdh.yaml"
     pipelinesascode.tekton.dev/task-5: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/summary.yaml"
   labels:
@@ -33,16 +33,17 @@ spec:
       value: ${{ values.buildContext }}
     - name: revision
       value: '{{revision}}'
-    - name: event-type
-      value: '{{event_type}}'
     - name: gitops-auth-secret-name
       value: ${{ values.gitopsSecretName }}
   pipelineRef:
-    name: docker-build-ai-rhdh
+    name: docker-build-ai-rhdh-push-gitops
   workspaces:
     - name: git-auth
      secret:
        secretName: "{{ git_auth_secret }}"
+    - name: gitops-auth
+      secret:
+        secretName: $(params.gitops-auth-secret-name)
     - name: workspace
       volumeClaimTemplate:
         spec:
diff --git a/templates/model-server/docs/index.md b/templates/model-server/docs/index.md
index 7cdac255..ba1791d9 100644
--- a/templates/model-server/docs/index.md
+++ b/templates/model-server/docs/index.md
@@ -23,4 +23,4 @@ As a user you will be able to customize each section of the template, such as:
 
 - Application deployment namespace
 
-For more information please check out our general template documentation in our [source code repository](https://github.com/redhat-ai-dev/ai-lab-template). If you are interested in the ibm-granite/granite-3.0-8b-instruct model being used for this template, refer to the hugging face [page](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct)!
\ No newline at end of file
+For more information please check out our general template documentation in our [source code repository](https://github.com/redhat-ai-dev/ai-lab-template). If you are interested in the ibm-granite/granite-3.1-8b-instruct model being used for this template, refer to the hugging face [page](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct)!
\ No newline at end of file
diff --git a/templates/model-server/template.yaml b/templates/model-server/template.yaml
index adf1d220..39de3107 100644
--- a/templates/model-server/template.yaml
+++ b/templates/model-server/template.yaml
@@ -4,7 +4,7 @@ kind: Template
 metadata:
   name: model-server
   title: Model Server, No Application
-  description: Deploy a granite-3.0 8b model with a vLLM server. While no application is configured, this model server can be utilized in other Software Templates, like a Chatbot Application for instance.
+  description: Deploy a granite-3.1 8b model with a vLLM server. While no application is configured, this model server can be utilized in other Software Templates, like a Chatbot Application for instance.
   tags: ["ai", "vllm", "modelserver"]
   annotations:
     backstage.io/techdocs-ref: dir:.
@@ -58,11 +58,11 @@ spec:
          const: vLLM
        modelNameDeployed:
          title: Model Name
-          description: Text Generation | Apache-2.0 | [Learn more](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct)
-          default: ibm-granite/granite-3.0-8b-instruct
+          description: Text Generation | Apache-2.0 | [Learn more](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct)
+          default: ibm-granite/granite-3.1-8b-instruct
          type: string
          enum:
-            - ibm-granite/granite-3.0-8b-instruct
+            - ibm-granite/granite-3.1-8b-instruct
          # SED_LLM_SERVER_END
 
    - title: Application Repository Information
@@ -163,8 +163,8 @@ spec:
          # SED_LLM_SERVER_START
          # for vllm
          vllmSelected: ${{ parameters.modelServer === 'vLLM' }}
-          vllmModelServiceContainer: quay.io/redhat-ai-dev/vllm-openai-ubi9:v0.6.4
-          modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else 'ibm-granite/granite-3.0-8b-instruct' }}
+          vllmModelServiceContainer: quay.io/redhat-ai-dev/vllm-openai-ubi9:v0.6.6
+          modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else 'ibm-granite/granite-3.1-8b-instruct' }}
          maxModelLength: 4096
          # SED_LLM_SERVER_END
          existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}