Upgrade Granite Model Template to granite-3.1 #61

Merged 2 commits on Jan 21, 2025
scripts/envs/model-server (14 changes: 7 additions & 7 deletions)

@@ -3,21 +3,21 @@
 # env var used only for this sample you can list it here
 export APP_NAME="model-server"
 export APP_DISPLAY_NAME="Model Server, No Application"
-export APP_DESC="Deploy a granite-3.0 8b model with a vLLM server. While no application is configured, this model server can be utilized in other Software Templates, like a Chatbot Application for instance."
-export APP_SUMMARY="A granite-3.0 8b model server deployment."
+export APP_DESC="Deploy a granite-3.1 8b model with a vLLM server. While no application is configured, this model server can be utilized in other Software Templates, like a Chatbot Application for instance."
+export APP_SUMMARY="A granite-3.1 8b model server deployment."
 export APP_TAGS='["ai", "vllm", "modelserver"]'

-# https://github.com/redhat-ai-dev/developer-images/tree/main/model-servers/vllm/0.6.4
-export VLLM_CONTAINER="quay.io/redhat-ai-dev/vllm-openai-ubi9:v0.6.4"
+# https://github.com/redhat-ai-dev/developer-images/tree/main/model-servers/vllm/0.6.6
+export VLLM_CONTAINER="quay.io/redhat-ai-dev/vllm-openai-ubi9:v0.6.6"
 export VLLM_DESC="A high throughput, memory efficient inference and serving engine with GPU support for LLMs in OpenShift"
 export VLLM_SRC="https://github.com/rh-aiservices-bu/llm-on-openshift/tree/main/llm-servers/vllm/gpu"

-# https://huggingface.co/ibm-granite/granite-3.0-8b-instruct
-export LLM_MODEL_NAME="ibm-granite/granite-3.0-8b-instruct"
+# https://huggingface.co/ibm-granite/granite-3.1-8b-instruct
+export LLM_MODEL_NAME="ibm-granite/granite-3.1-8b-instruct"
 export LLM_MAX_MODEL_LEN=4096
 export LLM_MODEL_CLASSIFICATION="Text Generation"
 export LLM_MODEL_LICENSE="Apache-2.0"
-export LLM_MODEL_SRC="https://huggingface.co/ibm-granite/granite-3.0-8b-instruct"
+export LLM_MODEL_SRC="https://huggingface.co/ibm-granite/granite-3.1-8b-instruct"

 #model configuration
 export SUPPORT_LLM=true
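As a quick local sanity check of the bumped values (a sketch, not part of this PR; it assumes a POSIX shell at the repository root and podman on PATH):

# Source the updated env file and confirm the new model/container pair
source scripts/envs/model-server
echo "model: ${LLM_MODEL_NAME}"
echo "image: ${VLLM_CONTAINER}"

# Verify the v0.6.6 tag actually resolves in the registry
podman pull "${VLLM_CONTAINER}"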
skeleton/source-repo/.tekton/docker-pull-request.yaml (13 changes: 4 additions & 9 deletions)

@@ -6,13 +6,12 @@ metadata:
     pipelinesascode.tekton.dev/on-event: "[pull_request]"
     pipelinesascode.tekton.dev/on-target-branch: "[${{ values.defaultBranch }}]"
     pipelinesascode.tekton.dev/max-keep-runs: "2"
-    pipelinesascode.tekton.dev/pipeline: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/pipelines/docker-build-ai-rhdh.yaml"
+    pipelinesascode.tekton.dev/pipeline: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/pipelines/docker-build-ai-rhdh-pull-request.yaml"
     pipelinesascode.tekton.dev/task-0: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/init.yaml"
     pipelinesascode.tekton.dev/task-1: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/git-clone.yaml"
     pipelinesascode.tekton.dev/task-2: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/buildah-ai-rhdh.yaml"
-    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/update-deployment.yaml"
-    pipelinesascode.tekton.dev/task-4: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/show-sbom-rhdh.yaml"
-    pipelinesascode.tekton.dev/task-5: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/summary.yaml"
+    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/show-sbom-rhdh.yaml"
+    pipelinesascode.tekton.dev/task-4: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/summary.yaml"
   labels:
     argocd/app-name: ${{ values.name }}
     janus-idp.io/tekton: ${{ values.name }}
@@ -33,12 +33,8 @@ spec:
       value: ${{ values.buildContext }}
     - name: revision
       value: '{{revision}}'
-    - name: event-type
-      value: '{{event_type}}'
-    - name: gitops-auth-secret-name
-      value: ${{ values.gitopsSecretName }}
   pipelineRef:
-    name: docker-build-ai-rhdh
+    name: docker-build-ai-rhdh-pull-request
   workspaces:
     - name: git-auth
       secret:
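For context (not something the diff itself states): Pipelines-as-Code fetches the pipeline and task definitions from the raw URLs in these annotations when it creates the PipelineRun, so the rename only works if the new file is published at that path. A quick check from any shell with network access:

# Confirm the renamed pull-request pipeline definition is reachable
curl -fsSL "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/pipelines/docker-build-ai-rhdh-pull-request.yaml" | head -n 5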
skeleton/source-repo/.tekton/docker-push.yaml (11 changes: 6 additions & 5 deletions)

@@ -6,11 +6,11 @@ metadata:
     pipelinesascode.tekton.dev/on-event: "[push]"
     pipelinesascode.tekton.dev/on-target-branch: "[${{ values.defaultBranch }}]"
     pipelinesascode.tekton.dev/max-keep-runs: "2"
-    pipelinesascode.tekton.dev/pipeline: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/pipelines/docker-build-ai-rhdh.yaml"
+    pipelinesascode.tekton.dev/pipeline: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/pipelines/docker-build-ai-rhdh-push-gitops.yaml"
     pipelinesascode.tekton.dev/task-0: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/init.yaml"
     pipelinesascode.tekton.dev/task-1: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/git-clone.yaml"
     pipelinesascode.tekton.dev/task-2: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/buildah-ai-rhdh.yaml"
-    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/update-deployment.yaml"
+    pipelinesascode.tekton.dev/task-3: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/update-deployment-gitops.yaml"
     pipelinesascode.tekton.dev/task-4: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/show-sbom-rhdh.yaml"
     pipelinesascode.tekton.dev/task-5: "https://raw.githubusercontent.com/redhat-ai-dev/rhdh-pipelines/main/pac/tasks/summary.yaml"
   labels:
@@ -33,16 +33,17 @@ spec:
       value: ${{ values.buildContext }}
     - name: revision
       value: '{{revision}}'
-    - name: event-type
-      value: '{{event_type}}'
     - name: gitops-auth-secret-name
       value: ${{ values.gitopsSecretName }}
   pipelineRef:
-    name: docker-build-ai-rhdh
+    name: docker-build-ai-rhdh-push-gitops
   workspaces:
     - name: git-auth
      secret:
         secretName: "{{ git_auth_secret }}"
+    - name: gitops-auth
+      secret:
+        secretName: $(params.gitops-auth-secret-name)
     - name: workspace
       volumeClaimTemplate:
         spec:
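The new gitops-auth workspace resolves its Secret name from the gitops-auth-secret-name param, which in turn comes from values.gitopsSecretName, so that Secret must already exist in the target namespace before a push-triggered run. A minimal sketch with hypothetical names and key (the real key layout depends on what the update-deployment-gitops task expects):

# Hypothetical secret name, namespace, and key; adjust to your environment
oc create secret generic gitops-auth-secret \
  --namespace my-model-server-ns \
  --from-literal=password="<git-provider-token>"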
templates/model-server/docs/index.md (2 changes: 1 addition & 1 deletion)

@@ -23,4 +23,4 @@ As a user you will be able to customize each section of the template, such as:
 - Application deployment namespace


-For more information please check out our general template documentation in our [source code repository](https://github.com/redhat-ai-dev/ai-lab-template). If you are interested in the ibm-granite/granite-3.0-8b-instruct model being used for this template, refer to the hugging face [page](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct)!
+For more information please check out our general template documentation in our [source code repository](https://github.com/redhat-ai-dev/ai-lab-template). If you are interested in the ibm-granite/granite-3.1-8b-instruct model being used for this template, refer to the hugging face [page](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct)!
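Once a server created from this template is running, the vLLM OpenAI-compatible API can confirm which model is actually loaded; a hedged sketch, assuming vLLM's default port 8000 and a reachable route or port-forward named model-server:

# Should list ibm-granite/granite-3.1-8b-instruct after the upgrade
curl -s http://model-server:8000/v1/models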
templates/model-server/template.yaml (12 changes: 6 additions & 6 deletions)

@@ -4,7 +4,7 @@ kind: Template
 metadata:
   name: model-server
   title: Model Server, No Application
-  description: Deploy a granite-3.0 8b model with a vLLM server. While no application is configured, this model server can be utilized in other Software Templates, like a Chatbot Application for instance.
+  description: Deploy a granite-3.1 8b model with a vLLM server. While no application is configured, this model server can be utilized in other Software Templates, like a Chatbot Application for instance.
   tags: ["ai", "vllm", "modelserver"]
   annotations:
     backstage.io/techdocs-ref: dir:.
@@ -58,11 +58,11 @@ spec:
           const: vLLM
         modelNameDeployed:
           title: Model Name
-          description: Text Generation | Apache-2.0 | [Learn more](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct)
-          default: ibm-granite/granite-3.0-8b-instruct
+          description: Text Generation | Apache-2.0 | [Learn more](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct)
+          default: ibm-granite/granite-3.1-8b-instruct
           type: string
           enum:
-            - ibm-granite/granite-3.0-8b-instruct
+            - ibm-granite/granite-3.1-8b-instruct
         # SED_LLM_SERVER_END

     - title: Application Repository Information
@@ -163,8 +163,8 @@
         # SED_LLM_SERVER_START
         # for vllm
         vllmSelected: ${{ parameters.modelServer === 'vLLM' }}
-        vllmModelServiceContainer: quay.io/redhat-ai-dev/vllm-openai-ubi9:v0.6.4
-        modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else 'ibm-granite/granite-3.0-8b-instruct' }}
+        vllmModelServiceContainer: quay.io/redhat-ai-dev/vllm-openai-ubi9:v0.6.6
+        modelName: ${{ parameters.modelName if parameters.modelServer === 'Existing model server' else 'ibm-granite/granite-3.1-8b-instruct' }}
         maxModelLength: 4096
         # SED_LLM_SERVER_END
         existingModelServer: ${{ parameters.modelServer === 'Existing model server' }}
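A version bump that touches five files invites stale references elsewhere; a generic repo-wide sweep (not a check this PR adds) confirms nothing still points at granite-3.0 or vLLM v0.6.4:

# Any output here is a leftover reference that still needs updating
grep -rn --exclude-dir=.git -e 'granite-3\.0' -e 'v0\.6\.4' .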