Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vllm 0.6.6 #26

Merged
merged 1 commit into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions model-servers/vllm/0.6.6/Containerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
FROM registry.access.redhat.com/ubi9/python-311 as cuda-runtime

###################################################################################################
# CUDA 12.1 Layer, from https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.1.1 #
###################################################################################################

# Base
USER 0

ENV NVARCH x86_64
ENV NVIDIA_REQUIRE_CUDA "cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526"
ENV NV_CUDA_CUDART_VERSION 12.1.105-1

COPY cuda.repo-x86_64 /etc/yum.repos.d/cuda.repo

RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel9/${NVARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -

ENV CUDA_VERSION 12.1.1

# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
RUN yum upgrade -y && yum install -y \
cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
cuda-compat-12-1 \
&& ln -s cuda-12.1 /usr/local/cuda \
&& yum -y clean all --enablerepo='*' && \
rm -rf /var/cache/dnf && \
find /var/log -type f -name "*.log" -exec rm -f {} \;

# nvidia-docker 1.0
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

COPY NGC-DL-CONTAINER-LICENSE /

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility

# Runtime
ENV NV_CUDA_LIB_VERSION 12.1.1-1

ENV NV_NVTX_VERSION 12.1.105-1
ENV NV_LIBNPP_VERSION 12.1.0.40-1
ENV NV_LIBNPP_PACKAGE libnpp-12-1-${NV_LIBNPP_VERSION}
ENV NV_LIBCUBLAS_VERSION 12.1.3.1-1
ENV NV_LIBNCCL_PACKAGE_NAME libnccl
ENV NV_LIBNCCL_PACKAGE_VERSION 2.17.1-1
ENV NV_LIBNCCL_VERSION 2.17.1
ENV NCCL_VERSION 2.17.1
ENV NV_LIBNCCL_PACKAGE ${NV_LIBNCCL_PACKAGE_NAME}-${NV_LIBNCCL_PACKAGE_VERSION}+cuda12.1

RUN yum install -y \
cuda-libraries-12-1-${NV_CUDA_LIB_VERSION} \
cuda-nvtx-12-1-${NV_NVTX_VERSION} \
${NV_LIBNPP_PACKAGE} \
libcublas-12-1-${NV_LIBCUBLAS_VERSION} \
${NV_LIBNCCL_PACKAGE} \
&& yum clean all \
&& rm -rf /var/cache/yum/*

# Set this flag so that libraries can find the location of CUDA
ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda

# CUDA Devel image
FROM cuda-runtime as cuda-devel
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility

ENV NV_CUDA_LIB_VERSION 12.1.1-1
ENV NV_NVPROF_VERSION 12.1.105-1
ENV NV_NVPROF_DEV_PACKAGE cuda-nvprof-12-1-${NV_NVPROF_VERSION}
ENV NV_CUDA_CUDART_DEV_VERSION 12.1.105-1
ENV NV_NVML_DEV_VERSION 12.1.105-1
ENV NV_LIBCUBLAS_DEV_VERSION 12.1.3.1-1
ENV NV_LIBNPP_DEV_VERSION 12.1.0.40-1
ENV NV_LIBNPP_DEV_PACKAGE libnpp-devel-12-1-${NV_LIBNPP_DEV_VERSION}
ENV NV_LIBNCCL_DEV_PACKAGE_NAME libnccl-devel
ENV NV_LIBNCCL_DEV_PACKAGE_VERSION 2.17.1-1
ENV NCCL_VERSION 2.17.1
ENV NV_LIBNCCL_DEV_PACKAGE ${NV_LIBNCCL_DEV_PACKAGE_NAME}-${NV_LIBNCCL_DEV_PACKAGE_VERSION}+cuda12.1
ENV NV_CUDA_NSIGHT_COMPUTE_VERSION 12.1.1-1
ENV NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE cuda-nsight-compute-12-1-${NV_CUDA_NSIGHT_COMPUTE_VERSION}


RUN yum install -y \
make \
findutils \
cuda-command-line-tools-12-1-${NV_CUDA_LIB_VERSION} \
cuda-libraries-devel-12-1-${NV_CUDA_LIB_VERSION} \
cuda-minimal-build-12-1-${NV_CUDA_LIB_VERSION} \
cuda-cudart-devel-12-1-${NV_CUDA_CUDART_DEV_VERSION} \
${NV_NVPROF_DEV_PACKAGE} \
cuda-nvml-devel-12-1-${NV_NVML_DEV_VERSION} \
libcublas-devel-12-1-${NV_LIBCUBLAS_DEV_VERSION} \
${NV_LIBNPP_DEV_PACKAGE} \
${NV_LIBNCCL_DEV_PACKAGE} \
${NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE} \
&& yum clean all \
&& rm -rf /var/cache/yum/*

ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs

#############################################
# End of CUDA 12.1 Layer #
#############################################

###################################
# vLLM install in build container #
###################################

FROM cuda-devel as vllm-install

WORKDIR /opt/app-root/src

USER 1001

COPY --chown=1001:0 requirements.txt ./

RUN pip install --no-cache-dir -r requirements.txt && \
rm -f requirements.txt && \
# Install flash-attn from PyPI \
pip install flash-attn==2.5.8 --no-build-isolation && \
# Correction for FIPS mode \
sed -i s/md5/sha1/g /opt/app-root/lib64/python3.11/site-packages/triton/runtime/jit.py && \
# Fix permissions to support pip in Openshift environments \
chmod -R g+w /opt/app-root/lib/python3.11/site-packages && \
fix-permissions /opt/app-root -P

##################
# vLLM container #
##################

FROM cuda-runtime as vllm-container

WORKDIR /opt/app-root/src

COPY --from=vllm-install --chown=1001:0 /opt/app-root/lib64/python3.11/site-packages /opt/app-root/lib64/python3.11/site-packages
COPY --from=vllm-install --chown=1001:0 /opt/app-root/src/.config/vllm/nccl/cu12/libnccl.so.2* /usr/local/lib/libnccl.so.2

# Fix VLLM_NCCL_SO_PATH
ENV VLLM_NCCL_SO_PATH=/usr/local/lib/libnccl.so.2

USER 1001

EXPOSE 8000

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

Loading
Loading