Add first-pass at stability tritonserver-based imagegen comp #227
base: main
Changes from all commits
de78a3d
78b6b26
16f32a8
195f700
0cf5965
4ea1511
acfeb8e
45ac958
6f238cd
d111033
2f47d34
6bae349
58d14ca
268d182
2c78a6a
89ffd51
7cfda4d
0670bec
e4634ec
da80ad5
221fae3
@@ -0,0 +1,17 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

ENV LANG C.UTF-8

COPY comps /home/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/comps/imagegen/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home

WORKDIR /home/comps/imagegen

ENTRYPOINT ["python", "imagegen.py"]
@@ -0,0 +1,36 @@
# ImageGen Microservice

The ImageGen microservice generates images from text input. It takes a Triton endpoint that serves the actual text-to-image model and, in turn, exposes a solution endpoint consumable by users.

# 1. Instructions to Launch This Solution

This solution requires one backing container to operate: a Triton-based inference server that executes the diffusion model. The sections below walk through deploying both the model server image and the solution server image.

## 1.1 Build Model Server Docker Image

```bash
cd triton && make build
```

## 1.2 Build Solution Server Docker Image

```bash
docker build -t opea/image-gen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```

## 1.3 Run Docker with CLI

```bash
docker run -p 18000:8000 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACE_API_TOKEN=${HUGGINGFACE_API_TOKEN} -e HABANA_VISIBLE_DEVICES=0 -v /opt/intel/huggingface/hub:/root/.cache/huggingface/hub ohio-image-triton:latest
docker run -p 9765:9765 -e IMAGE_GEN_TRITON_ENDPOINT=http://localhost:18000 opea/image-gen:latest
```

# 2. Consume Solution Service

You can use the following `curl` command to test whether the service is up. Note that the first request can be slow because the model weights must be downloaded.

```bash
curl http://localhost:9765/v1/images/generate \
    -H "Content-Type: application/json" \
    -d '{"text":"A cat holding a fish skeleton"}'
```
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,78 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import base64
import os
import sys
import time
from io import BytesIO

import numpy as np
import tritonclient.http as httpclient
from PIL import Image
from tritonclient.utils import *

from comps import Base64ByteStrDoc, ServiceType, TextDoc, opea_microservices, opea_telemetry, register_microservice


@opea_telemetry
def generate_image(*, text, triton_endpoint):
    start = time.time()

    network_timeout = 1000 * 300
    with httpclient.InferenceServerClient(triton_endpoint, network_timeout=network_timeout) as client:
        # Encode the prompt(s) as a zero-padded UINT8 batch, the layout the Triton model expects.
        queries = [text]
        input_arr = [np.frombuffer(bytes(q, "utf8"), dtype=np.uint8) for q in queries]
        max_size = max([a.size for a in input_arr])
        input_arr = [np.pad(a, (0, max_size - a.size)) for a in input_arr]
        input_arr = np.stack(input_arr)

        inputs = [httpclient.InferInput("INPUT0", input_arr.shape, "UINT8")]
        inputs[0].set_data_from_numpy(input_arr, binary_data=True)

        outputs = [
            httpclient.InferRequestedOutput("OUTPUT0"),
        ]

        ## TODO acwrenn
        ## Parameterize for other ImageGen models?
        model_name = "stability"
        response = client.infer(
            model_name,
            inputs,
            request_id=str(1),
            outputs=outputs,
            timeout=network_timeout,
        )

        result = response.get_response()

        output0_data = response.as_numpy("OUTPUT0")
        if len(output0_data) == 0:
            raise Exception("error fetching images from triton server")
        print(f"generated image in {time.time() - start} seconds")
        # Return the raw bytes of the first generated image.
        return bytes(output0_data[0])


@register_microservice(
    name="opea_service@imagegen",
    service_type=ServiceType.IMAGEGEN,
    endpoint="/v1/images/generate",
    host="0.0.0.0",
    port=9765,
    input_datatype=TextDoc,
    output_datatype=Base64ByteStrDoc,
)
@opea_telemetry
async def handle_generate_image(input: TextDoc):
    # Named differently from the Triton helper above so the call below resolves to the
    # helper instead of recursing into this handler.
    triton_endpoint = os.getenv("IMAGE_GEN_TRITON_ENDPOINT", "http://localhost:8080")
    text = input.text
    image = generate_image(text=text, triton_endpoint=triton_endpoint)
    buffered = BytesIO()
    buffered.write(image)
    return Base64ByteStrDoc(byte_str=base64.b64encode(buffered.getvalue()))


if __name__ == "__main__":
    print("[imagegen - router] ImageGen initialized.")
    opea_microservices["opea_service@imagegen"].start()
@@ -0,0 +1,25 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -eu

default_port=8080
default_card_num=0
default_model_cache_directory="${HOME}/.cache/huggingface/hub"
HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

# Capture the command in a variable (via command substitution) so it can be evaluated below.
docker_cmd=$(cat <<EOF
docker run -d \
    --name=TritonStabilityServer -p ${default_port}:8000 \
    -e HABANA_VISIBLE_DEVICES=${default_card_num} \
    -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
    --cap-add=sys_nic \
    --ipc=host \
    --runtime=habana \
    -v ${default_model_cache_directory}:/root/.cache/huggingface/hub \
    ohio-stability-triton
EOF
)

eval "$docker_cmd"
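Because this script starts the Triton model server detached, it can be useful to poll the server before launching the solution container. A small sketch using the `tritonclient` package already listed in requirements.txt; the `localhost:8080` address and the `stability` model name mirror the defaults used elsewhere in this PR and are assumptions about the local setup.

```python
# Readiness probe sketch for the Triton server started by the launch script.
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient("localhost:8080")
if client.is_server_live() and client.is_server_ready():
    print("Triton server is up")
    # is_model_ready stays False until the model has finished loading (and downloading weights).
    print("stability model ready:", client.is_model_ready("stability"))
else:
    print("Triton server is not ready yet")
```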
@@ -0,0 +1,11 @@
docarray[full]
fastapi
numpy
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
pillow
sentencepiece
shortuuid
torch
tritonclient[http]
@@ -0,0 +1,45 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

ARG TRITON_VERSION=24.04

FROM nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3 AS triton

Review comment: I need to check on this container.

FROM base

ARG MODEL_NAME=stability

RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
    dpkg -i cuda-keyring_1.1-1_all.deb && \
    apt-get update && \
    apt-get install -y --fix-missing --no-install-recommends \
    datacenter-gpu-manager

Review comment: I need to check this one too.

Reply: I can take a stab at building the triton containers from source - but I am not sure where that code would live. Probably not in this repo. And who would host it? The Nvidia-distributed triton server container DOES contain a bunch of extra stuff we don't need.

RUN apt-get update && \
    apt-get install -y --no-install-recommends --fix-missing \
    build-essential \
    libaio-dev \
    libaio1 \
    libb64-0d \
    libcupti-dev \
    libjpeg-dev \
    libpng-dev \
    libsndfile-dev \
    libwebp-dev

ARG TRITON_VERSION=24.04

COPY --from=triton /opt/tritonserver /opt/tritonserver
COPY --from=triton /usr/local/cuda-* /usr/local/cuda

ENV PATH=$PATH:/opt/tritonserver/bin
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/tritonserver/lib
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/targets/x86_64-linux/lib

RUN git clone --single-branch -b r${TRITON_VERSION} https://github.com/triton-inference-server/python_backend /opt/tritonserver/backends/opea_backends && \
    mkdir -p /opt/tritonserver/backends/opea_backends/models/${MODEL_NAME}/1

COPY ./model.py /opt/tritonserver/backends/opea_backends/models/${MODEL_NAME}/1/model.py
COPY ./config.pbtxt /opt/tritonserver/backends/opea_backends/models/${MODEL_NAME}/config.pbtxt

CMD ["tritonserver", "--model-repository", "/opt/tritonserver/backends/opea_backends/models"]
@@ -0,0 +1,114 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM ubuntu:jammy
ARG ARTIFACTORY_URL=vault.habana.ai
ARG VERSION=1.15.1
ARG REVISION=15

ENV DEBIAN_FRONTEND=noninteractive
ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so
ENV HABANA_LOGS=/var/log/habana_logs/
ENV OS_NUMBER=2204
ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw
ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    apt-transport-https \
    apt-utils \
    bc \
    build-essential \
    ca-certificates \
    dkms \
    ethtool \
    gcc \
    git \
    gnupg \
    gpg-agent \
    graphviz \
    libgl1 \
    libgoogle-glog0v5 \
    libjemalloc2 \
    libpq-dev \
    locales \
    lsof \
    make \
    openssh-client \
    openssh-server \
    protobuf-compiler \
    python3 \
    python3-dev \
    python3-pip \
    python3-tk \
    python3-venv \
    unzip \
    vim \
    libkrb5-3 \
    libgnutls30 \
    wget && \
    apt-get autoremove && apt-get clean

Review comment: We need

Review comment (on lines +18 to +49): I'd make sure every single one of these packages is absolutely required before adding them to the container.

Reply: Yeah - I think your other comment is valid. I will look and see if I can replace the Habana base image with something managed by a service/owner outside of this repo. Our OHIO team should probably provide us a "habana-base" image for building model servers on top of.

Reply: So I am going to ignore your comments on the inner triton server dockerfiles for now, and look into outsourcing this code and building on that instead.

RUN locale-gen en_US.UTF-8

ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8
ENV LC_CTYPE=en_US.UTF-8

# There is no need to store pip installation files inside docker image
ENV PIP_NO_CACHE_DIR=on
ENV PIP_DISABLE_PIP_VERSION_CHECK=1

RUN python3 -m pip install pip==23.3.1 setuptools==67.3.3 wheel==0.38.4

COPY install_efa.sh .
RUN ./install_efa.sh && rm install_efa.sh && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh

ENV LIBFABRIC_VERSION="1.20.0"
ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}"
ENV MPI_ROOT=/opt/amazon/openmpi
ENV LD_LIBRARY_PATH=$LIBFABRIC_ROOT/lib:${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH
ENV PATH=${LIBFABRIC_ROOT}/bin:${MPI_ROOT}/bin:$PATH
ENV OPAL_PREFIX=${MPI_ROOT}
ENV MPICC=${MPI_ROOT}/bin/mpicc
ENV RDMAV_FORK_SAFE=1
ENV FI_EFA_USE_DEVICE_RDMA=1
ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src
ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib

RUN wget -O- https://${ARTIFACTORY_URL}/artifactory/api/gpg/key/public | gpg --dearmor -o /usr/share/keyrings/habana-artifactory.gpg && \
    chown root:root /usr/share/keyrings/habana-artifactory.gpg && \
    chmod 644 /usr/share/keyrings/habana-artifactory.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/habana-artifactory.gpg] https://${ARTIFACTORY_URL}/artifactory/debian jammy main" | tee -a /etc/apt/sources.list && \
    apt-get update && \
    apt-get install -y habanalabs-rdma-core="$VERSION"-"$REVISION" \
    habanalabs-thunk="$VERSION"-"$REVISION" \
    habanalabs-firmware-tools="$VERSION"-"$REVISION" \
    habanalabs-graph="$VERSION"-"$REVISION" && \
    apt-get autoremove --yes && apt-get clean && rm -rf /var/lib/apt/lists/* && \
    sed --in-place "/$ARTIFACTORY_URL/d" /etc/apt/sources.list

RUN wget -nv -O /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \
    cd /tmp/ && tar xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \
    cd /tmp/libfabric-${LIBFABRIC_VERSION} && \
    ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr && \
    make && make install

RUN wget -nv -O /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip && \
    unzip /tmp/main.zip -d /tmp && \
    cd /tmp/hccl_ofi_wrapper-main && \
    make && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so && \
    cd / && \
    rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main

RUN python3 -m pip install habana_media_loader=="${VERSION}"."${REVISION}"

# SSH configuration necessary to support mpi-operator v2
RUN mkdir -p /var/run/sshd && \
    sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
    sed -i 's/#\(ForwardAgent \).*/\1yes/g' /etc/ssh/ssh_config && \
    echo "    UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
    sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && \
    echo "/etc/init.d/ssh start \"-p 3022\"" >> ~/.bashrc && \
    sed -i '/[ -z "$PS1" ] && return/s/^/#/g' ~/.bashrc

Review comment: Any packages that were installed because they were needed during compilation but not at run time need to be removed now, or we need to make this a multi-stage container.
@@ -0,0 +1,53 @@
# Copyright (c) 2023 HabanaLabs, Ltd.
#
# SPDX-License-Identifier: Apache-2.0

Review comment: Is this Dockerfile borrowed from

Reply: Let me check with the engineer that helped me with this base layer - I think I remember something about them only shipping the dockerfile, and not distributing the built image...

ARG BASE_NAME=base-installer-ubuntu22.04
ARG VERSION=1.15.1
ARG REVISION=15

FROM ${BASE_NAME}:${VERSION}-${REVISION}

ARG BASE_NAME=base-installer-ubuntu22.04
ARG VERSION=1.15.1
ARG REVISION=15
ARG ARTIFACTORY_URL=vault.habana.ai
ARG PT_VERSION=2.2.0

ENV LANG=en_US.UTF-8
ENV PYTHONPATH=/root:/usr/lib/habanalabs/

RUN apt-get update && apt-get install -y \
    curl \
    libcurl4 \
    moreutils \
    iproute2 \
    libcairo2-dev \
    libglib2.0-dev \
    libhdf5-dev \
    libselinux1-dev \
    libnuma-dev \
    libpcre2-dev \
    libjpeg-dev \
    liblapack-dev \
    libopenblas-dev \
    numactl \
    pdsh \
    libmkl-dev \
    libgoogle-perftools-dev && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1

RUN echo $BASE_NAME
COPY install_packages.sh .

RUN ./install_packages.sh && rm -f install_packages.sh && \
    /sbin/ldconfig && echo "source /etc/profile.d/habanalabs.sh" >> ~/.bashrc

ENV LD_PRELOAD=/lib/x86_64-linux-gnu/libtcmalloc.so.4
ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768

RUN rm -rf /tmp/*

RUN apt-get update && pip3 install optimum[habana] opencv-python
Review comment (on the `host="0.0.0.0"` binding): Usually, I'd set this to localhost or 127.0.0.1 and then provide a mechanism for the user to decide if they want to run locally or listen to the world.

Reply: I lifted this from the TTS comp - I guess it's a common question of "do it the same way as the codebase, or do what makes sense locally". Should I stick to "do it locally correct" in this case?