tutorials/serving_ollama_pod_template/app.py
from union.app import App, Input
from union import ImageSpec, Artifact, Resources
from flytekit.extras.accelerators import L4

# Artifact holding the model weights published by download_model.py.
MoonDreamArtifact = Artifact(name="ollama-moondream")

# Serving image: the Ollama binary is downloaded and unpacked at build time.
ollama_image = ImageSpec(
name="ollama-serve",
apt_packages=["curl"],
packages=["union-runtime>=0.1.11"],
registry="ghcr.io/unionai-oss",
commands=[
"curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o ollama-linux-amd64.tgz",
"tar -C /usr -xzf ollama-linux-amd64.tgz",
],
)

# Ollama server app: mounts the model artifact and serves the Ollama API on port 11434.
ollama = App(
name="ollama-serve",
inputs=[
Input(
value=MoonDreamArtifact.query(),
mount="/home/.ollama",
)
],
container_image=ollama_image,
port=11434,
args="ollama serve",
limits=Resources(cpu="8", mem="10Gi", ephemeral_storage="20Gi", gpu="1"),
accelerator=L4,
    env={
        "OLLAMA_HOST": "0.0.0.0",
        "OLLAMA_ORIGINS": "*",
        # Point Ollama at the models directory inside the mounted artifact.
        "OLLAMA_MODELS": "/home/.ollama/models",
    },
    min_replicas=0,  # scale to zero when idle
    max_replicas=1,
    scaledown_after=200,  # how long to wait before scaling down an idle replica
)


# Frontend image: Streamlit plus the ollama client library.
streamlit_image = ImageSpec(
name="streamlit-chat",
packages=["streamlit==1.41.1", "union-runtime>=0.1.11", "ollama==0.4.7"],
registry="ghcr.io/unionai-oss",
)

# Streamlit chat frontend: receives the Ollama app's URL through the OLLAMA_ENDPOINT env var.
streamlit_app = App(
name="ollama-streamlit",
inputs=[
Input(
value=ollama.query_endpoint(),
env_var="OLLAMA_ENDPOINT",
)
],
limits=Resources(cpu="2", mem="4Gi"),
container_image=streamlit_image,
port=8082,
include=["./streamlit_app.py"],
args=[
"streamlit",
"run",
"streamlit_app.py",
"--server.port",
"8082",
],
min_replicas=0,
max_replicas=1,
)
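With both apps deployed, the Ollama endpoint can be exercised directly with the same `ollama` client the Streamlit frontend uses. A minimal sketch, assuming the ollama 0.4.x Python client; the fallback URL below is a hypothetical placeholder, so substitute the endpoint Union reports for `ollama-serve`:

import os

from ollama import Client

# Hypothetical endpoint; replace with the URL reported for the ollama-serve app.
client = Client(host=os.getenv("OLLAMA_ENDPOINT", "https://ollama-serve.example.com"))

# One-shot (non-streaming) chat request against the model served above.
response = client.chat(
    model="moondream",
    messages=[{"role": "user", "content": "What is VSCode?"}],
)
print(response["message"]["content"])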
tutorials/serving_ollama_pod_template/download_model.py
from flytekit import Artifact, ImageSpec, PodTemplate, task, Resources
from typing import Annotated
from kubernetes.client.models import (
V1Container,
V1PodSpec,
V1ResourceRequirements,
V1VolumeMount,
V1Volume,
V1SecurityContext,
V1Probe,
V1HTTPGetAction,
)
from flytekit.types.directory import FlyteDirectory


# Artifact under which the pulled model weights are published.
MoonDreamArtifact = Artifact(name="ollama-moondream")

# Task image: the ollama client plus the kubernetes models used by the pod template.
image = ImageSpec(
    name="ollama-serve",
    packages=["ollama==0.4.4", "kubernetes==31.0.0"],
    registry="ghcr.io/unionai-oss",
)


# Pod template that runs an Ollama server as a sidecar next to the task container.
template = PodTemplate(
    pod_spec=V1PodSpec(
        containers=[
            # "primary" is the container flytekit runs the task code in.
            V1Container(
                name="primary",
                image=image,
                volume_mounts=[
                    V1VolumeMount(name="ollama-cache", mount_path="/root/.ollama")
                ],
                # Run as root so the task can read the model cache the sidecar writes.
                security_context=V1SecurityContext(
                    run_as_user=0,
                ),
            ),
        ],
        init_containers=[
            # restart_policy="Always" turns this init container into a native
            # Kubernetes sidecar, so the Ollama server runs for the pod's lifetime.
            V1Container(
                name="ollama",
                image="ollama/ollama:0.6.3",
                resources=V1ResourceRequirements(
                    requests={"cpu": "2", "memory": "6Gi"},
                    limits={"cpu": "2", "memory": "6Gi"},
                ),
                restart_policy="Always",
                volume_mounts=[
                    V1VolumeMount(name="ollama-cache", mount_path="/root/.ollama")
                ],
                # Hold pod startup until the Ollama API answers on port 11434.
                startup_probe=V1Probe(http_get=V1HTTPGetAction(path="/", port=11434)),
            ),
        ],
        # Shared scratch volume where the sidecar stores pulled models.
        volumes=[V1Volume(name="ollama-cache", empty_dir={})],
)
)


@task(pod_template=template, limits=Resources(cpu="1", mem="4Gi"))
def download_model(
model: str = "moondream",
) -> Annotated[FlyteDirectory, MoonDreamArtifact]:
import ollama

    # Pull through the sidecar's Ollama server; the weights land on the shared volume.
    ollama.pull(model)
    # Publish the whole .ollama directory so the serving app can mount it as-is.
    return FlyteDirectory(path="/root/.ollama")
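The pull works because `ollama.pull` defaults to the server at localhost:11434, which the sidecar exposes inside the pod; the weights land on the shared volume that the task then uploads. A minimal standalone sketch of the same check against any reachable Ollama server, assuming the ollama 0.4.x client (not part of this PR):

import ollama

# Pull the model through the local Ollama server (the sidecar, in this pod template).
ollama.pull("moondream")

# Confirm the weights are now registered with the server.
for m in ollama.list().models:
    print(m.model)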
tutorials/serving_ollama_pod_template/streamlit_app.py
import os
import streamlit as st
from ollama import Client

# Ollama client for the serving app; OLLAMA_ENDPOINT is injected by the App input.
client = Client(
    host=os.getenv("OLLAMA_ENDPOINT", "http://localhost"),
)


MODEL = "moondream"


def stream_parser(stream):
    # Yield just the text of each streamed chunk so st.write_stream can render it.
    for chunk in stream:
        yield chunk["message"]["content"]


query = st.text_input("Enter your question:", value="What is VSCode?")

if query:
    stream = client.chat(
        model=MODEL,
        messages=[{"role": "user", "content": query}],
        stream=True,
    )
    # Render the streamed reply as it arrives, reusing the helper above.
    st.write_stream(stream_parser(stream))
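The frontend above is single-turn: each question starts a fresh conversation. A hedged sketch of a multi-turn variant using Streamlit's standard chat pattern (`st.chat_input` plus `st.session_state`); this is an extension, not part of this PR:

import os

import streamlit as st
from ollama import Client

client = Client(host=os.getenv("OLLAMA_ENDPOINT", "http://localhost"))

# Keep the running conversation across Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []

if prompt := st.chat_input("Ask moondream something"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    stream = client.chat(model="moondream", messages=st.session_state.messages, stream=True)
    # st.write_stream renders chunks as they arrive and returns the full reply.
    reply = st.write_stream(chunk["message"]["content"] for chunk in stream)
    st.session_state.messages.append({"role": "assistant", "content": reply})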