Skip to content

Commit

Permalink
ci: upload Docker image to GHCR on Git tag
Browse files Browse the repository at this point in the history
Co-authored-by: croumegous <[email protected]>
  • Loading branch information
jlabatut committed Apr 25, 2023
1 parent 99fefe1 commit a370762
Show file tree
Hide file tree
Showing 12 changed files with 167 additions and 8 deletions.
9 changes: 9 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Keep VCS metadata, CI config, tests, runtime data and local env files
# out of the Docker build context (smaller context, no secret leakage).
.git

.github
tests

data

.env*
.gitignore
2 changes: 1 addition & 1 deletion nlu/.env.template → .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ OPENAI_API_KEY=OPENAI_API_KEY
DATABASE_URL=ws://localhost:8000/rpc
DATABASE_USERNAME=root
DATABASE_PASSWORD=root
FUNCTIONS_GATEWAY=http://localhost:8080
FUNCTIONS_GATEWAY=http://localhost:8080
36 changes: 36 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Build the Docker image and push it to GHCR whenever a version tag (v*) is pushed.
on:
  push:
    tags:
      - "v*"

jobs:
  package:
    name: Build container images
    runs-on: ubuntu-22.04
    permissions:
      contents: read   # checkout only
      packages: write  # required to push to ghcr.io
    steps:
      - uses: actions/checkout@v3
      # buildx is required for BuildKit features (--secret) used below.
      - uses: docker/setup-buildx-action@v2
      - uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Derive image tags/labels from the Git tag; also always tag "latest".
      - uses: docker/metadata-action@v4
        id: meta
        with:
          images: ghcr.io/${{ github.repository }}
          flavor: |
            latest=true
      - uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          file: Dockerfile
          platforms: linux/amd64
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # BuildKit secret consumed by the Dockerfile's RUN --mount=type=secret;
          # it is never written into an image layer.
          secrets: |
            "HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}"
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,6 @@ dmypy.json
# Cython debug symbols
cython_debug/

.vscode/
.vscode/
models/
data/
25 changes: 25 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
FROM python:3.10-slim-buster

# curl is needed only to fetch the Poetry installer; clean the apt cache in
# the same layer so it does not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

RUN curl -sSL https://install.python-poetry.org | python3 - \
    && mv /root/.local/bin/poetry /usr/local/bin/poetry

WORKDIR /app

# Flush stdout/stderr immediately so container logs are not buffered.
ENV PYTHONUNBUFFERED=1

COPY poetry.lock pyproject.toml README.md /app/

COPY ./nlu nlu

# Install dependencies into the system interpreter — no virtualenv inside
# the container.
RUN poetry config virtualenvs.create false \
    && poetry install

# Pre-download the Hugging Face models at build time. The token is provided
# as a BuildKit secret so it never ends up in an image layer or the cache.
RUN --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN \
    export HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN) \
    && python nlu/load_models.py

EXPOSE 8080

CMD ["uvicorn", "nlu.app:app", "--host", "0.0.0.0", "--port", "8080"]
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ curl -sSL https://install.python-poetry.org | python3 -
3. Agree to share information on the repo and create a token: https://huggingface.co/joeddav/xlm-roberta-large-xnli

4. Install Hugging Face CLI: `pip install --upgrade huggingface_hub`
> https://huggingface.co/docs/huggingface_hub/quick-start

> https://huggingface.co/docs/huggingface_hub/quick-start
5. Login to Hugging Face: `huggingface-cli login` (token role is `read`)

Expand All @@ -43,3 +44,19 @@ poetry run python nlu/cli.py
docker run --rm --pull always -p 8000:8000 surrealdb/surrealdb:latest start --pass root
poetry run python nlu/app.py
```

### Run with Docker:

Using the remote image:

```bash
docker compose up -d
```

Building locally (set your Hugging Face token):

```bash
export HUGGING_FACE_HUB_TOKEN=<my_token>
docker buildx build --secret id=HUGGING_FACE_HUB_TOKEN -t ghcr.io/polyxia-org/nlu:latest .
docker compose up -d
```
24 changes: 24 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
version: "3"

services:
  # SurrealDB backing store; database file persisted under ./data on the host.
  surrealdb:
    image: surrealdb/surrealdb:latest
    container_name: surrealdb
    restart: unless-stopped
    command: start --pass root file:/data/database.db
    ports:
      - "8000:8000"
    volumes:
      - ./data:/data

  # NLU API server (image built and pushed by the CI workflow).
  api-server:
    depends_on:
      - surrealdb
    image: ghcr.io/polyxia-org/nlu:latest
    container_name: api-server
    ports:
      - "8080:8080"
    env_file:
      - .env.template
    environment:
      # Override the template's localhost URL with the compose service name.
      DATABASE_URL: ws://surrealdb:8000/rpc
1 change: 1 addition & 0 deletions nlu/apis/v1/nlu.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ async def nlu(payload: NluPayload):
)
if res.ok:
return NluResponse(intent=intent, response=res.text)
print(f"Error: {res.text}")
return NluResponse(intent=intent, response="Sorry an error occurred.")
else:
if openai.api_key is None:
Expand Down
22 changes: 21 additions & 1 deletion nlu/app.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,38 @@
import logging
import os
import sys

import openai
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from contextlib import asynccontextmanager

from nlu.apis.v1 import api_router
from nlu.database.client import Database

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

logger = logging.getLogger(__name__)
app = FastAPI()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Validate startup prerequisites before the app begins serving.

    Exits the process when the database is unreachable or FUNCTIONS_GATEWAY
    is unset, so misconfiguration fails fast at startup rather than on the
    first request.
    """
    try:
        await Database()
    except Exception as e:
        logger.error(f"Error connecting to database: {e}")
        sys.exit(1)

    if os.getenv("FUNCTIONS_GATEWAY") is None:
        logger.error("FUNCTIONS_GATEWAY is not set")
        sys.exit(1)
    yield


app = FastAPI(lifespan=lifespan)

app.include_router(api_router, prefix="/v1")

Expand Down
8 changes: 6 additions & 2 deletions nlu/brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@

#### NER Extractor
model_name_ner = "qanastek/XLMRoberta-Alexa-Intents-NER-NLU"
tokenizer_ner = AutoTokenizer.from_pretrained(model_name_ner)
model_ner = AutoModelForTokenClassification.from_pretrained(model_name_ner)
tokenizer_ner = AutoTokenizer.from_pretrained(model_name_ner, local_files_only=True)
model_ner = AutoModelForTokenClassification.from_pretrained(
model_name_ner, local_files_only=True
)
predict_ner = TokenClassificationPipeline(model=model_ner, tokenizer=tokenizer_ner)


Expand Down Expand Up @@ -81,6 +83,8 @@ async def get_user_intent(user_input: str) -> Tuple[str, float]:

# zero-shot classification
candidate_labels = list(intents_list.keys())
if not candidate_labels:
return None, None
intent_classified = intent_classifier(
user_input, candidate_labels, multi_label=True
)
Expand Down
22 changes: 22 additions & 0 deletions nlu/load_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Pre-download all Hugging Face models used by the NLU service.

Run at Docker build time so the image ships with the model weights cached
and no network access is needed when the container starts.
"""
from sentence_transformers import SentenceTransformer
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    TokenClassificationPipeline,
    pipeline,
)

# Intent classifier (zero-shot classification over caller-supplied labels)
model_name_intent = "joeddav/xlm-roberta-large-xnli"
intent_classifier = pipeline("zero-shot-classification", model=model_name_intent)

# Sentence similarity
model_name_sim = "paraphrase-multilingual-mpnet-base-v2"
model_sentence_similarity = SentenceTransformer(model_name_sim)

# NER extractor
model_name_ner = "qanastek/XLMRoberta-Alexa-Intents-NER-NLU"
tokenizer_ner = AutoTokenizer.from_pretrained(model_name_ner)
model_ner = AutoModelForTokenClassification.from_pretrained(model_name_ner)
predict_ner = TokenClassificationPipeline(model=model_ner, tokenizer=tokenizer_ner)
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ authors = ["Polyxya org"]
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.9,<3.10"
python = ">=3.9,<3.12"
torch = { version = "=1.13.1", source = "torch" }
numpy = "^1.24.2"
uvicorn = "^0.21.0"
Expand All @@ -23,7 +23,6 @@ url = "https://download.pytorch.org/whl/cpu"
secondary = true
default = false


[tool.poetry.group.dev.dependencies]
black = "^23.1.0"
isort = "^5.12.0"
Expand Down

0 comments on commit a370762

Please sign in to comment.