Skip to content

Commit

Permalink
ci: upload Docker image to GHCR on Git tag
Browse files Browse the repository at this point in the history
Co-authored-by: croumegous <[email protected]>
  • Loading branch information
jlabatut committed Apr 25, 2023
1 parent 99fefe1 commit a370762
Show file tree
Hide file tree
Showing 12 changed files with 167 additions and 8 deletions.
9 changes: 9 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Keep VCS metadata, CI config, tests, runtime data and local env files
# out of the Docker build context (smaller context, no secret leakage).
.git

.github
tests

data

.env*
.gitignore
2 changes: 1 addition & 1 deletion nlu/.env.template → .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ OPENAI_API_KEY=OPENAI_API_KEY
DATABASE_URL=ws://localhost:8000/rpc
DATABASE_USERNAME=root
DATABASE_PASSWORD=root
FUNCTIONS_GATEWAY=http://localhost:8080
FUNCTIONS_GATEWAY=http://localhost:8080
36 changes: 36 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Build the Docker image and push it to GHCR whenever a version tag (v*) is pushed.
on:
  push:
    tags:
      - "v*"

jobs:
  package:
    name: Build container images
    runs-on: ubuntu-22.04
    permissions:
      contents: read   # checkout only
      packages: write  # required to push to ghcr.io
    steps:
      - uses: actions/checkout@v3
      # buildx is required for BuildKit features (--secret) used below.
      - uses: docker/setup-buildx-action@v2
      - uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Derive image tags/labels from the Git tag; also always tag "latest".
      - uses: docker/metadata-action@v4
        id: meta
        with:
          images: ghcr.io/${{ github.repository }}
          flavor: |
            latest=true
      - uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          file: Dockerfile
          platforms: linux/amd64
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # BuildKit secret consumed by the Dockerfile's RUN --mount=type=secret;
          # it is never written into an image layer.
          secrets: |
            "HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}"
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,6 @@ dmypy.json
# Cython debug symbols
cython_debug/

.vscode/
.vscode/
models/
data/
25 changes: 25 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
FROM python:3.10-slim-buster

# curl is needed only to fetch the Poetry installer; clean the apt cache in
# the same layer so it does not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

RUN curl -sSL https://install.python-poetry.org | python3 - \
    && mv /root/.local/bin/poetry /usr/local/bin/poetry

WORKDIR /app

# Flush stdout/stderr immediately so container logs are not buffered.
ENV PYTHONUNBUFFERED=1

COPY poetry.lock pyproject.toml README.md /app/

COPY ./nlu nlu

# Install dependencies into the system interpreter — no virtualenv inside
# the container.
RUN poetry config virtualenvs.create false \
    && poetry install

# Pre-download the Hugging Face models at build time. The token is provided
# as a BuildKit secret so it never ends up in an image layer or the cache.
RUN --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN \
    export HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN) \
    && python nlu/load_models.py

EXPOSE 8080

CMD ["uvicorn", "nlu.app:app", "--host", "0.0.0.0", "--port", "8080"]
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ curl -sSL https://install.python-poetry.org | python3 -
3. Agree to share information on the repo and create a token: https://huggingface.co/joeddav/xlm-roberta-large-xnli

4. Install Hugging Face CLI: `pip install --upgrade huggingface_hub`
> https://huggingface.co/docs/huggingface_hub/quick-start

> https://huggingface.co/docs/huggingface_hub/quick-start
5. Login to Hugging Face: `huggingface-cli login` (token role is `read`)

Expand All @@ -43,3 +44,19 @@ poetry run python nlu/cli.py
docker run --rm --pull always -p 8000:8000 surrealdb/surrealdb:latest start --pass root
poetry run python nlu/app.py
```

### Run with Docker:

Using the remote image:

```bash
docker compose up -d
```

Building locally (set your Hugging Face token):

```bash
export HUGGING_FACE_HUB_TOKEN=<my_token>
docker buildx build --secret id=HUGGING_FACE_HUB_TOKEN -t ghcr.io/polyxia-org/nlu:latest .
docker compose up -d
```
24 changes: 24 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
version: "3"

services:
  # SurrealDB backing store; database file persisted under ./data on the host.
  surrealdb:
    image: surrealdb/surrealdb:latest
    container_name: surrealdb
    restart: unless-stopped
    command: start --pass root file:/data/database.db
    ports:
      - "8000:8000"
    volumes:
      - ./data:/data

  # NLU API server (image built and pushed by the CI workflow).
  api-server:
    depends_on:
      - surrealdb
    image: ghcr.io/polyxia-org/nlu:latest
    container_name: api-server
    ports:
      - "8080:8080"
    env_file:
      - .env.template
    environment:
      # Override the template's localhost URL with the compose service name.
      DATABASE_URL: ws://surrealdb:8000/rpc
1 change: 1 addition & 0 deletions nlu/apis/v1/nlu.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ async def nlu(payload: NluPayload):
)
if res.ok:
return NluResponse(intent=intent, response=res.text)
print(f"Error: {res.text}")
return NluResponse(intent=intent, response="Sorry an error occurred.")
else:
if openai.api_key is None:
Expand Down
22 changes: 21 additions & 1 deletion nlu/app.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,38 @@
import logging
import os
import sys

import openai
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from contextlib import asynccontextmanager

from nlu.apis.v1 import api_router
from nlu.database.client import Database

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

logger = logging.getLogger(__name__)
app = FastAPI()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Validate startup prerequisites before the app begins serving.

    Exits the process when the database is unreachable or FUNCTIONS_GATEWAY
    is unset, so misconfiguration fails fast at startup rather than on the
    first request.
    """
    try:
        await Database()
    except Exception as e:
        logger.error(f"Error connecting to database: {e}")
        sys.exit(1)

    if os.getenv("FUNCTIONS_GATEWAY") is None:
        logger.error("FUNCTIONS_GATEWAY is not set")
        sys.exit(1)
    yield


app = FastAPI(lifespan=lifespan)

app.include_router(api_router, prefix="/v1")

Expand Down
8 changes: 6 additions & 2 deletions nlu/brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@

#### NER Extractor
model_name_ner = "qanastek/XLMRoberta-Alexa-Intents-NER-NLU"
tokenizer_ner = AutoTokenizer.from_pretrained(model_name_ner)
model_ner = AutoModelForTokenClassification.from_pretrained(model_name_ner)
tokenizer_ner = AutoTokenizer.from_pretrained(model_name_ner, local_files_only=True)
model_ner = AutoModelForTokenClassification.from_pretrained(
model_name_ner, local_files_only=True
)
predict_ner = TokenClassificationPipeline(model=model_ner, tokenizer=tokenizer_ner)


Expand Down Expand Up @@ -81,6 +83,8 @@ async def get_user_intent(user_input: str) -> Tuple[str, float]:

# zero-shot classification
candidate_labels = list(intents_list.keys())
if not candidate_labels:
return None, None
intent_classified = intent_classifier(
user_input, candidate_labels, multi_label=True
)
Expand Down
22 changes: 22 additions & 0 deletions nlu/load_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Pre-download all Hugging Face models used by the NLU service.

Run at Docker build time so the image ships with the model weights cached
and no network access is needed when the container starts.
"""
from sentence_transformers import SentenceTransformer
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    TokenClassificationPipeline,
    pipeline,
)

# Intent classifier (zero-shot classification over caller-supplied labels)
model_name_intent = "joeddav/xlm-roberta-large-xnli"
intent_classifier = pipeline("zero-shot-classification", model=model_name_intent)

# Sentence similarity
model_name_sim = "paraphrase-multilingual-mpnet-base-v2"
model_sentence_similarity = SentenceTransformer(model_name_sim)

# NER extractor
model_name_ner = "qanastek/XLMRoberta-Alexa-Intents-NER-NLU"
tokenizer_ner = AutoTokenizer.from_pretrained(model_name_ner)
model_ner = AutoModelForTokenClassification.from_pretrained(model_name_ner)
predict_ner = TokenClassificationPipeline(model=model_ner, tokenizer=tokenizer_ner)
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ authors = ["Polyxya org"]
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.9,<3.10"
python = ">=3.9,<3.12"
torch = { version = "=1.13.1", source = "torch" }
numpy = "^1.24.2"
uvicorn = "^0.21.0"
Expand All @@ -23,7 +23,6 @@ url = "https://download.pytorch.org/whl/cpu"
secondary = true
default = false


[tool.poetry.group.dev.dependencies]
black = "^23.1.0"
isort = "^5.12.0"
Expand Down

0 comments on commit a370762

Please sign in to comment.