This repository was archived by the owner on Jun 26, 2024. It is now read-only.

Commit 33908f2

Deploy LLaMA with Lightning App (#1)
* add app
* update
* update
* refactor
1 parent 8d4e95f commit 33908f2

File tree: 5 files changed, +41 −3 lines

.github/workflows/main.yml (−2 lines)

@@ -47,8 +47,6 @@ jobs:
         python --version
         pip --version
         python -m pip install --upgrade pip
-        pip install flit
-        flit install --deps all
         pip list
       shell: bash

src/llama_inference/__init__.py (+1 −1)

@@ -1,5 +1,5 @@
 """Inference API for LLaMA"""

-from .api import LLaMAInference
+from .model import LLaMAInference

 __version__ = "0.0.0"

src/llama_inference/app.py (new file, +29 lines)

@@ -0,0 +1,29 @@
+from typing import Any
+
+import lightning as L
+from lightning.app.components import PythonServer
+from pydantic import BaseModel
+
+from llama_inference.model import LLaMAInference
+
+
+class PromptRequest(BaseModel):
+    prompt: str
+
+
+class Response(BaseModel):
+    result: str
+
+
+class ServeLLaMA(PythonServer):
+    def setup(self, *args: Any, **kwargs: Any) -> None:
+        self._model = LLaMAInference(*args, **kwargs)
+
+    def predict(self, request: PromptRequest) -> Any:
+        result = self._model(request.prompt)
+        return Response(result=result)
+
+
+if __name__ == "__main__":
+    component = ServeLLaMA(input_type=PromptRequest, output_type=Response)
+    app = L.LightningApp(component)
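Once the app is launched (for example with lightning run app src/llama_inference/app.py), the ServeLLaMA component should accept prompts as JSON over HTTP. A minimal client sketch, assuming PythonServer's default /predict route and a locally served URL on port 7777 (both the route and the port are assumptions; check the URL the app prints on startup):

# client.py -- hypothetical client for the ServeLLaMA endpoint
import requests

SERVER_URL = "http://127.0.0.1:7777/predict"  # assumed local URL and route

# The request body mirrors the PromptRequest schema defined in app.py.
response = requests.post(SERVER_URL, json={"prompt": "Hello, my name is"})
response.raise_for_status()

# The response body mirrors the Response schema: {"result": "..."}
print(response.json()["result"])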

src/llama_inference/api.py → src/llama_inference/model.py (renamed, +10 lines)

@@ -1,13 +1,19 @@
+import os
 import sys
 import time
 from pathlib import Path
 from typing import Optional

 import lightning as L
 import torch
+from dotenv import load_dotenv
 from lit_llama import LLaMA, Tokenizer
 from lit_llama.utils import EmptyInitOnDevice

+load_dotenv()
+
+WEIGHTS_PATH = os.environ.get("WEIGHTS")
+

 @torch.no_grad()
 def _generate(

@@ -74,6 +80,10 @@ def __init__(
     ) -> None:
         self.fabric = fabric = L.Fabric(accelerator=accelerator, devices=1)

+        if not checkpoint_path and WEIGHTS_PATH:
+            checkpoint_path = f"{WEIGHTS_PATH}/{model_size}/state_dict.pth"
+            tokenizer_path = f"{WEIGHTS_PATH}/tokenizer.model"
+
         if dtype is not None:
             dt = getattr(torch, dtype, None)
             if not isinstance(dt, torch.dtype):
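With this change, LLaMAInference can fall back to a WEIGHTS environment variable (loaded from a .env file via python-dotenv) to locate the checkpoint and tokenizer when no checkpoint_path is passed. A minimal usage sketch, assuming the weights directory layout shown in the diff and a "7B" model_size value (the constructor's other defaults are not shown in this commit and are assumptions):

# Sketch: point WEIGHTS at a directory containing <size>/state_dict.pth
# and tokenizer.model, then let LLaMAInference resolve the paths itself.
import os

os.environ["WEIGHTS"] = "/data/llama"  # hypothetical weights directory

from llama_inference.model import LLaMAInference  # import after WEIGHTS is set

# Resolves /data/llama/7B/state_dict.pth and /data/llama/tokenizer.model
model = LLaMAInference(model_size="7B")

# __call__ takes a prompt string, mirroring how app.py's predict() uses it
print(model("Hello, my name is"))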

src/requirements.txt (+1 line)

@@ -0,0 +1 @@
+python-dotenv>=1.0.0
