Commit 8b9b498
test OpenAISpec
Parent: 07d60df

2 files changed: +101, -3 lines

src/litserve/examples/simple_example.py (+3, -3)

@@ -1,3 +1,5 @@
+import torch
+
 import litserve as ls


@@ -54,8 +56,6 @@ def forward(self, x):
         self.model = Linear().to(device)

     def decode_request(self, request):
-        import torch
-
         # get the input and create a 1D tensor on the correct device
         content = request["input"]
         return torch.tensor([content], device=self.device)
@@ -95,7 +95,7 @@ def decode_request(self, request):

     def predict(self, x):
         for i in range(3):
-            yield self.model(i, x.encode("utf-8").decode())
+            yield self.model(i, x)

     def encode_response(self, output_stream):
         for output in output_stream:
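
The first two hunks hoist "import torch" to module scope instead of importing it lazily inside decode_request, and the third hunk drops a pointless encode("utf-8").decode() round trip so predict streams the decoded input as-is. A minimal sketch of the resulting SimpleStreamAPI, consistent with this diff and with the new stream test added below (the lambda "model" is an assumption about the example's internals, not copied from the repo):

import litserve as ls


class SimpleStreamAPI(ls.LitAPI):
    def setup(self, device):
        # Assumed toy "model": tags the input with the current step index.
        self.model = lambda step, x: f"{step}: {x}"

    def decode_request(self, request):
        return request["input"]

    def predict(self, x):
        # After the fix: stream the decoded input directly, no string round trip.
        for i in range(3):
            yield self.model(i, x)

    def encode_response(self, output_stream):
        for output in output_stream:
            yield {"output": output}

With that shape, predict(4) yields "0: 4", "1: 4", "2: 4" and encode_response wraps each chunk as {"output": ...}, which is exactly what test_simple_stream_api_without_server asserts.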

tests/test_examples.py (+98)

@@ -1,6 +1,14 @@
 import pytest
+import torch.nn
 from asgi_lifespan import LifespanManager
 from httpx import AsyncClient
+
+from litserve.examples.openai_spec_example import (
+    OpenAIWithUsage,
+    OpenAIWithUsageEncodeResponse,
+    OpenAIBatchingWithUsage,
+)
+from litserve.examples.simple_example import SimpleStreamAPI
 from litserve.utils import wrap_litserve_start
 import litserve as ls

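The three OpenAI* classes imported here are litserve's OpenAI-spec examples. In normal use they sit behind ls.LitServer with ls.OpenAISpec; the tests in the next hunk bypass the HTTP layer and call the LitAPI hooks (setup, predict, encode_response) in-process. A serving sketch for context (the port and script layout are assumptions):

# Sketch: serving one of the OpenAI-spec examples over HTTP.
# The tests below skip this and call the hooks directly.
import litserve as ls
from litserve.examples.openai_spec_example import OpenAIWithUsage

if __name__ == "__main__":
    server = ls.LitServer(OpenAIWithUsage(), spec=ls.OpenAISpec())
    server.run(port=8000)  # assumed port
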
@@ -33,3 +41,93 @@ async def test_simple_api():
     async with LifespanManager(server.app) as manager, AsyncClient(app=manager.app, base_url="http://test") as ac:
         response = await ac.post("/predict", json={"input": 4.0})
         assert response.json() == {"output": 16.0}
+
+
+@pytest.mark.asyncio()
+async def test_simple_api_without_server():
+    api = ls.examples.SimpleLitAPI()
+    api.setup(None)
+    assert api.model is not None, "Model should be loaded after setup"
+    assert api.predict(4) == 16, "Model should be able to predict"
+
+
+@pytest.mark.asyncio()
+async def test_simple_pytorch_api_without_server():
+    api = ls.examples.SimpleTorchAPI()
+    api.setup("cpu")
+    assert api.model is not None, "Model should be loaded after setup"
+    assert isinstance(api.model, torch.nn.Module)
+    assert api.decode_request({"input": 4}) == 4, "Request should be decoded"
+    assert api.predict(torch.Tensor([4])).cpu() == 9, "Model should be able to predict"
+    assert api.encode_response(9) == {"output": 9}, "Response should be encoded"
+
+
+@pytest.mark.asyncio()
+async def test_simple_stream_api_without_server():
+    api = SimpleStreamAPI()
+    api.setup(None)
+    assert api.model is not None, "Model should be loaded after setup"
+    assert api.decode_request({"input": 4}) == 4, "Request should be decoded"
+    assert list(api.predict(4)) == ["0: 4", "1: 4", "2: 4"], "Model should be able to predict"
+    assert list(api.encode_response(["0: 4", "1: 4", "2: 4"])) == [
+        {"output": "0: 4"},
+        {"output": "1: 4"},
+        {"output": "2: 4"},
+    ], "Response should be encoded"
+
+
+@pytest.mark.asyncio()
+async def test_openai_with_usage():
+    api = OpenAIWithUsage()
+    api.setup(None)
+    response = list(api.predict("10 + 6"))
+    assert response == [
+        {
+            "role": "assistant",
+            "content": "10 + 6 is equal to 16.",
+            "prompt_tokens": 25,
+            "completion_tokens": 10,
+            "total_tokens": 35,
+        }
+    ], "Response should match expected output"
+
+
+@pytest.mark.asyncio()
+async def test_openai_with_usage_encode_response():
+    api = OpenAIWithUsageEncodeResponse()
+    api.setup(None)
+    response = list(api.predict("10 + 6"))
+    encoded_response = list(api.encode_response(response))
+    assert encoded_response == [
+        {"role": "assistant", "content": "10"},
+        {"role": "assistant", "content": " +"},
+        {"role": "assistant", "content": " "},
+        {"role": "assistant", "content": "6"},
+        {"role": "assistant", "content": " is"},
+        {"role": "assistant", "content": " equal"},
+        {"role": "assistant", "content": " to"},
+        {"role": "assistant", "content": " "},
+        {"role": "assistant", "content": "16"},
+        {"role": "assistant", "content": "."},
+        {"role": "assistant", "content": "", "prompt_tokens": 25, "completion_tokens": 10, "total_tokens": 35},
+    ], "Encoded response should match expected output"
+
+
+@pytest.mark.asyncio()
+async def test_openai_batching_with_usage():
+    api = OpenAIBatchingWithUsage()
+    api.setup(None)
+    inputs = ["10 + 6", "10 + 6"]
+    batched_response = list(api.predict(inputs))
+    assert batched_response == [["10 + 6 is equal to 16."] * 2], "Batched response should match expected output"
+    encoded_response = list(api.encode_response(batched_response, [{"temperature": 1.0}, {"temperature": 1.0}]))
+    assert encoded_response == [
+        [
+            {"role": "assistant", "content": "10 + 6 is equal to 16."},
+            {"role": "assistant", "content": "10 + 6 is equal to 16."},
+        ],
+        [
+            {"role": "assistant", "content": "", "prompt_tokens": 25, "completion_tokens": 10, "total_tokens": 35},
+            {"role": "assistant", "content": "", "prompt_tokens": 25, "completion_tokens": 10, "total_tokens": 35},
+        ],
+    ], "Encoded batched response should match expected output"
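
For reference, test_openai_with_usage expects predict to yield a single assistant message that already carries token-usage counts. A sketch with that shape, mirroring the asserted output (the hard-coded reply and token counts come from the test; treating the example as model-free is an assumption about its internals):

import litserve as ls


class OpenAIWithUsage(ls.LitAPI):
    def setup(self, device):
        # The example needs no real model; the reply below is hard-coded.
        self.model = "canned-response"

    def predict(self, x):
        # One chunk carrying both the message and the usage counters,
        # matching the dict asserted in test_openai_with_usage.
        yield {
            "role": "assistant",
            "content": "10 + 6 is equal to 16.",
            "prompt_tokens": 25,
            "completion_tokens": 10,
            "total_tokens": 35,
        }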

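The new tests are all decorated with @pytest.mark.asyncio(), so pytest-asyncio must be installed alongside asgi_lifespan and httpx (the pre-existing async test already depends on it). To run only the additions:

pytest tests/test_examples.py -k "without_server or openai" -v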