From cbe65a189e6d63b5a41e5fc07da9eb2732b9c902 Mon Sep 17 00:00:00 2001
From: Aniket Maurya
Date: Fri, 30 Aug 2024 17:08:43 +0100
Subject: [PATCH] Add test for `litserve.examples` (#254)

* test OpenAISpec

* fix

* rename examples to test_examples.py

* Revert "rename examples to test_examples.py"

This reverts commit dea60b70fd2f28162f074cdb7f39ed835d581ccf.

* test OpenAIBatchContext

---------

Co-authored-by: William Falcon
---
 src/litserve/examples/openai_spec_example.py |   7 +-
 src/litserve/examples/simple_example.py      |   2 +-
 tests/test_examples.py                       | 172 +++++++++++++++++++
 3 files changed, 174 insertions(+), 7 deletions(-)

diff --git a/src/litserve/examples/openai_spec_example.py b/src/litserve/examples/openai_spec_example.py
index 3e886c9f..9ada8e69 100644
--- a/src/litserve/examples/openai_spec_example.py
+++ b/src/litserve/examples/openai_spec_example.py
@@ -14,7 +14,7 @@
 import time
 
 import litserve as ls
-from litserve.specs.openai import ChatMessage, OpenAISpec
+from litserve.specs.openai import ChatMessage
 
 
 class TestAPI(ls.LitAPI):
@@ -130,8 +130,3 @@ def encode_response(self, output_stream_batch, context):
 
     def unbatch(self, output):
         return output
-
-
-if __name__ == "__main__":
-    server = ls.LitServer(TestAPIWithCustomEncode(), spec=OpenAISpec())
-    server.run(port=8000)
diff --git a/src/litserve/examples/simple_example.py b/src/litserve/examples/simple_example.py
index a1eb8178..549f7590 100644
--- a/src/litserve/examples/simple_example.py
+++ b/src/litserve/examples/simple_example.py
@@ -95,7 +95,7 @@ def decode_request(self, request):
 
     def predict(self, x):
         for i in range(3):
-            yield self.model(i, x.encode("utf-8").decode())
+            yield self.model(i, x)
 
     def encode_response(self, output_stream):
         for output in output_stream:
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 2dc4756d..cd652923 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -1,6 +1,15 @@
 import pytest
+import torch.nn
 from asgi_lifespan import LifespanManager
 from httpx import AsyncClient
+
+from litserve.examples.openai_spec_example import (
+    OpenAIWithUsage,
+    OpenAIWithUsageEncodeResponse,
+    OpenAIBatchingWithUsage,
+    OpenAIBatchContext,
+)
+from litserve.examples.simple_example import SimpleStreamAPI
 from litserve.utils import wrap_litserve_start
 import litserve as ls
 
@@ -33,3 +42,166 @@ async def test_simple_api():
     async with LifespanManager(server.app) as manager, AsyncClient(app=manager.app, base_url="http://test") as ac:
         response = await ac.post("/predict", json={"input": 4.0})
         assert response.json() == {"output": 16.0}
+
+
+@pytest.mark.asyncio()
+async def test_simple_api_without_server():
+    api = ls.examples.SimpleLitAPI()
+    api.setup(None)
+    assert api.model is not None, "Model should be loaded after setup"
+    assert api.predict(4) == 16, "Model should be able to predict"
+
+
+@pytest.mark.asyncio()
+async def test_simple_pytorch_api_without_server():
+    api = ls.examples.SimpleTorchAPI()
+    api.setup("cpu")
+    assert api.model is not None, "Model should be loaded after setup"
+    assert isinstance(api.model, torch.nn.Module)
+    assert api.decode_request({"input": 4}) == 4, "Request should be decoded"
+    assert api.predict(torch.Tensor([4])).cpu() == 9, "Model should be able to predict"
+    assert api.encode_response(9) == {"output": 9}, "Response should be encoded"
+
+
+@pytest.mark.asyncio()
+async def test_simple_stream_api_without_server():
+    api = SimpleStreamAPI()
+    api.setup(None)
+    assert api.model is not None, "Model should be loaded after setup"
+    assert api.decode_request({"input": 4}) == 4, "Request should be decoded"
api.decode_request({"input": 4}) == 4, "Request should be decoded" + assert list(api.predict(4)) == ["0: 4", "1: 4", "2: 4"], "Model should be able to predict" + assert list(api.encode_response(["0: 4", "1: 4", "2: 4"])) == [ + {"output": "0: 4"}, + {"output": "1: 4"}, + {"output": "2: 4"}, + ], "Response should be encoded" + + +@pytest.mark.asyncio() +async def test_openai_with_usage(): + api = OpenAIWithUsage() + api.setup(None) + response = list(api.predict("10 + 6")) + assert response == [ + { + "role": "assistant", + "content": "10 + 6 is equal to 16.", + "prompt_tokens": 25, + "completion_tokens": 10, + "total_tokens": 35, + } + ], "Response should match expected output" + + +@pytest.mark.asyncio() +async def test_openai_with_usage_encode_response(): + api = OpenAIWithUsageEncodeResponse() + api.setup(None) + response = list(api.predict("10 + 6")) + encoded_response = list(api.encode_response(response)) + assert encoded_response == [ + {"role": "assistant", "content": "10"}, + {"role": "assistant", "content": " +"}, + {"role": "assistant", "content": " "}, + {"role": "assistant", "content": "6"}, + {"role": "assistant", "content": " is"}, + {"role": "assistant", "content": " equal"}, + {"role": "assistant", "content": " to"}, + {"role": "assistant", "content": " "}, + {"role": "assistant", "content": "16"}, + {"role": "assistant", "content": "."}, + {"role": "assistant", "content": "", "prompt_tokens": 25, "completion_tokens": 10, "total_tokens": 35}, + ], "Encoded response should match expected output" + + +@pytest.mark.asyncio() +async def test_openai_batching_with_usage(): + api = OpenAIBatchingWithUsage() + api.setup(None) + inputs = ["10 + 6", "10 + 6"] + assert api.batch(inputs) == inputs, "Batched inputs should match expected output" + batched_response = list(api.predict(inputs)) + assert batched_response == [["10 + 6 is equal to 16."] * 2], "Batched response should match expected output" + assert api.unbatch(batched_response) == batched_response, "Unbatched response should match batched response" + encoded_response = list(api.encode_response(batched_response, [{"temperature": 1.0}, {"temperature": 1.0}])) + assert encoded_response == [ + [ + {"role": "assistant", "content": "10 + 6 is equal to 16."}, + {"role": "assistant", "content": "10 + 6 is equal to 16."}, + ], + [ + {"role": "assistant", "content": "", "prompt_tokens": 25, "completion_tokens": 10, "total_tokens": 35}, + {"role": "assistant", "content": "", "prompt_tokens": 25, "completion_tokens": 10, "total_tokens": 35}, + ], + ], "Encoded batched response should match expected output" + + +@pytest.mark.asyncio() +async def test_openai_batch_context(): + api = OpenAIBatchContext() + api.setup(None) + inputs = ["Hello", "How are you?"] + context = [{"temperature": 0.5}, {"temperature": 0.5}] + + # Test batch method + assert api.batch(inputs) == inputs, "Batched inputs should match expected output" + + # Test predict method + predicted_output = list(api.predict(inputs, context)) + expected_output = [ + ["Hi! "] * 2, + ["It's "] * 2, + ["nice "] * 2, + ["to "] * 2, + ["meet "] * 2, + ["you. "] * 2, + ["Is "] * 2, + ["there "] * 2, + ["something "] * 2, + ["I "] * 2, + ["can "] * 2, + ["help "] * 2, + ["you "] * 2, + ["with "] * 2, + ["or "] * 2, + ["would "] * 2, + ["you "] * 2, + ["like "] * 2, + ["to "] * 2, + ["chat? 
"] * 2, + ] + assert predicted_output == expected_output, "Predicted output should match expected output" + + # Test unbatch method + unbatched_output = api.unbatch(predicted_output) + assert unbatched_output == predicted_output, "Unbatched output should match predicted output" + + # Test encode_response method + encoded_response = list(api.encode_response(predicted_output, context)) + expected_encoded_response = [ + [{"role": "assistant", "content": "Hi! "}, {"role": "assistant", "content": "Hi! "}], + [{"role": "assistant", "content": "It's "}, {"role": "assistant", "content": "It's "}], + [{"role": "assistant", "content": "nice "}, {"role": "assistant", "content": "nice "}], + [{"role": "assistant", "content": "to "}, {"role": "assistant", "content": "to "}], + [{"role": "assistant", "content": "meet "}, {"role": "assistant", "content": "meet "}], + [{"role": "assistant", "content": "you. "}, {"role": "assistant", "content": "you. "}], + [{"role": "assistant", "content": "Is "}, {"role": "assistant", "content": "Is "}], + [{"role": "assistant", "content": "there "}, {"role": "assistant", "content": "there "}], + [{"role": "assistant", "content": "something "}, {"role": "assistant", "content": "something "}], + [{"role": "assistant", "content": "I "}, {"role": "assistant", "content": "I "}], + [{"role": "assistant", "content": "can "}, {"role": "assistant", "content": "can "}], + [{"role": "assistant", "content": "help "}, {"role": "assistant", "content": "help "}], + [{"role": "assistant", "content": "you "}, {"role": "assistant", "content": "you "}], + [{"role": "assistant", "content": "with "}, {"role": "assistant", "content": "with "}], + [{"role": "assistant", "content": "or "}, {"role": "assistant", "content": "or "}], + [{"role": "assistant", "content": "would "}, {"role": "assistant", "content": "would "}], + [{"role": "assistant", "content": "you "}, {"role": "assistant", "content": "you "}], + [{"role": "assistant", "content": "like "}, {"role": "assistant", "content": "like "}], + [{"role": "assistant", "content": "to "}, {"role": "assistant", "content": "to "}], + [{"role": "assistant", "content": "chat? "}, {"role": "assistant", "content": "chat? "}], + ] + assert encoded_response == expected_encoded_response, "Encoded response should match expected output" + + # Ensure context temperatures are set to 1.0 + for ctx in context: + assert ctx["temperature"] == 1.0, f"context {ctx} is not 1.0"