This repository was archived by the owner on Aug 7, 2025. It is now read-only.

Commit a07b7d9

Llama.cpp example for cpp backend (#2904)
* Version 1 of LLM inference with cpp backend: llm handler with loadmodel, preprocess, and inference methods; fixed an infinite lock by adding request ids to the preprocess method; added a test script for measuring tokens per second (llama-7b-chat, ggml version); GGUF compatibility; unit test fixes; typo fix; use folly to read the config path; removed debug couts; process all items in the batch; adopted llama.cpp API changes. Signed-off-by: Shrinath Suresh <[email protected]>
* Adapt to removal of TS backend
* Re-add test for llama.cpp example
* Add llama.cpp as a submodule
* Point to correct llama.cpp installation
* Build llama.cpp in build.sh
* Skip llama.cpp example test if model weights are not available
* Renamed torchscript_model folder into examples
* Adjust to new base_handler interface
* Remove debug statement
* Rename llamacpp class + remove dummy.pt file
* Move llamacpp config.json
* Moved and created prompt file
* Reset context for multiple batch entries
* Add doc for llamacpp example
* Fix spell check
* Replace output example in llamacpp example
* Move cpp example src into main examples folder
* Convert cerr/cout into logs

Co-authored-by: Shrinath Suresh <[email protected]>
1 parent 3ecaf0b · commit a07b7d9

File tree: 40 files changed (+564, −67 lines)

.gitmodules

Lines changed: 3 additions & 0 deletions
```diff
@@ -1,3 +1,6 @@
 [submodule "third_party/google/rpc"]
 	path = third_party/google/rpc
 	url = https://github.com/googleapis/googleapis.git
+[submodule "cpp/third-party/llama.cpp"]
+	path = cpp/third-party/llama.cpp
+	url = https://github.com/ggerganov/llama.cpp.git
```
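Because llama.cpp now comes in as a git submodule, a plain clone leaves cpp/third-party/llama.cpp empty. A minimal sketch of fetching it by hand (cpp/build.sh below now runs the recursive variant of this automatically):

```bash
# Fetch the newly added llama.cpp submodule before building.
git submodule update --init cpp/third-party/llama.cpp
```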

cpp/README.md

Lines changed: 5 additions & 5 deletions
````diff
@@ -49,23 +49,23 @@ By default, TorchServe cpp provides a handler for TorchScript [src/backends/hand
 ```
 torch-model-archiver --model-name mnist_base --version 1.0 --serialized-file mnist_script.pt --handler TorchScriptHandler --runtime LSP
 ```
-Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/torchscript_model/mnist/base_handler) of unzipped model mar file.
+Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/examples/mnist/base_handler) of unzipped model mar file.
 ##### Using Custom Handler
 * build customized handler shared lib. For example [Mnist handler](https://github.com/pytorch/serve/blob/cpp_backend/cpp/src/examples/image_classifier/mnist).
 * set runtime as "LSP" in model archiver option [--runtime](https://github.com/pytorch/serve/tree/master/model-archiver#arguments)
 * set handler as "libmnist_handler:MnistHandler" in model archiver option [--handler](https://github.com/pytorch/serve/tree/master/model-archiver#arguments)
 ```
 torch-model-archiver --model-name mnist_handler --version 1.0 --serialized-file mnist_script.pt --handler libmnist_handler:MnistHandler --runtime LSP
 ```
-Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/torchscript_model/mnist/mnist_handler) of unzipped model mar file.
+Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/examples/mnist/mnist_handler) of unzipped model mar file.
 ##### BabyLLama Example
 The babyllama example can be found [here](https://github.com/pytorch/serve/blob/master/cpp/src/examples/babyllama/).
 To run the example we need to download the weights as well as tokenizer files:
 ```bash
 wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
 wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
 ```
-Subsequently, we need to adjust the paths according to our local file structure in [config.json](https://github.com/pytorch/serve/blob/master/serve/cpp/test/resources/torchscript_model/babyllama/babyllama_handler/config.json).
+Subsequently, we need to adjust the paths according to our local file structure in [config.json](https://github.com/pytorch/serve/blob/master/serve/cpp/test/resources/examples/babyllama/babyllama_handler/config.json).
 ```bash
 {
 "checkpoint_path" : "/home/ubuntu/serve/cpp/stories15M.bin",
@@ -74,7 +74,7 @@ Subsequently, we need to adjust the paths according to our local file structure
 ```
 Then we can create the mar file and deploy it with:
 ```bash
-cd serve/cpp/test/resources/torchscript_model/babyllama/babyllama_handler
+cd serve/cpp/test/resources/examples/babyllama/babyllama_handler
 torch-model-archiver --model-name llm --version 1.0 --handler libbabyllama_handler:BabyLlamaHandler --runtime LSP --extra-files config.json
 mkdir model_store && mv llm.mar model_store/
 torchserve --ncs --start --model-store model_store
@@ -85,7 +85,7 @@ The handler name `libbabyllama_handler:BabyLlamaHandler` consists of our shared
 
 To test the model we can run:
 ```bash
-cd serve/cpp/test/resources/torchscript_model/babyllama/
+cd serve/cpp/test/resources/examples/babyllama/
 curl http://localhost:8080/predictions/llm -T prompt.txt
 ```
 ##### Mnist example
````
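The README changes above only re-point the babyllama paths, but the commit also ships a new llamacpp example. Below is a hedged sketch of deploying it by analogy with the babyllama flow; the handler name `libllamacpp_handler:LlamacppHandler` and the use of `--extra-files config.json` are assumptions based on the renamed LlamacppHandler class and the babyllama commands above, not commands documented in this commit:

```bash
# Hypothetical deployment of the llama.cpp example, mirroring the babyllama
# flow. Handler name and extra-files are assumptions, not from this commit.
cd serve/cpp/test/resources/examples/llamacpp/llamacpp_handler
torch-model-archiver --model-name llamacpp --version 1.0 \
    --handler libllamacpp_handler:LlamacppHandler --runtime LSP \
    --extra-files config.json
mkdir model_store && mv llamacpp.mar model_store/
torchserve --ncs --start --model-store model_store
# prompt.txt sits one level up in the test resources, next to the handler dir
curl http://localhost:8080/predictions/llamacpp -T ../prompt.txt
```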

cpp/build.sh

Lines changed: 11 additions & 10 deletions
```diff
@@ -136,6 +136,14 @@ function install_yaml_cpp() {
   cd "$BWD" || exit
 }
 
+function build_llama_cpp() {
+  BWD=$(pwd)
+  LLAMA_CPP_SRC_DIR=$BASE_DIR/third-party/llama.cpp
+  cd "${LLAMA_CPP_SRC_DIR}"
+  make
+  cd "$BWD" || exit
+}
+
 function build() {
   MAYBE_BUILD_QUIC=""
   if [ "$WITH_QUIC" == true ] ; then
@@ -206,16 +214,6 @@ function build() {
   echo -e "${COLOR_GREEN}torchserve_cpp build is complete. To run unit test: \
     ./_build/test/torchserve_cpp_test ${COLOR_OFF}"
 
-  if [ -f "$DEPS_DIR/../src/examples/libmnist_handler.dylib" ]; then
-    mv $DEPS_DIR/../src/examples/libmnist_handler.dylib $DEPS_DIR/../../test/resources/torchscript_model/mnist/mnist_handler/libmnist_handler.dylib
-  elif [ -f "$DEPS_DIR/../src/examples/libmnist_handler.so" ]; then
-    mv $DEPS_DIR/../src/examples/libmnist_handler.so $DEPS_DIR/../../test/resources/torchscript_model/mnist/mnist_handler/libmnist_handler.so
-  fi
-
-  if [ -f "$DEPS_DIR/../src/examples/libbabyllama_handler.so" ]; then
-    mv $DEPS_DIR/../src/examples/libbabyllama_handler.so $DEPS_DIR/../../test/resources/torchscript_model/babyllama/babyllama_handler/libbabyllama_handler.so
-  fi
-
   cd $DEPS_DIR/../..
   if [ -f "$DEPS_DIR/../test/torchserve_cpp_test" ]; then
     $DEPS_DIR/../test/torchserve_cpp_test
@@ -311,10 +309,13 @@ mkdir -p "$LIBS_DIR"
 # Must execute from the directory containing this script
 cd $BASE_DIR
 
+git submodule update --init --recursive
+
 install_folly
 install_kineto
 install_libtorch
 install_yaml_cpp
+build_llama_cpp
 build
 symlink_torch_libs
 symlink_yaml_cpp_lib
```
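The new `build_llama_cpp` step is just llama.cpp's stock Makefile build run inside the submodule. The equivalent manual invocation, assuming the submodule has been checked out under cpp/third-party:

```bash
# Manual equivalent of the new build_llama_cpp step: llama.cpp ships its own
# Makefile, so a plain `make` inside the submodule is all build.sh runs.
cd cpp/third-party/llama.cpp
make
cd -
```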

cpp/src/examples/CMakeLists.txt

Lines changed: 3 additions & 13 deletions
```diff
@@ -1,16 +1,6 @@
-set(MNIST_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/src/examples/image_classifier/mnist")
 
-set(MNIST_SOURCE_FILES "")
-list(APPEND MNIST_SOURCE_FILES ${MNIST_SRC_DIR}/mnist_handler.cc)
-add_library(mnist_handler SHARED ${MNIST_SOURCE_FILES})
-target_include_directories(mnist_handler PUBLIC ${MNIST_SRC_DIR})
-target_link_libraries(mnist_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
+add_subdirectory("../../../examples/cpp/babyllama/" "../../../test/resources/examples/babyllama/babyllama_handler/")
 
+add_subdirectory("../../../examples/cpp/llamacpp/" "../../../test/resources/examples/llamacpp/llamacpp_handler/")
 
-set(BABYLLAMA_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/src/examples/babyllama")
-set(BABYLLAMA_SOURCE_FILES "")
-list(APPEND BABYLLAMA_SOURCE_FILES ${BABYLLAMA_SRC_DIR}/baby_llama_handler.cc)
-add_library(babyllama_handler SHARED ${BABYLLAMA_SOURCE_FILES})
-target_include_directories(babyllama_handler PUBLIC ${BABYLLAMA_SRC_DIR})
-target_link_libraries(babyllama_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
-target_compile_options(babyllama_handler PRIVATE -Wall -Wextra -Ofast)
+add_subdirectory("../../../examples/cpp/mnist/" "../../../test/resources/examples/mnist/mnist_handler/")
```

cpp/test/backends/otf_protocol_and_handler_test.cc

Lines changed: 6 additions & 7 deletions
```diff
@@ -24,7 +24,7 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
       // model_name length
       .WillOnce(::testing::Return(5))
       // model_path length
-      .WillOnce(::testing::Return(51))
+      .WillOnce(::testing::Return(42))
       // batch_size
       .WillOnce(::testing::Return(1))
       // handler length
@@ -44,9 +44,8 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
         strncpy(data, "mnist", length);
       }))
       .WillOnce(testing::Invoke([=](size_t length, char* data) {
-        ASSERT_EQ(length, 51);
-        strncpy(data, "test/resources/torchscript_model/mnist/base_handler",
-                length);
+        ASSERT_EQ(length, 42);
+        strncpy(data, "test/resources/examples/mnist/base_handler", length);
       }))
       .WillOnce(testing::Invoke([=](size_t length, char* data) {
         ASSERT_EQ(length, 11);
@@ -60,7 +59,7 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
   EXPECT_CALL(*client_socket, SendAll(testing::_, testing::_)).Times(1);
   auto load_model_request = OTFMessage::RetrieveLoadMsg(*client_socket);
   ASSERT_EQ(load_model_request->model_dir,
-            "test/resources/torchscript_model/mnist/base_handler");
+            "test/resources/examples/mnist/base_handler");
   ASSERT_EQ(load_model_request->model_name, "mnist");
   ASSERT_EQ(load_model_request->envelope, "");
   ASSERT_EQ(load_model_request->model_name, "mnist");
@@ -71,7 +70,7 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
   auto backend = std::make_shared<torchserve::Backend>();
   MetricsRegistry::Initialize("test/resources/metrics/default_config.yaml",
                               MetricsContext::BACKEND);
-  backend->Initialize("test/resources/torchscript_model/mnist/base_handler");
+  backend->Initialize("test/resources/examples/mnist/base_handler");
 
   // load the model
   auto load_model_response = backend->LoadModel(load_model_request);
@@ -126,7 +125,7 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
       .WillOnce(testing::Invoke([=](size_t length, char* data) {
         ASSERT_EQ(length, 3883);
         // strncpy(data, "valu", length);
-        std::ifstream input("test/resources/torchscript_model/mnist/0_png.pt",
+        std::ifstream input("test/resources/examples/mnist/0_png.pt",
                             std::ios::in | std::ios::binary);
         std::vector<char> image((std::istreambuf_iterator<char>(input)),
                                 (std::istreambuf_iterator<char>()));
```
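The updated magic numbers in this test are simply the byte lengths of the old and new model_path strings, which changed when torchscript_model was renamed to examples. A quick sanity check:

```bash
# 51 and 42 are just the lengths of the old and new model_path strings:
echo -n "test/resources/torchscript_model/mnist/base_handler" | wc -c  # 51
echo -n "test/resources/examples/mnist/base_handler" | wc -c           # 42
```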

cpp/test/examples/examples_test.cc

Lines changed: 32 additions & 4 deletions
```diff
@@ -1,10 +1,38 @@
+#include <fstream>
+
 #include "test/utils/common.hh"
 
 TEST_F(ModelPredictTest, TestLoadPredictBabyLlamaHandler) {
+  std::string base_dir = "test/resources/examples/babyllama/";
+  std::string file1 = base_dir + "babyllama_handler/stories15M.bin";
+  std::string file2 = base_dir + "babyllama_handler/tokenizer.bin";
+
+  std::ifstream f1(file1);
+  std::ifstream f2(file2);
+
+  if (!f1.good() && !f2.good())
+    GTEST_SKIP()
+        << "Skipping TestLoadPredictBabyLlamaHandler because of missing files: "
+        << file1 << " or " << file2;
+
+  this->LoadPredict(
+      std::make_shared<torchserve::LoadModelRequest>(
+          base_dir + "babyllama_handler", "llm", -1, "", "", 1, false),
+      base_dir + "babyllama_handler", base_dir + "prompt.txt", "llm_ts", 200);
+}
+
+TEST_F(ModelPredictTest, TestLoadPredictLlmHandler) {
+  std::string base_dir = "test/resources/examples/llamacpp/";
+  std::string file1 = base_dir + "llamacpp_handler/llama-2-7b-chat.Q5_0.gguf";
+  std::ifstream f(file1);
+
+  if (!f.good())
+    GTEST_SKIP()
+        << "Skipping TestLoadPredictLlmHandler because of missing file: "
+        << file1;
+
   this->LoadPredict(
       std::make_shared<torchserve::LoadModelRequest>(
-          "test/resources/torchscript_model/babyllama/babyllama_handler", "llm",
-          -1, "", "", 1, false),
-      "test/resources/torchscript_model/babyllama/babyllama_handler",
-      "test/resources/torchscript_model/babyllama/prompt.txt", "llm_ts", 200);
+          base_dir + "llamacpp_handler", "llamacpp", -1, "", "", 1, false),
+      base_dir + "llamacpp_handler", base_dir + "prompt.txt", "llm_ts", 200);
 }
```
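Both tests now skip when the model weights are absent. To exercise them locally, the weights have to be placed in the new test resource folders. A sketch: the tinyllamas URLs are the ones from the README above, while the source of the quantized llama-2 GGUF file is not specified in this commit, so that step is left as a placeholder:

```bash
# Place the weights so the tests run instead of skipping.
cd cpp/test/resources/examples
wget -P babyllama/babyllama_handler https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
wget -P babyllama/babyllama_handler https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
# The GGUF source is not given in this commit; supply your own copy:
# cp /path/to/llama-2-7b-chat.Q5_0.gguf llamacpp/llamacpp_handler/
```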

cpp/test/resources/torchscript_model/babyllama/babyllama_handler/MAR-INF/MANIFEST.json renamed to cpp/test/resources/examples/babyllama/babyllama_handler/MAR-INF/MANIFEST.json

File renamed without changes.
cpp/test/resources/examples/babyllama/babyllama_handler/config.json

Lines changed: 4 additions & 0 deletions
```diff
@@ -0,0 +1,4 @@
+{
+"checkpoint_path" : "test/resources/examples/babyllama/babyllama_handler/stories15M.bin",
+"tokenizer_path" : "test/resources/examples/babyllama/babyllama_handler/tokenizer.bin"
+}
```

cpp/test/resources/torchscript_model/babyllama/babyllama_handler/config.properties renamed to cpp/test/resources/examples/babyllama/babyllama_handler/config.properties

File renamed without changes.
