Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "third_party/google/rpc"]
path = third_party/google/rpc
url = https://github.com/googleapis/googleapis.git
[submodule "cpp/third-party/llama.cpp"]
path = cpp/third-party/llama.cpp
url = https://github.com/ggerganov/llama.cpp.git
10 changes: 5 additions & 5 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,23 +49,23 @@ By default, TorchServe cpp provides a handler for TorchScript [src/backends/hand
```
torch-model-archiver --model-name mnist_base --version 1.0 --serialized-file mnist_script.pt --handler TorchScriptHandler --runtime LSP
```
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/torchscript_model/mnist/base_handler) of unzipped model mar file.
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/examples/mnist/base_handler) of unzipped model mar file.
##### Using Custom Handler
* build customized handler shared lib. For example [Mnist handler](https://github.com/pytorch/serve/blob/cpp_backend/cpp/src/examples/image_classifier/mnist).
* set runtime as "LSP" in model archiver option [--runtime](https://github.com/pytorch/serve/tree/master/model-archiver#arguments)
* set handler as "libmnist_handler:MnistHandler" in model archiver option [--handler](https://github.com/pytorch/serve/tree/master/model-archiver#arguments)
```
torch-model-archiver --model-name mnist_handler --version 1.0 --serialized-file mnist_script.pt --handler libmnist_handler:MnistHandler --runtime LSP
```
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/torchscript_model/mnist/mnist_handler) of unzipped model mar file.
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/examples/mnist/mnist_handler) of unzipped model mar file.
##### BabyLlama Example
The babyllama example can be found [here](https://github.com/pytorch/serve/blob/master/cpp/src/examples/babyllama/).
To run the example we need to download the weights as well as tokenizer files:
```bash
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
```
Subsequently, we need to adjust the paths according to our local file structure in [config.json](https://github.com/pytorch/serve/blob/master/serve/cpp/test/resources/torchscript_model/babyllama/babyllama_handler/config.json).
Subsequently, we need to adjust the paths according to our local file structure in [config.json](https://github.com/pytorch/serve/blob/master/serve/cpp/test/resources/examples/babyllama/babyllama_handler/config.json).
```bash
{
"checkpoint_path" : "/home/ubuntu/serve/cpp/stories15M.bin",
Expand All @@ -74,7 +74,7 @@ Subsequently, we need to adjust the paths according to our local file structure
```
Then we can create the mar file and deploy it with:
```bash
cd serve/cpp/test/resources/torchscript_model/babyllama/babyllama_handler
cd serve/cpp/test/resources/examples/babyllama/babyllama_handler
torch-model-archiver --model-name llm --version 1.0 --handler libbabyllama_handler:BabyLlamaHandler --runtime LSP --extra-files config.json
mkdir model_store && mv llm.mar model_store/
torchserve --ncs --start --model-store model_store
Expand All @@ -85,7 +85,7 @@ The handler name `libbabyllama_handler:BabyLlamaHandler` consists of our shared

To test the model we can run:
```bash
cd serve/cpp/test/resources/torchscript_model/babyllama/
cd serve/cpp/test/resources/examples/babyllama/
curl http://localhost:8080/predictions/llm -T prompt.txt
```
##### Mnist example
Expand Down
21 changes: 18 additions & 3 deletions cpp/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ function install_yaml_cpp() {
cd "$BWD" || exit
}

function build_llama_cpp() {
  # Build the vendored llama.cpp submodule in place with its own Makefile.
  # The resulting object files are linked into the llamacpp_handler example
  # (see cpp/src/examples/CMakeLists.txt).
  BWD=$(pwd)
  LLAMA_CPP_SRC_DIR=$BASE_DIR/third-party/llama.cpp
  # Guard the cd: if the submodule was not checked out, fail here instead of
  # running `make` in the wrong directory.
  cd "${LLAMA_CPP_SRC_DIR}" || exit
  make
  cd "$BWD" || exit
}

function build() {
MAYBE_BUILD_QUIC=""
if [ "$WITH_QUIC" == true ] ; then
Expand Down Expand Up @@ -207,13 +215,17 @@ function build() {
./_build/test/torchserve_cpp_test ${COLOR_OFF}"

if [ -f "$DEPS_DIR/../src/examples/libmnist_handler.dylib" ]; then
mv $DEPS_DIR/../src/examples/libmnist_handler.dylib $DEPS_DIR/../../test/resources/torchscript_model/mnist/mnist_handler/libmnist_handler.dylib
mv $DEPS_DIR/../src/examples/libmnist_handler.dylib $DEPS_DIR/../../test/resources/examples/mnist/mnist_handler/libmnist_handler.dylib
elif [ -f "$DEPS_DIR/../src/examples/libmnist_handler.so" ]; then
mv $DEPS_DIR/../src/examples/libmnist_handler.so $DEPS_DIR/../../test/resources/torchscript_model/mnist/mnist_handler/libmnist_handler.so
mv $DEPS_DIR/../src/examples/libmnist_handler.so $DEPS_DIR/../../test/resources/examples/mnist/mnist_handler/libmnist_handler.so
fi

if [ -f "$DEPS_DIR/../src/examples/libbabyllama_handler.so" ]; then
mv $DEPS_DIR/../src/examples/libbabyllama_handler.so $DEPS_DIR/../../test/resources/torchscript_model/babyllama/babyllama_handler/libbabyllama_handler.so
mv $DEPS_DIR/../src/examples/libbabyllama_handler.so $DEPS_DIR/../../test/resources/examples/babyllama/babyllama_handler/libbabyllama_handler.so
fi

if [ -f "$DEPS_DIR/../src/examples/libllamacpp_handler.so" ]; then
mv $DEPS_DIR/../src/examples/libllamacpp_handler.so $DEPS_DIR/../../test/resources/examples/llamacpp/llamacpp_handler/libllamacpp_handler.so
fi

cd $DEPS_DIR/../..
Expand Down Expand Up @@ -311,10 +323,13 @@ mkdir -p "$LIBS_DIR"
# Must execute from the directory containing this script
cd $BASE_DIR

git submodule update --init --recursive

install_folly
install_kineto
install_libtorch
install_yaml_cpp
build_llama_cpp
build
symlink_torch_libs
symlink_yaml_cpp_lib
Expand Down
25 changes: 25 additions & 0 deletions cpp/src/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,28 @@ add_library(babyllama_handler SHARED ${BABYLLAMA_SOURCE_FILES})
target_include_directories(babyllama_handler PUBLIC ${BABYLLAMA_SRC_DIR})
target_link_libraries(babyllama_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
target_compile_options(babyllama_handler PRIVATE -Wall -Wextra -Ofast)

# llama.cpp example handler: wraps the vendored llama.cpp library (built
# out-of-band by cpp/build.sh via its Makefile) in a TorchServe C++ handler.
set(LLM_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/src/examples/llamacpp")
set(LLAMACPP_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/third-party/llama.cpp")
set(LLM_SOURCE_FILES "")
list(APPEND LLM_SOURCE_FILES ${LLM_SRC_DIR}/llamacpp_handler.cc)
add_library(llamacpp_handler SHARED ${LLM_SOURCE_FILES})
target_include_directories(llamacpp_handler PUBLIC ${LLM_SRC_DIR})
target_include_directories(llamacpp_handler PUBLIC ${LLAMACPP_SRC_DIR})
target_link_libraries(llamacpp_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
# Match the warning level used by the sibling babyllama_handler target.
target_compile_options(llamacpp_handler PRIVATE -Wall -Wextra)

# llama.cpp's Makefile does not produce an installable library target, so the
# prebuilt object files are added as sources and linked directly. The variable
# is project-prefixed because CMake variables leak into enclosing scopes.
set(LLAMACPP_OBJECT_FILES
    ${LLAMACPP_SRC_DIR}/ggml.o
    ${LLAMACPP_SRC_DIR}/llama.o
    ${LLAMACPP_SRC_DIR}/common.o
    ${LLAMACPP_SRC_DIR}/ggml-quants.o
    ${LLAMACPP_SRC_DIR}/ggml-alloc.o
    ${LLAMACPP_SRC_DIR}/grammar-parser.o
    ${LLAMACPP_SRC_DIR}/console.o
    ${LLAMACPP_SRC_DIR}/build-info.o
    ${LLAMACPP_SRC_DIR}/ggml-backend.o
)

target_sources(llamacpp_handler PRIVATE ${LLAMACPP_OBJECT_FILES})
1 change: 0 additions & 1 deletion cpp/src/examples/babyllama/baby_llama_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ c10::IValue BabyLlamaHandler::Inference(
} catch (...) {
TS_LOG(ERROR, "Failed to run inference on this batch");
}
std::cout << "WOOT?" << std::endl;
return batch_output_vector;
}

Expand Down
Loading