Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "third_party/google/rpc"]
path = third_party/google/rpc
url = https://github.com/googleapis/googleapis.git
[submodule "cpp/third-party/llama.cpp"]
path = cpp/third-party/llama.cpp
url = https://github.com/ggerganov/llama.cpp.git
10 changes: 5 additions & 5 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,23 +49,23 @@ By default, TorchServe cpp provides a handler for TorchScript [src/backends/hand
```
torch-model-archiver --model-name mnist_base --version 1.0 --serialized-file mnist_script.pt --handler TorchScriptHandler --runtime LSP
```
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/torchscript_model/mnist/base_handler) of unzipped model mar file.
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/examples/mnist/base_handler) of unzipped model mar file.
##### Using Custom Handler
* build customized handler shared lib. For example [Mnist handler](https://github.com/pytorch/serve/blob/cpp_backend/cpp/src/examples/image_classifier/mnist).
* set runtime as "LSP" in model archiver option [--runtime](https://github.com/pytorch/serve/tree/master/model-archiver#arguments)
* set handler as "libmnist_handler:MnistHandler" in model archiver option [--handler](https://github.com/pytorch/serve/tree/master/model-archiver#arguments)
```
torch-model-archiver --model-name mnist_handler --version 1.0 --serialized-file mnist_script.pt --handler libmnist_handler:MnistHandler --runtime LSP
```
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/torchscript_model/mnist/mnist_handler) of unzipped model mar file.
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/examples/mnist/mnist_handler) of unzipped model mar file.
##### BabyLlama Example
The babyllama example can be found [here](https://github.com/pytorch/serve/blob/master/cpp/src/examples/babyllama/).
To run the example we need to download the weights as well as tokenizer files:
```bash
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
```
Subsequently, we need to adjust the paths according to our local file structure in [config.json](https://github.com/pytorch/serve/blob/master/serve/cpp/test/resources/torchscript_model/babyllama/babyllama_handler/config.json).
Subsequently, we need to adjust the paths according to our local file structure in [config.json](https://github.com/pytorch/serve/blob/master/serve/cpp/test/resources/examples/babyllama/babyllama_handler/config.json).
```bash
{
"checkpoint_path" : "/home/ubuntu/serve/cpp/stories15M.bin",
Expand All @@ -74,7 +74,7 @@ Subsequently, we need to adjust the paths according to our local file structure
```
Then we can create the mar file and deploy it with:
```bash
cd serve/cpp/test/resources/torchscript_model/babyllama/babyllama_handler
cd serve/cpp/test/resources/examples/babyllama/babyllama_handler
torch-model-archiver --model-name llm --version 1.0 --handler libbabyllama_handler:BabyLlamaHandler --runtime LSP --extra-files config.json
mkdir model_store && mv llm.mar model_store/
torchserve --ncs --start --model-store model_store
Expand All @@ -85,7 +85,7 @@ The handler name `libbabyllama_handler:BabyLlamaHandler` consists of our shared

To test the model we can run:
```bash
cd serve/cpp/test/resources/torchscript_model/babyllama/
cd serve/cpp/test/resources/examples/babyllama/
curl http://localhost:8080/predictions/llm -T prompt.txt
```
##### Mnist example
Expand Down
21 changes: 18 additions & 3 deletions cpp/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ function install_yaml_cpp() {
cd "$BWD" || exit
}

function build_llama_cpp() {
  # Build the vendored llama.cpp submodule in place with its own Makefile.
  # The resulting object files are linked into the llamacpp_handler example
  # (see cpp/src/examples/CMakeLists.txt).
  BWD=$(pwd)
  LLAMA_CPP_SRC_DIR=$BASE_DIR/third-party/llama.cpp
  # Guard the cd: if the submodule was not checked out, fail here instead of
  # running `make` in the wrong directory.
  cd "${LLAMA_CPP_SRC_DIR}" || exit
  make
  cd "$BWD" || exit
}

function build() {
MAYBE_BUILD_QUIC=""
if [ "$WITH_QUIC" == true ] ; then
Expand Down Expand Up @@ -207,13 +215,17 @@ function build() {
./_build/test/torchserve_cpp_test ${COLOR_OFF}"

if [ -f "$DEPS_DIR/../src/examples/libmnist_handler.dylib" ]; then
mv $DEPS_DIR/../src/examples/libmnist_handler.dylib $DEPS_DIR/../../test/resources/torchscript_model/mnist/mnist_handler/libmnist_handler.dylib
mv $DEPS_DIR/../src/examples/libmnist_handler.dylib $DEPS_DIR/../../test/resources/examples/mnist/mnist_handler/libmnist_handler.dylib
elif [ -f "$DEPS_DIR/../src/examples/libmnist_handler.so" ]; then
mv $DEPS_DIR/../src/examples/libmnist_handler.so $DEPS_DIR/../../test/resources/torchscript_model/mnist/mnist_handler/libmnist_handler.so
mv $DEPS_DIR/../src/examples/libmnist_handler.so $DEPS_DIR/../../test/resources/examples/mnist/mnist_handler/libmnist_handler.so
fi

if [ -f "$DEPS_DIR/../src/examples/libbabyllama_handler.so" ]; then
mv $DEPS_DIR/../src/examples/libbabyllama_handler.so $DEPS_DIR/../../test/resources/torchscript_model/babyllama/babyllama_handler/libbabyllama_handler.so
mv $DEPS_DIR/../src/examples/libbabyllama_handler.so $DEPS_DIR/../../test/resources/examples/babyllama/babyllama_handler/libbabyllama_handler.so
fi

if [ -f "$DEPS_DIR/../src/examples/libllamacpp_handler.so" ]; then
mv $DEPS_DIR/../src/examples/libllamacpp_handler.so $DEPS_DIR/../../test/resources/examples/llamacpp/llamacpp_handler/libllamacpp_handler.so
fi

cd $DEPS_DIR/../..
Expand Down Expand Up @@ -311,10 +323,13 @@ mkdir -p "$LIBS_DIR"
# Must execute from the directory containing this script
cd $BASE_DIR

git submodule update --init --recursive

install_folly
install_kineto
install_libtorch
install_yaml_cpp
build_llama_cpp
build
symlink_torch_libs
symlink_yaml_cpp_lib
Expand Down
25 changes: 25 additions & 0 deletions cpp/src/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,28 @@ add_library(babyllama_handler SHARED ${BABYLLAMA_SOURCE_FILES})
target_include_directories(babyllama_handler PUBLIC ${BABYLLAMA_SRC_DIR})
target_link_libraries(babyllama_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
target_compile_options(babyllama_handler PRIVATE -Wall -Wextra -Ofast)

# llama.cpp example handler: wraps the vendored llama.cpp library (built
# out-of-band by cpp/build.sh via its Makefile) in a TorchServe C++ handler.
set(LLM_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/src/examples/llamacpp")
set(LLAMACPP_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/third-party/llama.cpp")
set(LLM_SOURCE_FILES "")
list(APPEND LLM_SOURCE_FILES ${LLM_SRC_DIR}/llamacpp_handler.cc)
add_library(llamacpp_handler SHARED ${LLM_SOURCE_FILES})
target_include_directories(llamacpp_handler PUBLIC ${LLM_SRC_DIR})
target_include_directories(llamacpp_handler PUBLIC ${LLAMACPP_SRC_DIR})
target_link_libraries(llamacpp_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
# Match the warning level used by the sibling babyllama_handler target.
target_compile_options(llamacpp_handler PRIVATE -Wall -Wextra)

# llama.cpp's Makefile does not produce an installable library target, so the
# prebuilt object files are added as sources and linked directly. The variable
# is project-prefixed because CMake variables leak into enclosing scopes.
set(LLAMACPP_OBJECT_FILES
    ${LLAMACPP_SRC_DIR}/ggml.o
    ${LLAMACPP_SRC_DIR}/llama.o
    ${LLAMACPP_SRC_DIR}/common.o
    ${LLAMACPP_SRC_DIR}/ggml-quants.o
    ${LLAMACPP_SRC_DIR}/ggml-alloc.o
    ${LLAMACPP_SRC_DIR}/grammar-parser.o
    ${LLAMACPP_SRC_DIR}/console.o
    ${LLAMACPP_SRC_DIR}/build-info.o
    ${LLAMACPP_SRC_DIR}/ggml-backend.o
)

target_sources(llamacpp_handler PRIVATE ${LLAMACPP_OBJECT_FILES})
1 change: 0 additions & 1 deletion cpp/src/examples/babyllama/baby_llama_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ c10::IValue BabyLlamaHandler::Inference(
} catch (...) {
TS_LOG(ERROR, "Failed to run inference on this batch");
}
std::cout << "WOOT?" << std::endl;
return batch_output_vector;
}

Expand Down
Loading