3 changes: 3 additions & 0 deletions .gitmodules
@@ -1,3 +1,6 @@
[submodule "third_party/google/rpc"]
path = third_party/google/rpc
url = https://github.com/googleapis/googleapis.git
[submodule "cpp/third-party/llama.cpp"]
path = cpp/third-party/llama.cpp
url = https://github.com/ggerganov/llama.cpp.git
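Since llama.cpp is vendored as a submodule now, a fresh checkout must initialize it before building. The build script below does this automatically; the explicit form, as a sketch:

```bash
# Fetch the newly added llama.cpp submodule (build.sh runs the recursive
# form across all submodules):
git submodule update --init --recursive cpp/third-party/llama.cpp
```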
10 changes: 5 additions & 5 deletions cpp/README.md
@@ -49,23 +49,23 @@ By default, TorchServe cpp provides a handler for TorchScript [src/backends/hand
```
torch-model-archiver --model-name mnist_base --version 1.0 --serialized-file mnist_script.pt --handler TorchScriptHandler --runtime LSP
```
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/torchscript_model/mnist/base_handler) of an unzipped model mar file.
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/examples/mnist/base_handler) of an unzipped model mar file.
##### Using Custom Handler
* Build a custom handler shared library, for example the [Mnist handler](https://github.com/pytorch/serve/blob/cpp_backend/cpp/src/examples/image_classifier/mnist).
* Set the runtime to "LSP" in the model archiver option [--runtime](https://github.com/pytorch/serve/tree/master/model-archiver#arguments)
* Set the handler to "libmnist_handler:MnistHandler" in the model archiver option [--handler](https://github.com/pytorch/serve/tree/master/model-archiver#arguments)
```
torch-model-archiver --model-name mnist_handler --version 1.0 --serialized-file mnist_script.pt --handler libmnist_handler:MnistHandler --runtime LSP
```
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/torchscript_model/mnist/mnist_handler) of an unzipped model mar file.
Here is an [example](https://github.com/pytorch/serve/tree/cpp_backend/cpp/test/resources/examples/mnist/mnist_handler) of an unzipped model mar file.
##### BabyLlama Example
The babyllama example can be found [here](https://github.com/pytorch/serve/blob/master/cpp/src/examples/babyllama/).
To run the example, we need to download the model weights as well as the tokenizer file:
```bash
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
```
Subsequently, we need to adjust the paths in [config.json](https://github.com/pytorch/serve/blob/master/serve/cpp/test/resources/torchscript_model/babyllama/babyllama_handler/config.json) to match our local file structure.
Subsequently, we need to adjust the paths in [config.json](https://github.com/pytorch/serve/blob/master/serve/cpp/test/resources/examples/babyllama/babyllama_handler/config.json) to match our local file structure.
```json
{
"checkpoint_path" : "/home/ubuntu/serve/cpp/stories15M.bin",
@@ -74,7 +74,7 @@ Subsequently, we need to adjust the paths according to our local file structure
```
Then we can create the mar file and deploy it with:
```bash
cd serve/cpp/test/resources/torchscript_model/babyllama/babyllama_handler
cd serve/cpp/test/resources/examples/babyllama/babyllama_handler
torch-model-archiver --model-name llm --version 1.0 --handler libbabyllama_handler:BabyLlamaHandler --runtime LSP --extra-files config.json
mkdir model_store && mv llm.mar model_store/
torchserve --ncs --start --model-store model_store
@@ -85,7 +85,7 @@ The handler name `libbabyllama_handler:BabyLlamaHandler` consists of our shared

To test the model we can run:
```bash
cd serve/cpp/test/resources/torchscript_model/babyllama/
cd serve/cpp/test/resources/examples/babyllama/
curl http://localhost:8080/predictions/llm -T prompt.txt
```
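Once testing is done, the server can be shut down with the standard CLI call:

```bash
torchserve --stop
```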
##### Mnist example
21 changes: 11 additions & 10 deletions cpp/build.sh
@@ -136,6 +136,14 @@ function install_yaml_cpp() {
cd "$BWD" || exit
}

function build_llama_cpp() {
BWD=$(pwd)
LLAMA_CPP_SRC_DIR=$BASE_DIR/third-party/llama.cpp
cd "${LLAMA_CPP_SRC_DIR}"
make
cd "$BWD" || exit
}

function build() {
MAYBE_BUILD_QUIC=""
if [ "$WITH_QUIC" == true ] ; then
@@ -206,16 +214,6 @@ function build() {
echo -e "${COLOR_GREEN}torchserve_cpp build is complete. To run unit test: \
./_build/test/torchserve_cpp_test ${COLOR_OFF}"

if [ -f "$DEPS_DIR/../src/examples/libmnist_handler.dylib" ]; then
mv $DEPS_DIR/../src/examples/libmnist_handler.dylib $DEPS_DIR/../../test/resources/torchscript_model/mnist/mnist_handler/libmnist_handler.dylib
elif [ -f "$DEPS_DIR/../src/examples/libmnist_handler.so" ]; then
mv $DEPS_DIR/../src/examples/libmnist_handler.so $DEPS_DIR/../../test/resources/torchscript_model/mnist/mnist_handler/libmnist_handler.so
fi

if [ -f "$DEPS_DIR/../src/examples/libbabyllama_handler.so" ]; then
mv $DEPS_DIR/../src/examples/libbabyllama_handler.so $DEPS_DIR/../../test/resources/torchscript_model/babyllama/babyllama_handler/libbabyllama_handler.so
fi

cd $DEPS_DIR/../..
if [ -f "$DEPS_DIR/../test/torchserve_cpp_test" ]; then
$DEPS_DIR/../test/torchserve_cpp_test
@@ -311,10 +309,13 @@ mkdir -p "$LIBS_DIR"
# Must execute from the directory containing this script
cd $BASE_DIR

git submodule update --init --recursive

install_folly
install_kineto
install_libtorch
install_yaml_cpp
build_llama_cpp
build
symlink_torch_libs
symlink_yaml_cpp_lib
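With the submodule init and `build_llama_cpp` steps wired in, a full rebuild plus test run remains a two-step affair; a sketch, assuming a checkout under ./serve:

```bash
cd serve/cpp
./build.sh
# On success the script prints the unit-test hint shown above:
./_build/test/torchserve_cpp_test
```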
16 changes: 3 additions & 13 deletions cpp/src/examples/CMakeLists.txt
@@ -1,16 +1,6 @@
set(MNIST_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/src/examples/image_classifier/mnist")

set(MNIST_SOURCE_FILES "")
list(APPEND MNIST_SOURCE_FILES ${MNIST_SRC_DIR}/mnist_handler.cc)
add_library(mnist_handler SHARED ${MNIST_SOURCE_FILES})
target_include_directories(mnist_handler PUBLIC ${MNIST_SRC_DIR})
target_link_libraries(mnist_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
add_subdirectory("../../../examples/cpp/babyllama/" "../../../test/resources/examples/babyllama/babyllama_handler/")

add_subdirectory("../../../examples/cpp/llamacpp/" "../../../test/resources/examples/llamacpp/llamacpp_handler/")

set(BABYLLAMA_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/src/examples/babyllama")
set(BABYLLAMA_SOURCE_FILES "")
list(APPEND BABYLLAMA_SOURCE_FILES ${BABYLLAMA_SRC_DIR}/baby_llama_handler.cc)
add_library(babyllama_handler SHARED ${BABYLLAMA_SOURCE_FILES})
target_include_directories(babyllama_handler PUBLIC ${BABYLLAMA_SRC_DIR})
target_link_libraries(babyllama_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
target_compile_options(babyllama_handler PRIVATE -Wall -Wextra -Ofast)
add_subdirectory("../../../examples/cpp/mnist/" "../../../test/resources/examples/mnist/mnist_handler/")
13 changes: 6 additions & 7 deletions cpp/test/backends/otf_protocol_and_handler_test.cc
@@ -24,7 +24,7 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
// model_name length
.WillOnce(::testing::Return(5))
// model_path length
.WillOnce(::testing::Return(51))
.WillOnce(::testing::Return(42))
// batch_size
.WillOnce(::testing::Return(1))
// handler length
@@ -44,9 +44,8 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
strncpy(data, "mnist", length);
}))
.WillOnce(testing::Invoke([=](size_t length, char* data) {
ASSERT_EQ(length, 51);
strncpy(data, "test/resources/torchscript_model/mnist/base_handler",
length);
ASSERT_EQ(length, 42);
strncpy(data, "test/resources/examples/mnist/base_handler", length);
}))
.WillOnce(testing::Invoke([=](size_t length, char* data) {
ASSERT_EQ(length, 11);
@@ -60,7 +59,7 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
EXPECT_CALL(*client_socket, SendAll(testing::_, testing::_)).Times(1);
auto load_model_request = OTFMessage::RetrieveLoadMsg(*client_socket);
ASSERT_EQ(load_model_request->model_dir,
"test/resources/torchscript_model/mnist/base_handler");
"test/resources/examples/mnist/base_handler");
ASSERT_EQ(load_model_request->model_name, "mnist");
ASSERT_EQ(load_model_request->envelope, "");
ASSERT_EQ(load_model_request->model_name, "mnist");
@@ -71,7 +70,7 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
auto backend = std::make_shared<torchserve::Backend>();
MetricsRegistry::Initialize("test/resources/metrics/default_config.yaml",
MetricsContext::BACKEND);
backend->Initialize("test/resources/torchscript_model/mnist/base_handler");
backend->Initialize("test/resources/examples/mnist/base_handler");

// load the model
auto load_model_response = backend->LoadModel(load_model_request);
@@ -126,7 +125,7 @@ TEST(BackendIntegTest, TestOTFProtocolAndHandler) {
.WillOnce(testing::Invoke([=](size_t length, char* data) {
ASSERT_EQ(length, 3883);
// strncpy(data, "valu", length);
std::ifstream input("test/resources/torchscript_model/mnist/0_png.pt",
std::ifstream input("test/resources/examples/mnist/0_png.pt",
std::ios::in | std::ios::binary);
std::vector<char> image((std::istreambuf_iterator<char>(input)),
(std::istreambuf_iterator<char>()));
36 changes: 32 additions & 4 deletions cpp/test/examples/examples_test.cc
@@ -1,10 +1,38 @@
#include <fstream>

#include "test/utils/common.hh"

TEST_F(ModelPredictTest, TestLoadPredictBabyLlamaHandler) {
std::string base_dir = "test/resources/examples/babyllama/";
std::string file1 = base_dir + "babyllama_handler/stories15M.bin";
std::string file2 = base_dir + "babyllama_handler/tokenizer.bin";

std::ifstream f1(file1);
std::ifstream f2(file2);

if (!f1.good() || !f2.good())
GTEST_SKIP()
<< "Skipping TestLoadPredictBabyLlamaHandler because of missing files: "
<< file1 << " or " << file2;

this->LoadPredict(
std::make_shared<torchserve::LoadModelRequest>(
base_dir + "babyllama_handler", "llm", -1, "", "", 1, false),
base_dir + "babyllama_handler", base_dir + "prompt.txt", "llm_ts", 200);
}

TEST_F(ModelPredictTest, TestLoadPredictLlmHandler) {
std::string base_dir = "test/resources/examples/llamacpp/";
std::string file1 = base_dir + "llamacpp_handler/llama-2-7b-chat.Q5_0.gguf";
std::ifstream f(file1);

if (!f.good())
GTEST_SKIP()
<< "Skipping TestLoadPredictLlmHandler because of missing file: "
<< file1;

this->LoadPredict(
std::make_shared<torchserve::LoadModelRequest>(
"test/resources/torchscript_model/babyllama/babyllama_handler", "llm",
-1, "", "", 1, false),
"test/resources/torchscript_model/babyllama/babyllama_handler",
"test/resources/torchscript_model/babyllama/prompt.txt", "llm_ts", 200);
base_dir + "llamacpp_handler", "llamacpp", -1, "", "", 1, false),
base_dir + "llamacpp_handler", base_dir + "prompt.txt", "llm_ts", 200);
}
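Both tests skip themselves when their weight files are absent, so CI stays green without the large artifacts. To exercise them locally, the files have to be staged first; a sketch, where the BabyLlama URLs come from the README above and the GGUF source is an assumption (any Q5_0 GGUF with this filename would do):

```bash
# BabyLlama weights and tokenizer (URLs from the README in this PR):
cd cpp/test/resources/examples/babyllama/babyllama_handler
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin

# GGUF checkpoint for the llamacpp test (source is an assumption):
cd ../../llamacpp/llamacpp_handler
wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_0.gguf
```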
4 changes: 4 additions & 0 deletions cpp/test/resources/examples/babyllama/babyllama_handler/config.json
@@ -0,0 +1,4 @@
{
"checkpoint_path" : "test/resources/examples/babyllama/babyllama_handler/stories15M.bin",
"tokenizer_path" : "test/resources/examples/babyllama/babyllama_handler/tokenizer.bin"
}
10 changes: 10 additions & 0 deletions cpp/test/resources/examples/llamacpp/llamacpp_handler/MAR-INF/MANIFEST.json
@@ -0,0 +1,10 @@
{
"createdOn": "28/07/2020 06:32:08",
"runtime": "LSP",
"model": {
"modelName": "llamacpp",
"handler": "libllamacpp_handler:LlamaCppHandler",
"modelVersion": "2.0"
},
"archiverVersion": "0.2.0"
}
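For reference, a manifest of this shape would come out of an archiver call along these lines (a sketch mirroring the BabyLlama command in the README; the exact flags used for this example are not shown in the PR):

```bash
torch-model-archiver --model-name llamacpp --version 2.0 \
  --handler libllamacpp_handler:LlamaCppHandler --runtime LSP
```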

This file was deleted.

33 changes: 15 additions & 18 deletions cpp/test/torch_scripted/torch_scripted_test.cc
@@ -9,47 +9,44 @@

TEST_F(ModelPredictTest, TestLoadPredictBaseHandler) {
this->LoadPredict(std::make_shared<torchserve::LoadModelRequest>(
"test/resources/torchscript_model/mnist/mnist_handler",
"test/resources/examples/mnist/mnist_handler",
"mnist_scripted_v2", -1, "", "", 1, false),
"test/resources/torchscript_model/mnist/base_handler",
"test/resources/torchscript_model/mnist/0_png.pt",
"mnist_ts", 200);
"test/resources/examples/mnist/base_handler",
"test/resources/examples/mnist/0_png.pt", "mnist_ts", 200);
}

TEST_F(ModelPredictTest, TestLoadPredictMnistHandler) {
this->LoadPredict(std::make_shared<torchserve::LoadModelRequest>(
"test/resources/torchscript_model/mnist/mnist_handler",
"test/resources/examples/mnist/mnist_handler",
"mnist_scripted_v2", -1, "", "", 1, false),
"test/resources/torchscript_model/mnist/mnist_handler",
"test/resources/torchscript_model/mnist/0_png.pt",
"mnist_ts", 200);
"test/resources/examples/mnist/mnist_handler",
"test/resources/examples/mnist/0_png.pt", "mnist_ts", 200);
}

TEST_F(ModelPredictTest, TestBackendInitWrongModelDir) {
auto result = backend_->Initialize("test/resources/torchscript_model/mnist");
auto result = backend_->Initialize("test/resources/examples/mnist");
ASSERT_EQ(result, false);
}

TEST_F(ModelPredictTest, TestBackendInitWrongHandler) {
auto result = backend_->Initialize(
"test/resources/torchscript_model/mnist/wrong_handler");
auto result =
backend_->Initialize("test/resources/examples/mnist/wrong_handler");
ASSERT_EQ(result, false);
}

TEST_F(ModelPredictTest, TestLoadModelFailure) {
backend_->Initialize("test/resources/torchscript_model/mnist/wrong_model");
backend_->Initialize("test/resources/examples/mnist/wrong_model");
auto result =
backend_->LoadModel(std::make_shared<torchserve::LoadModelRequest>(
"test/resources/torchscript_model/mnist/wrong_model",
"mnist_scripted_v2", -1, "", "", 1, false));
"test/resources/examples/mnist/wrong_model", "mnist_scripted_v2", -1,
"", "", 1, false));
ASSERT_EQ(result->code, 500);
}

TEST_F(ModelPredictTest, TestLoadPredictMnistHandlerFailure) {
this->LoadPredict(std::make_shared<torchserve::LoadModelRequest>(
"test/resources/torchscript_model/mnist/mnist_handler",
"test/resources/examples/mnist/mnist_handler",
"mnist_scripted_v2", -1, "", "", 1, false),
"test/resources/torchscript_model/mnist/mnist_handler",
"test/resources/torchscript_model/mnist/0.png", "mnist_ts",
500);
"test/resources/examples/mnist/mnist_handler",
"test/resources/examples/mnist/0.png", "mnist_ts", 500);
}
2 changes: 1 addition & 1 deletion cpp/test/utils/model_archiver_test.cc
@@ -6,7 +6,7 @@ namespace torchserve {
TEST(ManifestTest, TestInitialize) {
torchserve::Manifest manifest;
manifest.Initialize(
"test/resources/torchscript_model/mnist/base_handler/MAR-INF/"
"test/resources/examples/mnist/base_handler/MAR-INF/"
"MANIFEST.json");
ASSERT_EQ(manifest.GetCreatOn(), "28/07/2020 06:32:08");
ASSERT_EQ(manifest.GetArchiverVersion(), "0.2.0");
1 change: 1 addition & 0 deletions cpp/third-party/llama.cpp
Submodule llama.cpp added at cd4fdd
5 changes: 5 additions & 0 deletions examples/cpp/babyllama/CMakeLists.txt
@@ -0,0 +1,5 @@

add_library(babyllama_handler SHARED src/baby_llama_handler.cc)

target_link_libraries(babyllama_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
target_compile_options(babyllama_handler PRIVATE -Wall -Wextra -Ofast)
4 changes: 4 additions & 0 deletions examples/cpp/babyllama/config.json
@@ -0,0 +1,4 @@
{
"checkpoint_path" : "/home/ubuntu/serve/examples/cpp/babyllama/stories15M.bin",
"tokenizer_path" : "/home/ubuntu/serve/examples/cpp/babyllama/tokenizer.bin"
}
cpp/src/examples/babyllama/baby_llama_handler.cc → examples/cpp/babyllama/src/baby_llama_handler.cc
@@ -1,11 +1,11 @@
#include "src/examples/babyllama/baby_llama_handler.hh"
#include "baby_llama_handler.hh"

#include <folly/FileUtil.h>
#include <folly/json.h>

#include <typeinfo>

#include "src/examples/babyllama/llama2.c/run.c"
#include "llama2.c/run.c"

namespace llm {

@@ -233,7 +233,6 @@ c10::IValue BabyLlamaHandler::Inference(
} catch (...) {
TS_LOG(ERROR, "Failed to run inference on this batch");
}
std::cout << "WOOT?" << std::endl;
return batch_output_vector;
}

20 changes: 20 additions & 0 deletions examples/cpp/llamacpp/CMakeLists.txt
@@ -0,0 +1,20 @@
set(LLAMACPP_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/third-party/llama.cpp")

add_library(llamacpp_handler SHARED src/llamacpp_handler.cc)

set(MY_OBJECT_FILES
${LLAMACPP_SRC_DIR}/ggml.o
${LLAMACPP_SRC_DIR}/llama.o
${LLAMACPP_SRC_DIR}/common.o
${LLAMACPP_SRC_DIR}/ggml-quants.o
${LLAMACPP_SRC_DIR}/ggml-alloc.o
${LLAMACPP_SRC_DIR}/grammar-parser.o
${LLAMACPP_SRC_DIR}/console.o
${LLAMACPP_SRC_DIR}/build-info.o
${LLAMACPP_SRC_DIR}/ggml-backend.o

)

target_sources(llamacpp_handler PRIVATE ${MY_OBJECT_FILES})
target_include_directories(llamacpp_handler PUBLIC ${LLAMACPP_SRC_DIR})
target_link_libraries(llamacpp_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
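The object files in MY_OBJECT_FILES are exactly what the plain `make` in build_llama_cpp leaves behind in the submodule tree; if the link step fails, checking that they exist is the first diagnostic. A sketch, assuming the default submodule path:

```bash
# Verify the llama.cpp objects listed in MY_OBJECT_FILES were produced:
for obj in ggml.o llama.o common.o ggml-quants.o ggml-alloc.o \
           grammar-parser.o console.o build-info.o ggml-backend.o; do
  [ -f "cpp/third-party/llama.cpp/$obj" ] || echo "missing: $obj"
done
```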