
Commit b1a45f0

Adapt to removal of TS backend

1 parent: 8849744

3 files changed: 9 additions, 14 deletions
cpp/src/examples/CMakeLists.txt  (1 addition, 2 deletions)

```diff
@@ -14,7 +14,6 @@ add_library(babyllama_handler SHARED ${BABYLLAMA_SOURCE_FILES})
 target_include_directories(babyllama_handler PUBLIC ${BABYLLAMA_SRC_DIR})
 target_link_libraries(babyllama_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
 target_compile_options(babyllama_handler PRIVATE -Wall -Wextra -Ofast)
-target_link_libraries(mnist_handler PRIVATE ts_backends_torch_scripted ts_utils ${TORCH_LIBRARIES})
 
 set(LLM_SRC_DIR "${torchserve_cpp_SOURCE_DIR}/src/examples/llamacpp")
 set(LLAMACPP_SRC_DIR "/home/ubuntu/llama.cpp")
@@ -23,7 +22,7 @@ list(APPEND LLM_SOURCE_FILES ${LLM_SRC_DIR}/llamacpp_handler.cc)
 add_library(llamacpp_handler SHARED ${LLM_SOURCE_FILES})
 target_include_directories(llamacpp_handler PUBLIC ${LLM_SRC_DIR})
 target_include_directories(llamacpp_handler PUBLIC ${LLAMACPP_SRC_DIR})
-target_link_libraries(llamacpp_handler PRIVATE ts_backends_torch_scripted ts_utils ${TORCH_LIBRARIES})
+target_link_libraries(llamacpp_handler PRIVATE ts_backends_core ts_utils ${TORCH_LIBRARIES})
 
 
 set(MY_OBJECT_FILES
```

cpp/src/examples/llamacpp/llamacpp_handler.cc  (4 additions, 6 deletions)

```diff
@@ -17,8 +17,7 @@ void LlamacppHandler::initialize_context() {
   }
 }
 
-std::pair<std::shared_ptr<torch::jit::script::Module>,
-          std::shared_ptr<torch::Device>>
+std::pair<std::shared_ptr<void>, std::shared_ptr<torch::Device>>
 LlamacppHandler::LoadModel(
     std::shared_ptr<torchserve::LoadModelRequest>& load_model_request) {
   try {
@@ -156,8 +155,7 @@ std::vector<torch::jit::IValue> LlamacppHandler::Preprocess(
 }
 
 torch::Tensor LlamacppHandler::Inference(
-    std::shared_ptr<torch::jit::script::Module> model,
-    std::vector<torch::jit::IValue>& inputs,
+    std::shared_ptr<void> model, std::vector<torch::jit::IValue>& inputs,
     std::shared_ptr<torch::Device>& device,
     std::pair<std::string&, std::map<uint8_t, std::string>&>& idx_to_req_id,
     std::shared_ptr<torchserve::InferenceResponseBatch>& response_batch) {
@@ -290,11 +288,11 @@ LlamacppHandler::~LlamacppHandler() noexcept {
 
 #if defined(__linux__) || defined(__APPLE__)
 extern "C" {
-torchserve::torchscripted::BaseHandler* allocatorLlamacppHandler() {
+torchserve::BaseHandler* allocatorLlamacppHandler() {
   return new llm::LlamacppHandler();
 }
 
-void deleterLlamacppHandler(torchserve::torchscripted::BaseHandler* p) {
+void deleterLlamacppHandler(torchserve::BaseHandler* p) {
   if (p != nullptr) {
     delete static_cast<llm::LlamacppHandler*>(p);
   }
```
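With the TorchScript-specific backend gone, `Inference` now receives the model as a type-erased `std::shared_ptr<void>` rather than a `torch::jit::script::Module`. A handler that actually passes its model through this pointer has to cast it back to whatever concrete type it stored in `LoadModel`. The sketch below shows that pattern; the `LlamaModel` wrapper and `RunInference` function are hypothetical, and LlamacppHandler itself keeps its llama.cpp state in member variables, so the cast is shown purely for illustration.

```cpp
#include <memory>

// Hypothetical wrapper type -- not part of this commit; it stands in for
// whatever concrete object a handler's LoadModel() chooses to return.
struct LlamaModel {
  // e.g. pointers to llama_model / llama_context would live here
};

// Recovering the concrete type from the type-erased handle that the new
// Inference() signature passes in.
void RunInference(std::shared_ptr<void> model) {
  // Safe only if LoadModel() really returned a shared_ptr<LlamaModel>;
  // shared_ptr keeps ownership of the original object intact across the cast.
  auto llama = std::static_pointer_cast<LlamaModel>(model);
  // ... run token generation with *llama ...
}
```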

cpp/src/examples/llamacpp/llamacpp_handler.hh  (4 additions, 6 deletions)

```diff
@@ -7,10 +7,10 @@
 #include "common/common.h"
 #include "ggml.h"
 #include "llama.h"
-#include "src/backends/torch_scripted/handler/base_handler.hh"
+#include "src/backends/handler/base_handler.hh"
 
 namespace llm {
-class LlamacppHandler : public torchserve::torchscripted::BaseHandler {
+class LlamacppHandler : public torchserve::BaseHandler {
  private:
   gpt_params params;
   llama_model_params model_params;
@@ -27,8 +27,7 @@ class LlamacppHandler : public torchserve::torchscripted::BaseHandler {
 
   void initialize_context();
 
-  virtual std::pair<std::shared_ptr<torch::jit::script::Module>,
-                    std::shared_ptr<torch::Device>>
+  virtual std::pair<std::shared_ptr<void>, std::shared_ptr<torch::Device>>
   LoadModel(std::shared_ptr<torchserve::LoadModelRequest>& load_model_request);
 
   std::vector<torch::jit::IValue> Preprocess(
@@ -39,8 +38,7 @@ class LlamacppHandler : public torchserve::torchscripted::BaseHandler {
       override;
 
   torch::Tensor Inference(
-      std::shared_ptr<torch::jit::script::Module> model,
-      std::vector<torch::jit::IValue>& inputs,
+      std::shared_ptr<void> model, std::vector<torch::jit::IValue>& inputs,
       std::shared_ptr<torch::Device>& device,
       std::pair<std::string&, std::map<uint8_t, std::string>&>& idx_to_req_id,
       std::shared_ptr<torchserve::InferenceResponseBatch>& response_batch)
```
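For reference, this is roughly what a custom handler declaration looks like when written against the new backend-agnostic base class. It is a sketch under only what the diff shows (the new include path, the `torchserve::BaseHandler` name, and the updated `LoadModel`/`Inference` signatures); the class and file names are hypothetical, and any other virtual methods of the base (e.g. `Preprocess`/`Postprocess`) are omitted.

```cpp
// my_handler.hh -- illustrative only, not part of this commit.
#pragma once

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <torch/script.h>  // torch::Tensor, torch::Device, torch::jit::IValue

#include "src/backends/handler/base_handler.hh"  // new, backend-agnostic location

namespace my_example {

class MyHandler : public torchserve::BaseHandler {
 public:
  // The model handle is now a type-erased shared_ptr<void>, so non-TorchScript
  // backends (llama.cpp, plain libtorch, ...) can share the same interface.
  virtual std::pair<std::shared_ptr<void>, std::shared_ptr<torch::Device>>
  LoadModel(std::shared_ptr<torchserve::LoadModelRequest>& load_model_request);

  torch::Tensor Inference(
      std::shared_ptr<void> model, std::vector<torch::jit::IValue>& inputs,
      std::shared_ptr<torch::Device>& device,
      std::pair<std::string&, std::map<uint8_t, std::string>&>& idx_to_req_id,
      std::shared_ptr<torchserve::InferenceResponseBatch>& response_batch)
      override;
};

}  // namespace my_example
```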
