[DO NOT MERGE] Fix Tensorflow Lite GPU inference #625

Draft: wants to merge 1 commit into base: master

CMakeLists.txt (21 changes: 16 additions & 5 deletions)

@@ -137,14 +137,25 @@ ENDIF()
 #----------------------------------------------------------------------------------------------
 
 IF(BUILD_TFLITE)
-    FIND_LIBRARY(TFLITE_LIBRARIES_1 NAMES tensorflow-lite
+    FIND_LIBRARY(TFLITE_LIBRARIES_1 NAMES tensorflowlite
             PATHS ${depsAbs}/libtensorflow-lite/lib)
-    FIND_LIBRARY(TFLITE_LIBRARIES_2 NAMES benchmark-lib.a
+    IF (${DEVICE} STREQUAL "gpu")
+        FIND_LIBRARY(TFLITE_LIBRARIES_2 NAMES tensorflowlite_gpu_delegate
             PATHS ${depsAbs}/libtensorflow-lite/lib)
-    SET(TFLITE_LIBRARIES ${TFLITE_LIBRARIES_1} ${TFLITE_LIBRARIES_2})
-    MESSAGE(STATUS "Found TensorFlow Lite Libraries: \"${TFLITE_LIBRARIES}\")")
+        IF (NOT APPLE)
+            FIND_LIBRARY(OPENGL_LIBRARIES NAMES GL
+                    PATHS /usr/lib/${MACH}-linux-gnu)
+            FIND_LIBRARY(EGL_LIBRARIES NAMES EGL
+                    PATHS /usr/lib/${MACH}-linux-gnu)
+        ELSE()
+            MESSAGE(FATAL_ERROR "Build for TensorFlow Lite GPU backend on Apple machines.")
+        ENDIF()
+    ENDIF()
+    SET(TFLITE_LIBRARIES ${TFLITE_LIBRARIES_1} ${TFLITE_LIBRARIES_2} ${OPENGL_LIBRARIES} ${EGL_LIBRARIES})
     IF (NOT TFLITE_LIBRARIES)
         MESSAGE(FATAL_ERROR "Could not find TensorFlow Lite")
     ELSE()
         MESSAGE(STATUS "Found TensorFlow Lite Libraries: \"${TFLITE_LIBRARIES}\")")
     ENDIF()
     IF (${DEVICE} STREQUAL "gpu")
         ADD_DEFINITIONS(-DRAI_TFLITE_USE_CUDA)
@@ -320,4 +331,4 @@ if(PACKAGE_UNIT_TESTS)
     enable_testing()
     include(GoogleTest)
     add_subdirectory(tests/unit)
-endif()
+endif()
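
On Linux the TensorFlow Lite GPU delegate typically runs on top of OpenGL ES through EGL, which is why GPU builds now locate and link GL and EGL from /usr/lib/${MACH}-linux-gnu (MACH is supplied by the Makefile change below). A minimal smoke test for the newly linked delegate library could look like the sketch below; it is illustrative only, and it assumes the tensorflowlite and tensorflowlite_gpu_delegate libraries found above are on the linker path.

```cpp
// Illustrative only (not part of this PR): create and destroy the TFLite GPU delegate
// to confirm that tensorflowlite_gpu_delegate, GL and EGL link and load correctly.
#include "tensorflow/lite/delegates/gpu/delegate.h"
#include <cstdio>

int main() {
    // nullptr selects the delegate's default options.
    TfLiteDelegate *delegate = TfLiteGpuDelegateV2Create(nullptr);
    if (delegate == nullptr) {
        std::fprintf(stderr, "could not create the TFLite GPU delegate\n");
        return 1;
    }
    TfLiteGpuDelegateV2Delete(delegate);
    std::printf("TFLite GPU delegate created and destroyed\n");
    return 0;
}
```

Something like `g++ gpu_delegate_check.cc -ltensorflowlite -ltensorflowlite_gpu_delegate -lEGL -lGL` should build it, with include and library paths adjusted to the deps layout used here.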

opt/Makefile (3 changes: 2 additions & 1 deletion)

@@ -135,7 +135,8 @@ CMAKE_FLAGS += \
     -DUSE_COVERAGE=$(USE_COVERAGE) \
     -DUSE_PROFILE=$(USE_PROFILE) \
     -DREDISAI_GIT_SHA=\"$(GIT_SHA)\" \
-    -DDEVICE=$(DEVICE)
+    -DDEVICE=$(DEVICE) \
+    -DMACH=$(shell uname -m)
 
 ifeq ($(WITH_TF),0)
 CMAKE_FLAGS += -DBUILD_TF=off

src/libtflite_c/tflite_c.cpp (45 changes: 32 additions & 13 deletions)

@@ -4,7 +4,7 @@
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/tools/evaluation/utils.h"
#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "../redismodule.h"

namespace {
@@ -132,6 +132,7 @@ void deleter(DLManagedTensor *arg) {

 DLManagedTensor *toManagedDLPack(std::shared_ptr<tflite::Interpreter> interpreter,
                                  int tflite_output) {
+
     TfLiteTensor *tensor = interpreter->tensor(tflite_output);
 
     TfLiteIntArray *output_dims = tensor->dims;
@@ -209,6 +210,9 @@ struct ModelContext {
     std::string buffer;
     DLDeviceType device;
     int64_t device_id;
+#if RAI_TFLITE_USE_CUDA
+    TfLiteDelegate *delegate;
+#endif
 };
 
 } // namespace
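
The delegate handle lives on the per-model context, behind the same RAI_TFLITE_USE_CUDA define that the CMake change sets for GPU builds, so it can be created once on the first GPU run and released when the model is deallocated. For orientation, a condensed view of the struct after this change; the model member's exact type is inferred from the surrounding code and may differ.

```cpp
// Condensed, illustrative view of ModelContext after this change (not a verbatim copy).
struct ModelContext {
    std::shared_ptr<tflite::FlatBufferModel> model;   // assumed; the diff only shows the assignment
    std::shared_ptr<tflite::Interpreter> interpreter; // matches toManagedDLPack's parameter type
    std::string buffer;                                // serialized graph kept alive with the model
    DLDeviceType device;
    int64_t device_id;
#if RAI_TFLITE_USE_CUDA
    TfLiteDelegate *delegate;                          // nullptr until the first GPU run
#endif
};
```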
@@ -235,17 +239,6 @@ extern "C" void *tfliteLoadModel(const char *graph, size_t graphlen, DLDeviceTyp
         return NULL;
     }
 
-#if RAI_TFLITE_USE_CUDA
-    if (device == DLDeviceType::kDLGPU) {
-        tflite::Interpreter::TfLiteDelegatePtr delegate =
-            tflite::evaluation::CreateGPUDelegate(model.get());
-        if (interpreter_->ModifyGraphWithDelegate(std::move(delegate)) != kTfLiteOk) {
-            _setError("Failed to set GPU delegate", error);
-            return NULL;
-        }
-    }
-#endif
-
     if (interpreter_->AllocateTensors() != kTfLiteOk) {
         _setError("Failed to allocate tensors", error);
         return NULL;
@@ -259,6 +252,9 @@ extern "C" void *tfliteLoadModel(const char *graph, size_t graphlen, DLDeviceTyp
     ctx->model = std::move(model);
     ctx->interpreter = std::move(interpreter);
     ctx->buffer = std::move(graphstr);
+#if RAI_TFLITE_USE_CUDA
+    ctx->delegate = nullptr;
+#endif
 
     return ctx;
 }
@@ -342,6 +338,19 @@ extern "C" void tfliteRunModel(void *ctx, long n_inputs, DLManagedTensor **input
         return;
     }
 
+#if RAI_TFLITE_USE_CUDA
+    if (ctx_->device == DLDeviceType::kDLGPU) {
+        if (!ctx_->delegate) {
+            auto* delegate = TfLiteGpuDelegateV2Create(/*default options=*/nullptr);
+            if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
+                _setError("Failed to set GPU delegate", error);
+                return;
+            }
+            ctx_->delegate = delegate;
+        }
+    }
+#endif
+
     if (interpreter->Invoke() != kTfLiteOk) {
         _setError("Failed to invoke TfLite", error);
         return;
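
Compared with the removed load-time path that went through tflite::evaluation::CreateGPUDelegate, the delegate is now created with the TfLiteGpuDelegateV2 API and attached lazily on the first GPU invocation, then cached on the context so later runs skip ModifyGraphWithDelegate. For reference, the usual standalone flow for the same API looks roughly like the sketch below; model_path and the error handling are placeholders, not code from this PR.

```cpp
// Rough sketch of standard TfLiteGpuDelegateV2 usage outside RedisAI; the PR performs
// the same ModifyGraphWithDelegate call lazily inside tfliteRunModel instead.
#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include <memory>

bool RunOnGpu(const char *model_path) {  // model_path is a placeholder
    auto model = tflite::FlatBufferModel::BuildFromFile(model_path);
    if (!model) return false;

    tflite::ops::builtin::BuiltinOpResolver resolver;
    std::unique_ptr<tflite::Interpreter> interpreter;
    if (tflite::InterpreterBuilder(*model, resolver)(&interpreter) != kTfLiteOk) return false;

    TfLiteDelegate *delegate = TfLiteGpuDelegateV2Create(/*default options=*/nullptr);
    if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
        TfLiteGpuDelegateV2Delete(delegate);
        return false;
    }

    // Fill input tensors here, then run; as in the upstream GPU delegate example,
    // ModifyGraphWithDelegate prepares the tensor allocations.
    bool ok = interpreter->Invoke() == kTfLiteOk;

    interpreter.reset();                 // destroy the interpreter before its delegate
    TfLiteGpuDelegateV2Delete(delegate);
    return ok;
}
```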
@@ -363,7 +372,17 @@ extern "C" void tfliteSerializeModel(void *ctx, char **buffer, size_t *len, char

extern "C" void tfliteDeallocContext(void *ctx) {
ModelContext *ctx_ = (ModelContext *)ctx;

#if RAI_TFLITE_USE_CUDA
if (ctx_->device == DLDeviceType::kDLGPU) {
if (ctx_->delegate) {
TfLiteGpuDelegateV2Delete(ctx_->delegate);
}
}
#endif

if (ctx_) {
delete ctx_;
//delete ctx_;
}
}
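
One lifetime detail to note: TensorFlow Lite requires a delegate to outlive the interpreter it was applied to, and in this draft the delegate is freed while `delete ctx_;` is commented out, so the interpreter owned by the context is never destroyed at all. Below is a hedged sketch of a teardown that frees both in the documented order, assuming the same ModelContext layout as above; it is illustrative, not the PR's code.

```cpp
// Illustrative teardown only (not the PR's code): drop the interpreter first, then the
// delegate, per TFLite's rule that a delegate must outlive the interpreter using it.
extern "C" void tfliteDeallocContext(void *ctx) {
    ModelContext *ctx_ = (ModelContext *)ctx;
    if (!ctx_) {
        return;
    }
#if RAI_TFLITE_USE_CUDA
    TfLiteDelegate *delegate = ctx_->delegate; // keep the handle beyond the context's lifetime
    delete ctx_;                               // destroys the interpreter (and model/buffer)
    if (delegate) {
        TfLiteGpuDelegateV2Delete(delegate);   // safe now that no interpreter references it
    }
#else
    delete ctx_;
#endif
}
```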