diff --git a/.gitignore b/.gitignore
index 882c8c6cd..1791006fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,7 +16,7 @@ venv*/
 *.tar.gz
 /VARIANT
 
-# Docs API reference 
+# Docs API reference
 docs/api_reference.md
 
 ### Cmake auto tools
@@ -137,4 +137,8 @@ dkms.conf
 .idea_modules/
 
 # docs site
-site/
\ No newline at end of file
+site/
+
+# docker remnants
+*.iid
+*.cid
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1926d76aa..9bb420537 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -138,14 +138,25 @@ ENDIF()
 #----------------------------------------------------------------------------------------------
 
 IF(BUILD_TFLITE)
-    FIND_LIBRARY(TFLITE_LIBRARIES_1 NAMES tensorflow-lite
+    FIND_LIBRARY(TFLITE_LIBRARIES_1 NAMES tensorflowlite
         PATHS ${depsAbs}/libtensorflow-lite/lib)
-    FIND_LIBRARY(TFLITE_LIBRARIES_2 NAMES benchmark-lib.a
+    IF (${DEVICE} STREQUAL "gpu")
+        FIND_LIBRARY(TFLITE_LIBRARIES_2 NAMES tensorflowlite_gpu_delegate
         PATHS ${depsAbs}/libtensorflow-lite/lib)
-    SET(TFLITE_LIBRARIES ${TFLITE_LIBRARIES_1} ${TFLITE_LIBRARIES_2})
-    MESSAGE(STATUS "Found TensorFlow Lite Libraries: \"${TFLITE_LIBRARIES}\")")
+        IF (NOT APPLE)
+            FIND_LIBRARY(OPENGL_LIBRARIES NAMES GL
+                PATHS /usr/lib/${MACH}-linux-gnu)
+            FIND_LIBRARY(EGL_LIBRARIES NAMES EGL
+                PATHS /usr/lib/${MACH}-linux-gnu)
+        ELSE()
+            MESSAGE(FATAL_ERROR "Building the TensorFlow Lite GPU backend is not supported on Apple machines.")
+        ENDIF()
+    ENDIF()
+    SET(TFLITE_LIBRARIES ${TFLITE_LIBRARIES_1} ${TFLITE_LIBRARIES_2} ${OPENGL_LIBRARIES} ${EGL_LIBRARIES})
     IF (NOT TFLITE_LIBRARIES)
         MESSAGE(FATAL_ERROR "Could not find TensorFlow Lite")
+    ELSE()
+        MESSAGE(STATUS "Found TensorFlow Lite Libraries: \"${TFLITE_LIBRARIES}\"")
     ENDIF()
     IF (${DEVICE} STREQUAL "gpu")
         ADD_DEFINITIONS(-DRAI_TFLITE_USE_CUDA)
@@ -202,6 +213,7 @@ ENDIF()
 
 ADD_SUBDIRECTORY(src)
 ADD_SUBDIRECTORY(tests/module)
+
 ADD_LIBRARY(redisai SHARED $<TARGET_OBJECTS:redisai_obj>)
 TARGET_LINK_LIBRARIES(redisai ${CMAKE_DL_LIBS})
 
@@ -322,4 +334,4 @@ if(PACKAGE_UNIT_TESTS)
     enable_testing()
     include(GoogleTest)
     add_subdirectory(tests/unit)
-endif()
\ No newline at end of file
+endif()
diff --git a/README.md b/README.md
index d82a4b36e..3bc27c163 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ redis-cli
 
 ## Building
 
-You should obtain the module's source code and submodule using git like so: 
+You should obtain the module's source code and its submodules using git, like so:
 
 ```sh
 git clone --recursive https://github.com/RedisAI/RedisAI
@@ -96,6 +96,8 @@ ALL=1 make -C opt clean build
 
 Note: in order to use the PyTorch backend on Linux, at least `gcc 4.9.2` is required.
 
+[See this document](docs/developer-backends.md) for building AI backends.
+
 ### Running the server
 
 You will need a redis-server version 5.0.7 or greater. This should be
diff --git a/docs/developer-backends.md b/docs/developer-backends.md
new file mode 100644
index 000000000..7388b9f5d
--- /dev/null
+++ b/docs/developer-backends.md
@@ -0,0 +1,39 @@
+# RedisAI dependency builds
+
+Platform dependency build systems are located in this folder. Dependencies are pre-built and published to S3; ultimately, this relies on running **make build publish** in a given directory. The goal is for this to be true on all target platforms (x86_64, arm64), though at this time it is only true for tensorflowlite.
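+
+For example, building and publishing the tflite dependency (assuming Docker is available and AWS credentials have been configured via `aws configure`) amounts to:
+
+```sh
+cd opt/build/tflite
+make build publish
+```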
+
+## Background
+
+Items are built in docker images, for the target platform, whenever possible. If needed (e.g. a future planned macOS build), items are built on dedicated hardware. There are design wrinkles to each build, but the ideal is to build a base docker image (see the [automata repository](https://github.com/redislabsmodules/automata)). That base image is then used as the build system for the dependency itself: a docker image is built from it, accepting externalized variables such as the dependency version, and compilation of the dependency takes place in a build file mounted inside the docker image.
+
+Ideally a per-platform Dockerfile (e.g. Dockerfile.x64, Dockerfile.arm) will exist in the relevant folder, assuming building within a docker is tenable.
+
+--------------
+
+## tensorflowlite (tflite)
+
+### arm64
+
+The arm build of tflite currently occurs on **jetson arm devices** only, because portions of the root filesystem of the Jetson device are mounted during the build: the device symlinks items in /usr/lib to /etc/alternatives, which in turn point to /usr/local/cuda, itself a symlink to /usr/local/cuda-10.2.
+
+The *build_arm* target in the [Makefile](Makefile) describes the process in detail. The code to build the base docker build image can be found in the [automata repository](https://github.com/RedisLabsModules/automata/tree/master/dockers/buildsystem/bazelbuilder). The *bazelbuilder* image is published to the [redisfab dockerhub repositories](https://hub.docker.com/r/redisfab/).
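+
+For reference, a sketch of what *build_arm* runs, with the image tag and mounts taken from the [Makefile](Makefile):
+
+```sh
+docker run \
+    -v `pwd`/opt/build/tflite:/tflite \
+    -v /etc/alternatives:/etc/alternatives \
+    -v /usr/lib:/usr/lib \
+    -v /usr/local:/usr/local \
+    -v /usr/include:/usr/include \
+    redisfab/ubuntu1804-arm64-bazel3.1.0-jetson:latest \
+    /tflite/build.sh arm64 2.4.0 cuda
+```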
diff --git a/docs/developer.md b/docs/developer.md
index feb85e753..aff8e2e9b 100644
--- a/docs/developer.md
+++ b/docs/developer.md
@@ -106,7 +106,7 @@ Within the `backends` folder you will find the implementations code required to
 * **ONNX**: `onnxruntime.h` and `onnxruntime.c` exporting the functions to to register the ONNXRuntime backend
 
 ## Building and Testing
-You can compile and build the module from its source code - refer to the [Building and Running section](quickstart.md#building-and-running) of the Quickstart page for instructions on how to do that.
+You can compile and build the module from its source code - refer to the [Building and Running section](quickstart.md#building-and-running) of the Quickstart page for instructions on how to do that, or view the detailed instructions on [building backends](developer-backends.md).
 
 **Running Tests**
diff --git a/get_deps.sh b/get_deps.sh
index a5d0f07e1..64d8c54b1 100755
--- a/get_deps.sh
+++ b/get_deps.sh
@@ -145,7 +145,7 @@ fi # WITH_TF
 
 ################################################################################# LIBTFLITE
 
-TFLITE_VERSION="2.0.0"
+TFLITE_VERSION="2.4.1"
 
 if [[ $WITH_TFLITE != 0 ]]; then
 	[[ $FORCE == 1 ]] && rm -rf $LIBTFLITE
@@ -156,18 +156,17 @@ if [[ $WITH_TFLITE != 0 ]]; then
 	LIBTF_URL_BASE=https://s3.amazonaws.com/redismodules/tensorflow
 	if [[ $OS == linux ]]; then
 		TFLITE_OS="linux"
-		# if [[ $GPU != 1 ]]; then
-		# 	TFLITE_BUILD="cpu"
-		# else
-		# 	TFLITE_BUILD="gpu"
-		# fi
+		if [[ $GPU != 1 ]]; then
+			TFLITE_PLATFORM="cpu"
+		else
+			TFLITE_PLATFORM="cuda"
+		fi
 		if [[ $ARCH == x64 ]]; then
 			TFLITE_ARCH=x86_64
 		elif [[ $ARCH == arm64v8 ]]; then
 			TFLITE_ARCH=arm64
-		elif [[ $ARCH == arm32v7 ]]; then
-			TFLITE_ARCH=arm
 		fi
 	elif [[ $OS == macos ]]; then
 		TFLITE_OS=darwin
+		TFLITE_PLATFORM="cpu" # no GPU build for tflite on macOS
@@ -175,7 +173,8 @@ if [[ $WITH_TFLITE != 0 ]]; then
 		TFLITE_ARCH=x86_64
 	fi
 
-	LIBTFLITE_ARCHIVE=libtensorflowlite-${TFLITE_OS}-${TFLITE_ARCH}-${TFLITE_VERSION}.tar.gz
+	LIBTFLITE_ARCHIVE=libtensorflowlite-${TFLITE_OS}-${TFLITE_PLATFORM}-${TFLITE_ARCH}-${TFLITE_VERSION}.tar.gz
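+	# e.g. libtensorflowlite-linux-cuda-x86_64-2.4.1.tar.gz for a GPU build on x64 Linux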
 
 	[[ ! -f $LIBTFLITE_ARCHIVE || $FORCE == 1 ]] && wget -q $LIBTF_URL_BASE/$LIBTFLITE_ARCHIVE
diff --git a/opt/Makefile b/opt/Makefile
index 53f0e42d7..fc2436c51 100755
--- a/opt/Makefile
+++ b/opt/Makefile
@@ -135,7 +135,8 @@ CMAKE_FLAGS += \
 	-DUSE_COVERAGE=$(USE_COVERAGE) \
 	-DUSE_PROFILE=$(USE_PROFILE) \
 	-DREDISAI_GIT_SHA=\"$(GIT_SHA)\" \
-	-DDEVICE=$(DEVICE)
+	-DDEVICE=$(DEVICE) \
+	-DMACH=$(shell uname -m)
 
 ifeq ($(WITH_TF),0)
 CMAKE_FLAGS += -DBUILD_TF=off
diff --git a/opt/build/tflite/Dockerfile.x64 b/opt/build/tflite/Dockerfile.x64
old mode 100755
new mode 100644
index 53a474c69..89ff73a10
--- a/opt/build/tflite/Dockerfile.x64
+++ b/opt/build/tflite/Dockerfile.x64
@@ -1,29 +1,37 @@
+ARG BAZEL_VERSION=3.1.0
+ARG TFLITE_ARCH=x86_64
 
-ARG OS=debian:buster
+ARG OS=redisfab/ubuntu1804-${TFLITE_ARCH}-bazel${BAZEL_VERSION}
 
-ARG FTLITE_VER=2.0.0
+# cuda | cpu
+ARG REDISAI_PLATFORM=cuda
+
+ARG TFLITE_VERSION=2.4.0
 
 #----------------------------------------------------------------------------------------------
 FROM ${OS}
 
 ARG FTLITE_VER
-
-WORKDIR /build
-
-RUN set -e ;\
-    apt-get -qq update ;\
-    apt-get -q install -y git ca-certificates curl wget unzip python3 ;\
-    apt-get -q install -y git build-essential zlib1g-dev
-
-RUN git clone --single-branch --branch v${FTLITE_VER} --depth 1 https://github.com/tensorflow/tensorflow.git
-
-ADD ./opt/build/tflite/build /build/
-ADD ./opt/readies/ /build/readies/
-ADD ./opt/build/tflite/collect.py /build/
-
-RUN set -e ;\
-    cd tensorflow/tensorflow/lite/tools/make ;\
-    ./download_dependencies.sh ;\
-    ./build_lib.sh
-
-RUN ./collect.py --version ${FTLITE_VER} --dest /build/dest
+ARG TFLITE_VERSION
+ARG TFLITE_ARCH
+ARG REDISAI_PLATFORM
+
+ADD ./opt/build/tflite /tflite
+
+RUN apt-get -qq update && apt-get install -yqq python3
+RUN apt-get install -qqy git \
+    unzip \
+    wget \
+    curl \
+    build-essential \
+    zlib1g-dev \
+    libegl1-mesa-dev \
+    libgles2-mesa-dev \
+    python3-distutils \
+    python3-numpy
+RUN ln -s /usr/bin/python3 /usr/bin/python
+WORKDIR /tflite
+RUN bash build.sh ${TFLITE_ARCH} ${TFLITE_VERSION} ${REDISAI_PLATFORM}
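+
+# The resulting tarball is created at /tflite/tensorflow-${TFLITE_VERSION}/tmp/ inside
+# the image; the Makefile's build_x64 target extracts it from a container via `docker cp`.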
diff --git a/opt/build/tflite/Makefile b/opt/build/tflite/Makefile
index 8962177b7..704906916 100755
--- a/opt/build/tflite/Makefile
+++ b/opt/build/tflite/Makefile
@@ -1,19 +1,35 @@
-
 ROOT=../../..
 
-export VERSION ?= 2.0.0
-OSNICK ?= buster
+export VERSION ?= 2.4.0
 
 #----------------------------------------------------------------------------------------------
 
 S3_URL=redismodules/tensorflow
 
 OS:=$(shell $(ROOT)/opt/readies/bin/platform --os)
+ARCH:=$(shell $(ROOT)/opt/readies/bin/platform --arch)
+
+# cuda | cpu
+REDISAI_PLATFORM=cuda
 
+# non-arm linux
 ifeq ($(OS),linux)
+ifeq ($(ARCH),x64)
 OS.publish:=$(OS)
-ARCH.publish:=$(ARCH)
+ARCH.publish:=x86_64
+BUILD_TARGETS:=build_x64
+PUBLISH_TARGETS:=publish_x64
+else ifeq ($(ARCH),arm64v8)
+OS.publish:=$(OS)
+BUILD_TARGETS:=build_arm
+PUBLISH_TARGETS:=publish_arm
+else ifeq ($(ARCH),)
+BUILD_TARGETS:=
+PUBLISH_TARGETS:=
+endif
+
+# mac
 else ifeq ($(OS),macos)
 OS.publish:=darwin
 ARCH.publish:=x86_64
@@ -21,119 +36,54 @@ endif
 
 STEM=libtensorflowlite-$(OS.publish)
 
-DOCKER_OS.bionic=ubuntu:bionic
-DOCKER_OS.stretch=debian:stretch-slim
-DOCKER_OS.buster=debian:buster-slim
-DOCKER_OS=$(DOCKER_OS.$(OSNICK))
-
-#----------------------------------------------------------------------------------------------
-
-ifeq ($(OS),linux)
-
-define targets # (1=OP, 2=op)
-$(1)_TARGETS :=
-$(1)_TARGETS += $(if $(findstring $(X64),1),$(2)_x64)
-$(1)_TARGETS += $(if $(findstring $(ARM7),1),$(2)_arm32v7)
-$(1)_TARGETS += $(if $(findstring $(ARM8),1),$(2)_arm64v8)
-
-$(1)_TARGETS += $$(if $$(strip $$($(1)_TARGETS)),,$(2)_x64 $(2)_arm32v7 $(2)_arm64v8)
-endef
-
-else ifeq ($(OS),macos)
-
-define targets # (1=OP, 2=op)
-$(1)_TARGETS := $(2)_x64
-endef
-
-endif
-
-$(eval $(call targets,BUILD,build))
-$(eval $(call targets,PUBLISH,publish))
-
-#----------------------------------------------------------------------------------------------
-
-define build_x64 # (1=arch, 2=tar-arch)
-IID_$(1)=$(1)_$(VERSION).iid
-CID_$(1)=$(1)_$(VERSION).cid
+# x86 linux ------------------------------------------------------------------------------------------
 
 build_x64:
-	@docker build --iidfile $$(IID_$(1)) -t redisfab/$(STEM)-$(1):$(VERSION) -f Dockerfile.x64 \
-		--build-arg OS=$(DOCKER_OS) $(ROOT)
-	@docker create --cidfile $$(CID_$(1)) `cat $$(IID_$(1))`
-	@docker cp `cat $$(CID_$(1))`:/build/dest/$(STEM)-$(2)-$(VERSION).tar.gz .
-
+	-@rm *.iid *.cid
+	@docker build --iidfile x64.iid -t redisfab/$(STEM)-x86_64:$(VERSION) -f Dockerfile.x64 \
+		--build-arg TFLITE_VERSION=$(VERSION) --build-arg REDISAI_PLATFORM=$(REDISAI_PLATFORM) $(ROOT)
+	@docker create --cidfile x64.cid `cat x64.iid`
+	@docker cp `cat x64.cid`:/tflite/tensorflow-$(VERSION)/tmp/libtensorflowlite-linux-$(REDISAI_PLATFORM)-x86_64-$(VERSION).tar.gz .
 .PHONY: build_x64
-endef
-
-define build_arm # (1=arch, 2=tar-arch)
-IID_$(1)=$(1)_$(VERSION).iid
-CID_$(1)=$(1)_$(VERSION).cid
-build_$(1):
-	@docker build --iidfile $$(IID_$(1)) -t redisfab/$(STEM)-$(1):$(VERSION) -f Dockerfile.arm \
-		--build-arg ARCH=$(1) $(ROOT)
-	@docker create --cidfile $$(CID_$(1)) `cat $$(IID_$(1))`
-	@docker cp `cat $$(CID_$(1))`:/build/$(STEM)-$(2)-$(VERSION).tar.gz .
+
+publish_x64:
+	@aws s3 cp $(STEM)-$(REDISAI_PLATFORM)-x86_64-$(VERSION).tar.gz s3://$(S3_URL)/ --acl public-read
+.PHONY: publish_x64
 
-.PHONY: build_$(1)
-endef
+# arm linux ------------------------------------------------------------------------------------------
 
-#----------------------------------------------------------------------------------------------
+ifeq ($(ARCH),arm64v8)
+ARCH.publish:=arm64
+DOCKERBASE=redisfab/ubuntu1804-$(ARCH.publish)-bazel3.1.0-jetson:latest
+endif
 
-define publish_x64 # (1=arch, 2=tar-arch)
-publish_x64:
-	@aws s3 cp $(STEM)-$(2)-$(VERSION).tar.gz s3://$(S3_URL)/ --acl public-read
+build_arm:
+	@cd ../../../ ; docker run -v `pwd`/opt/build/tflite:/tflite -v /etc/alternatives:/etc/alternatives -v /usr/lib:/usr/lib -v /usr/local:/usr/local -v /usr/include:/usr/include ${DOCKERBASE} /tflite/build.sh $(ARCH.publish) $(VERSION) $(REDISAI_PLATFORM)
+.PHONY: build_arm
 
-.PHONY: publish_x64
-endef
+publish_arm:
+	@aws s3 cp `pwd`/tensorflow-$(VERSION)/tmp/$(STEM)-$(REDISAI_PLATFORM)-$(ARCH.publish)-$(VERSION).tar.gz s3://$(S3_URL)/ --acl public-read
+.PHONY: publish_arm
 
-define publish_arm # (1=arch, 2=tar-arch)
-publish_$(1):
-	@aws s3 cp $(STEM)-$(2)-$(VERSION).tar.gz s3://$(S3_URL)/ --acl public-read
-
-.PHONY: publish_$(1)
-endef
 
 #----------------------------------------------------------------------------------------------
 
 all: build publish
 
-ifeq ($(OS),linux)
-
-build: $(BUILD_TARGETS)
-
-$(eval $(call build_x64,x64,x86_64))
-$(eval $(call build_arm,arm64v8,arm64))
-$(eval $(call build_arm,arm32v7,arm))
-
 ifneq ($(filter publish,$(MAKECMDGOALS)),)
 ifeq ($(wildcard $(HOME)/.aws/credentials),)
 $(error Please run 'aws configure' and provide it with access credentials)
 endif
 endif
 
-publish: $(PUBLISH_TARGETS)
-
-$(eval $(call publish_x64,x64,x86_64))
-$(eval $(call publish_arm,arm64v8,arm64))
-$(eval $(call publish_arm,arm32v7,arm))
-
-help:
-	@echo "make [build|publish] [X64=1|ARM7=1|ARM8=1]"
-
-else ifeq ($(OS),macos)
-
-build:
-	@VERSION=$(VERSION) ./build.macos
-	@mv macos/dest/$(STEM)-$(ARCH.publish)-$(VERSION).tar.gz .
-
-publish: $(PUBLISH_TARGETS)
-
-$(eval $(call publish_x64,x64,x86_64))
-
 help:
 	@echo "make [build|publish]"
 
-endif # macos
+build: $(BUILD_TARGETS)
+publish: $(PUBLISH_TARGETS)
 
 .PHONY: all build publish help
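+
+# Typical usage: `make build publish`. The x64 build runs inside Docker; the arm
+# build must run on a Jetson device (see docs/developer-backends.md).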
diff --git a/opt/build/tflite/build b/opt/build/tflite/build
deleted file mode 100755
index ba9a8cb71..000000000
--- a/opt/build/tflite/build
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-OS=$(python3 readies/bin/platform --os)
-ARCH=$(python3 readies/bin/platform --arch)
-
-cd tensorflow/tensorflow/lite/tools/make
-bash download_dependencies.sh
-if [[ $OS == linux ]]; then
-	TARGET=linux
-	if [[ $ARCH == x64 ]]; then
-		bash build_lib.sh
-	elif [[ $ARCH == arm64v8 ]]; then
-		bash build_aarch64_lib.sh
-	elif [[ $ARCH == arm32v7 ]]; then
-		bash build_rpi_lib.sh
-	fi
-elif [[ $OS == macos ]]; then
-	TARGET=osx
-	bash build_lib.sh
-fi
diff --git a/opt/build/tflite/build.macos b/opt/build/tflite/build.macos
deleted file mode 100755
index 316d38e0f..000000000
--- a/opt/build/tflite/build.macos
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
-cd $HERE
-
-set -e
-mkdir -p macos
-cd macos
-cp ../collect.py .
-ln -s ../../../readies/
-git clone --single-branch --branch v${VERSION} --depth 1 https://github.com/tensorflow/tensorflow.git
-./collect.py --version $VERSION --dest dest
diff --git a/opt/build/tflite/build.sh b/opt/build/tflite/build.sh
new file mode 100755
index 000000000..e55c44178
--- /dev/null
+++ b/opt/build/tflite/build.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+set -e
+set -x
+
+cd `dirname ${BASH_SOURCE[0]}`
+ARCH=$1
+
+VERSION=$2
+if [ "X$VERSION" == "X" ]; then
+    VERSION=2.4.0
+fi
+
+# cuda | cpu
+REDISAI_TARGET=$3
+if [ "X$REDISAI_TARGET" == "X" ]; then
+    REDISAI_TARGET="cuda"
+fi
+
+BASEOS=$4
+if [ "X$BASEOS" == "X" ]; then
+    BASEOS=linux
+fi
+
+if [ ! -f v$VERSION.tar.gz ]; then
+    wget -q https://github.com/tensorflow/tensorflow/archive/v$VERSION.tar.gz
+    tar -xzf v$VERSION.tar.gz
+fi
+cd tensorflow-$VERSION
+
+# fetch dependencies
+./tensorflow/lite/tools/make/download_dependencies.sh
+
+# build tensorflow lite library, with the CUDA config for cuda targets
+if [ "X$REDISAI_TARGET" == "Xcuda" ]; then
+    BAZEL_VARIANT="--config=cuda"
+fi
+bazel build --jobs $(nproc) --config=monolithic ${BAZEL_VARIANT} //tensorflow/lite:libtensorflowlite.so
+
+TMP_LIB="tmp"
+# flatbuffer header files
+mkdir -p $TMP_LIB/include
+cp -r tensorflow/lite/tools/make/downloads/flatbuffers/include/flatbuffers $TMP_LIB/include/
+# tensorflow lite header files
+TFLITE_DIR="tensorflow/lite"
+declare -a tfLiteDirectories=(
+    "$TFLITE_DIR"
+    "$TFLITE_DIR/c"
+    "$TFLITE_DIR/core"
+    "$TFLITE_DIR/core/api"
+    "$TFLITE_DIR/delegates/gpu"
+    "$TFLITE_DIR/delegates/nnapi"
+    "$TFLITE_DIR/delegates/xnnpack"
+    "$TFLITE_DIR/experimental/resource"
+    "$TFLITE_DIR/kernels"
+    "$TFLITE_DIR/nnapi"
+    "$TFLITE_DIR/schema"
+    "$TFLITE_DIR/tools/evaluation"
+)
+for dir in "${tfLiteDirectories[@]}"
+do
+    mkdir -p $TMP_LIB/include/$dir
+    cp $dir/*.h $TMP_LIB/include/$dir
+done
+mkdir -p $TMP_LIB/lib
+cp bazel-bin/tensorflow/lite/libtensorflowlite.so $TMP_LIB/lib
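+# build the GPU delegate as well; the EGL/GLES headers come from the mesa dev
+# packages installed in the build image, and X11 usage is disabled via the copts below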
+bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS --copt -DEGL_NO_X11 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_delegate.so
+cp bazel-bin/tensorflow/lite/delegates/gpu/libtensorflowlite_gpu_delegate.so $TMP_LIB/lib
+
+# create .tar.gz file
+cd $TMP_LIB
+tar -cvzf libtensorflowlite-${BASEOS}-${REDISAI_TARGET}-${ARCH}-${VERSION}.tar.gz include lib
diff --git a/opt/build/tflite/collect.py b/opt/build/tflite/collect.py
deleted file mode 100755
index 4d4e4b347..000000000
--- a/opt/build/tflite/collect.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import sys
-import argparse
-from pathlib import Path
-import shutil
-import tarfile
-
-# this refers to deps directory inside a container
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), "readies"))
-import paella
-
-#----------------------------------------------------------------------------------------------
-
-TFLITE_VERSION = '2.0.0'
-
-parser = argparse.ArgumentParser(description='Prepare RedisAI dependant distribution packages.')
-parser.add_argument('--tensorflow', default='tensorflow', help='root of tensorflow repository')
-parser.add_argument('--version', default=TFLITE_VERSION, help='tensorflow version')
-parser.add_argument('--dest', default='dest', help='destination directory')
-parser.add_argument('-n', '--nop', action="store_true", help='no operation')
-args = parser.parse_args()
-
-#----------------------------------------------------------------------------------------------
-
-tensorflow = Path(args.tensorflow).resolve()
-dest = Path(args.dest).resolve()
-
-#----------------------------------------------------------------------------------------------
-
-platform = paella.Platform()
-
-tf_os = platform.os
-tf_os_internal = tf_os
-if tf_os == 'macos':
-    tf_os = 'darwin'
-    tf_os_internal = 'osx'
-
-tf_arch = platform.arch
-if tf_arch == 'x64':
-    tf_arch = 'x86_64'
-elif tf_arch == 'arm64v8':
-    tf_arch = 'arm64'
-
-tf_ver = args.version
-
-#----------------------------------------------------------------------------------------------
-
-def copy_p(src, dest):
-    f = dest/src
-    paella.mkdir_p(os.path.dirname(f))
-    shutil.copy(src, f, follow_symlinks=False)
-
-def create_tar(name, basedir, dir='.'):
-    def reset_uid(tarinfo):
-        tarinfo.uid = tarinfo.gid = 0
-        tarinfo.uname = tarinfo.gname = "root"
-        return tarinfo
-    with cwd(basedir):
-        with tarfile.open(name, 'w:gz') as tar:
-            tar.add(dir, filter=reset_uid)
-
-def collect_tflite():
-    d_tensorflow = dest
-    with cwd(tensorflow):
-        for f in Path('tensorflow/lite').glob('**/*.h'):
-            copy_p(f, d_tensorflow/'include')
-        with cwd('tensorflow/lite/tools/make'):
-            with cwd('downloads/flatbuffers/include'):
-                for f in Path('.').glob('**/*.h'):
-                    copy_p(f, d_tensorflow/'include')
-            with cwd(f'gen/{tf_os_internal}_{tf_arch}/lib'):
-                for f in Path('.').glob('*.a'):
-                    copy_p(f, d_tensorflow/'lib')
-    create_tar(dest/f'libtensorflowlite-{tf_os}-{tf_arch}-{tf_ver}.tar.gz', dest)
-
-#----------------------------------------------------------------------------------------------
-
-collect_tflite()
diff --git a/opt/system-setup.py b/opt/system-setup.py
index 07fdd50eb..24dd16c42 100755
--- a/opt/system-setup.py
+++ b/opt/system-setup.py
@@ -30,7 +30,8 @@ def debian_compat(self):
         self.install("gawk")
         self.install("libssl-dev")
         self.install("python3-regex")
-        self.install("python3-networkx python3-numpy")
+        self.install("python3-psutil python3-networkx python3-numpy")
+        self.install("libegl1-mesa-dev libgles2-mesa-dev")
         if self.platform.is_arm():
             self.install("python3-dev") # python3-skimage
             self.install("libmpich-dev libopenblas-dev") # for libtorch
diff --git a/src/backends/libtflite_c/tflite_c.cpp b/src/backends/libtflite_c/tflite_c.cpp
index c0b75bc19..eb26fb2f9 100644
--- a/src/backends/libtflite_c/tflite_c.cpp
+++ b/src/backends/libtflite_c/tflite_c.cpp
@@ -5,6 +5,7 @@
 #include "tensorflow/lite/model.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/delegates/gpu/delegate.h"
 #include "tensorflow/lite/tools/evaluation/utils.h"
 
 namespace {
@@ -204,6 +206,9 @@ struct ModelContext {
     std::string buffer;
     DLDeviceType device;
     int64_t device_id;
+#if RAI_TFLITE_USE_CUDA
+    TfLiteDelegate *delegate;
+#endif
 };
 
 } // namespace
@@ -230,16 +235,9 @@ extern "C" void *tfliteLoadModel(const char *graph, size_t graphlen, DLDeviceTyp
         return NULL;
     }
 
-#if RAI_TFLITE_USE_CUDA
-    if (device == DLDeviceType::kDLGPU) {
-        tflite::Interpreter::TfLiteDelegatePtr delegate =
-            tflite::evaluation::CreateGPUDelegate(model.get());
-        if (interpreter_->ModifyGraphWithDelegate(std::move(delegate)) != kTfLiteOk) {
-            _setError("Failed to set GPU delegate", error);
-            return NULL;
-        }
-    }
-#endif
+    // The GPU delegate is no longer applied at load time; it is created lazily on
+    // the first call to tfliteRunModel and cached on the ModelContext (see below).
 
     if (interpreter_->AllocateTensors() != kTfLiteOk) {
         _setError("Failed to allocate tensors", error);
@@ -254,7 +259,9 @@ extern "C" void *tfliteLoadModel(const char *graph, size_t graphlen, DLDeviceTyp
     ctx->model = std::move(model);
     ctx->interpreter = std::move(interpreter);
     ctx->buffer = std::move(graphstr);
-
+#if RAI_TFLITE_USE_CUDA
+    ctx->delegate = nullptr;
+#endif
     return ctx;
 }
 
@@ -342,6 +349,21 @@ extern "C" void tfliteRunModel(void *ctx, long n_inputs, DLManagedTensor **input
         return;
     }
 
+#if RAI_TFLITE_USE_CUDA
+    if (ctx_->device == DLDeviceType::kDLGPU) {
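+        // Create the delegate once, on first execution, and cache it on the model
+        // context so that the graph is modified with the delegate only once.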
+        if (!ctx_->delegate) {
+            auto* delegate = TfLiteGpuDelegateV2Create(/*default options=*/nullptr);
+            if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
+                _setError("Failed to set GPU delegate", error);
+                return;
+            }
+            ctx_->delegate = delegate;
+        }
+    }
+#endif
+
     try {
         for (size_t i = 0; i < tflite_outputs.size(); i++) {
             outputs[i] = toManagedDLPack(interpreter, tflite_outputs[i]);
@@ -358,7 +378,12 @@ extern "C" void tfliteSerializeModel(void *ctx, char **buffer, size_t *len, char
 
 extern "C" void tfliteDeallocContext(void *ctx) {
     ModelContext *ctx_ = (ModelContext *)ctx;
     if (ctx_) {
+#if RAI_TFLITE_USE_CUDA
+        if (ctx_->device == DLDeviceType::kDLGPU && ctx_->delegate) {
+            TfLiteGpuDelegateV2Delete(ctx_->delegate);
+        }
+#endif
         delete ctx_;
     }
 }
diff --git a/tests/flow/tests_tflite.py b/tests/flow/tests_tflite.py
index a10eda36c..49c008630 100644
--- a/tests/flow/tests_tflite.py
+++ b/tests/flow/tests_tflite.py
@@ -16,14 +16,15 @@ def test_run_tflite_model(env):
     model_pb = load_file_content('mnist_model_quant.tflite')
     sample_raw = load_file_content('one.raw')
 
-    ret = con.execute_command('AI.MODELSTORE', 'm{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
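+    # DEVICE comes from the test configuration ("CPU" or "GPU"), so the TFLITE GPU path is exercised as well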
+    ret = con.execute_command('AI.MODELSTORE', 'm{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     ret = con.execute_command('AI.MODELGET', 'm{1}', 'META')
     env.assertEqual(len(ret), 14)
     env.assertEqual(ret[5], b'')
 
-    ret = con.execute_command('AI.MODELSTORE', 'm{1}', 'TFLITE', 'CPU', 'TAG', 'asdf', 'BLOB', model_pb)
+    ret = con.execute_command('AI.MODELSTORE', 'm{1}', 'TFLITE', DEVICE, 'TAG', 'asdf', 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     ret = con.execute_command('AI.MODELGET', 'm{1}', 'META')
@@ -40,7 +40,7 @@ def test_run_tflite_model(env):
     # TODO: enable me. CI is having issues on GPU asserts of TFLITE and CPU
     if DEVICE == "CPU":
         env.assertEqual(ret[1], b'TFLITE')
-        env.assertEqual(ret[3], b'CPU')
+        env.assertEqual(ret[3], DEVICE.encode())
 
     con.execute_command('AI.MODELEXECUTE', 'm{1}', 'INPUTS', 1, 'a{1}', 'OUTPUTS', 2, 'b{1}', 'c{1}')
     values = con.execute_command('AI.TENSORGET', 'b{1}', 'VALUES')
@@ -58,17 +58,17 @@ def test_run_tflite_model_errors(env):
     sample_raw = load_file_content('one.raw')
     wrong_model_pb = load_file_content('graph.pb')
 
-    ret = con.execute_command('AI.MODELSTORE', 'm_2{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    ret = con.execute_command('AI.MODELSTORE', 'm_2{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     check_error_message(env, con, "Failed to load model from buffer",
-                        'AI.MODELSTORE', 'm{1}', 'TFLITE', 'CPU', 'TAG', 'asdf', 'BLOB', wrong_model_pb)
+                        'AI.MODELSTORE', 'm{1}', 'TFLITE', DEVICE, 'TAG', 'asdf', 'BLOB', wrong_model_pb)
 
     # TODO: Autobatch is tricky with TFLITE because TFLITE expects a fixed batch
     # size. At least we should constrain MINBATCHSIZE according to the
     # hard-coded dims in the tflite model.
     check_error_message(env, con, "Auto-batching not supported by the TFLITE backend",
-                        'AI.MODELSTORE', 'm{1}', 'TFLITE', 'CPU',
+                        'AI.MODELSTORE', 'm{1}', 'TFLITE', DEVICE,
                         'BATCHSIZE', 2, 'MINBATCHSIZE', 2, 'BLOB', model_pb)
 
     ret = con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)
@@ -96,7 +96,7 @@ def test_tflite_modelinfo(env):
     model_pb = load_file_content('mnist_model_quant.tflite')
     sample_raw = load_file_content('one.raw')
 
-    ret = con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    ret = con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     ret = con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)
@@ -143,7 +143,7 @@ def test_tflite_modelrun_disconnect(env):
     model_pb = load_file_content('mnist_model_quant.tflite')
     sample_raw = load_file_content('one.raw')
 
-    ret = red.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    ret = red.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     ret = red.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)
@@ -164,7 +164,7 @@ def test_tflite_model_rdb_save_load(env):
     con = env.getConnection()
     model_pb = load_file_content('mnist_model_quant.tflite')
 
-    ret = con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    ret = con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     model_serialized_memory = con.execute_command('AI.MODELGET', 'mnist{1}', 'BLOB')
@@ -196,7 +196,7 @@ def test_tflite_info(env):
 
     model_pb = load_file_content('mnist_model_quant.tflite')
 
-    con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
 
     ret = con.execute_command('AI.INFO')
     env.assertEqual(8, len(ret))