Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SINGA-482 tc comprehension integration #514

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ OPTION(ENABLE_DIST "Enable distributed training" OFF)
OPTION(DISABLE_WARNINGS "Disable warnings under windows" ON)
OPTION(USE_MODULES "Compile dependent libs as submodules together with singa" OFF)
OPTION(USE_MKLDNN "Use mkl-dnn libs" OFF)
OPTION(USE_TC "Use tensor comprehension" OFF)


# TODO: remove all USE_CBLAS in codes
Expand Down
47 changes: 45 additions & 2 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,11 @@ ENDIF()

IF(USE_PYTHON)
IF(USE_PYTHON3)
set(Python_ADDITIONAL_VERSIONS 3.6 3.5 3.4)
set(Python_ADDITIONAL_VERSIONS 3.6 3.5 3.4)
FIND_PACKAGE(PythonInterp 3 REQUIRED)
FIND_PACKAGE(PythonLibs 3 REQUIRED)
FIND_PACKAGE(SWIG 3.0.10 REQUIRED)
ELSE()
ELSE()
FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
FIND_PACKAGE(SWIG 3.0.8 REQUIRED)
Expand All @@ -142,10 +142,53 @@ IF(USE_JAVA)
FIND_PACKAGE(SWIG 3.0 REQUIRED)
ENDIF()


if(USE_MKLDNN)
  # mkl-dnn is located through the CMAKE_INCLUDE_PATH / CMAKE_LIBRARY_PATH
  # environment variables; both the header and the library must be found.
  find_path(MKLDNN_INCLUDE_DIR NAME "mkldnn.hpp" PATHS "$ENV{CMAKE_INCLUDE_PATH}")
  find_library(MKLDNN_LIBRARIES NAME "mkldnn" PATHS "$ENV{CMAKE_LIBRARY_PATH}")
  message(STATUS "Found MKLDNN at ${MKLDNN_INCLUDE_DIR}")
  include_directories(${MKLDNN_INCLUDE_DIR})
  list(APPEND SINGA_LINKER_LIBS ${MKLDNN_LIBRARIES})
endif()


IF(USE_TC)
    ### Tensor Comprehensions integration.
    # The defaults below match the reference docker build; override them on
    # the cmake command line, e.g. -DTC_ROOT=/path/to/TensorComprehensions.
    SET(TC_ROOT "/root/TensorComprehensions" CACHE PATH
        "Root of the Tensor Comprehensions source + build tree")
    SET(TC_CONDA_PREFIX "/root/conda/envs/tc_build" CACHE PATH
        "Conda environment providing Halide, LLVM and torch/ATen")

    INCLUDE_DIRECTORIES(${TC_ROOT})
    INCLUDE_DIRECTORIES(${TC_ROOT}/tc/version)
    INCLUDE_DIRECTORIES(${TC_ROOT}/build)
    # polyhedral model required
    INCLUDE_DIRECTORIES(${TC_ROOT}/isl_interface/include)
    # dlpack
    INCLUDE_DIRECTORIES(${TC_ROOT}/third-party/dlpack/include)
    # islpp
    INCLUDE_DIRECTORIES(${TC_ROOT}/third-party/islpp/include)
    # gflags
    INCLUDE_DIRECTORIES(${TC_ROOT}/build/third-party/googlelibraries/gflags/include)
    # glog
    INCLUDE_DIRECTORIES(${TC_ROOT}/build/third-party/googlelibraries/glog)
    # Halide
    INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/include/Halide)
    # llvm
    INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/include)
    # torch ATen headers
    INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/lib/python3.6/site-packages/torch/lib/include)

    # find Halide lib.
    # NOTE: find_library(... REQUIRED) is only supported from CMake 3.18 on
    # (older CMake silently treats REQUIRED as a library name), so the
    # "required" behaviour is enforced with an explicit check instead.
    find_library(HALIDE_LIBRARIES NAMES Halide PATHS ${TC_CONDA_PREFIX} PATH_SUFFIXES lib lib64 NO_DEFAULT_PATH)
    IF(NOT HALIDE_LIBRARIES)
        MESSAGE(FATAL_ERROR "Halide library not found under ${TC_CONDA_PREFIX}")
    ENDIF()
    message(STATUS "Found Halide.so file: ${HALIDE_LIBRARIES}")

    # find tc libs built inside the TC tree
    link_directories(${TC_ROOT}/build/tc/aten)
    link_directories(${TC_ROOT}/build/tc/lang)
    link_directories(${TC_ROOT}/build/tc/core)
    link_directories(${TC_ROOT}/build/tc/autotuner)
    link_directories(${TC_ROOT}/build/tc/proto)

    # torch(aten)
    link_directories(${TC_CONDA_PREFIX}/lib/python3.6/site-packages/torch/lib)

    LIST(APPEND SINGA_LINKER_LIBS ${HALIDE_LIBRARIES} tc_aten tc_lang tc_core_cpu tc_cuda tc_core_cuda_no_sdk tc_core tc_autotuner tc_proto ATen)
    ### Tensor comprehensions
ENDIF()
1 change: 1 addition & 0 deletions cmake/Templates/singa_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@
// #cmakedefine CUDNN_VERSION @CUDNN_VERSION@

#cmakedefine USE_MKLDNN
#cmakedefine USE_TC
93 changes: 93 additions & 0 deletions include/singa/core/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@
#include <tuple>
#include <memory>

#ifdef USE_TC
#include <dlpack/dlpack.h>
#include <tc/core/tensor.h>
#include <tc/utils/compiler_options.h>
#include <tc/core/compiler.h>
#include <tc/core/utils/time.h>
#include <tc/core/cuda/cuda_backend.h>
#include <tc/core/cuda/cuda_tc_executor.h>
#include <tc/core/cpu/cpu_backend.h>
#include <tc/core/cpu/cpu_tc_executor.h>
#include <tc/core/check.h>
#include <tc/core/tc_executor.h>
#endif // USE_TC

#include "singa/core/common.h"
#include "singa/core/device.h"
#include "singa/proto/core.pb.h"
Expand Down Expand Up @@ -603,6 +617,85 @@ Tensor ConcatRows(const vector<Tensor> &in);
Tensor ConcatenateColumns(const vector<Tensor> &in);
/// Alias name for function ConcatenateColumns
Tensor ConcatColumns(const vector<Tensor> &in);




#ifdef USE_TC
/// tc integration start

/// Wrap a singa Tensor as a DLPack managed tensor that shares the Tensor's
/// data block.  The caller must invoke the returned tensor's deleter when
/// done with it.
DLManagedTensor *toDLPack(const Tensor &src);

/// Convert a list of singa Tensors into mutable DLPack tensor wrappers
/// (defined inline below).
inline std::vector<tc::DLTensorUPtr>
makeDLTensors(const std::vector<Tensor> &tensors);

/// JIT-compile the TC function `entryPoint` found in the TC source `tc`,
/// specialized to the shapes/types of `inputs`, using the given mapping
/// options.  Returns the backend-specific executor to pass to runTC().
template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType>
compileTC(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs,
const typename Backend::MappingOptionsType &options,
const tc::CompilerOptions &compilerOptions = tc::CompilerOptions());

/// Infer the metadata (shape/dtype) of the outputs that `entryPoint` in the
/// TC source `tc` would produce for the given inputs, without running it.
std::vector<tc::DLTensorUPtr>
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs);

/// Allocate singa Tensors matching the inferred output metadata of
/// `entryPoint`, on the device / with the dtype of the first input.
std::vector<Tensor> prepareOutputs(const std::string &tc,
const std::string &entryPoint,
const std::vector<Tensor> &inputs);

/// Run a compiled TC executor on `inputs`, writing results into the
/// pre-allocated `outputs` (see prepareOutputs).
template <typename Executor>
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
std::vector<Tensor> &outputs);
// makeDLConstTensors implementation
inline std::vector<tc::DLConstTensorUPtr>
makeDLConstTensors(const std::vector<Tensor> &tensors) {
std::vector<tc::DLConstTensorUPtr> dlTensors;
for (auto tensor : tensors) {
auto dlMTensor = toDLPack(tensor);
dlTensors.push_back(tc::makeDLConstTensor(&(dlMTensor->dl_tensor)));
dlMTensor->deleter(dlMTensor);
}
return dlTensors;
}

// makeDLTensors implementation
inline std::vector<tc::DLTensorUPtr>
makeDLTensors(const std::vector<Tensor> &tensors) {
std::vector<tc::DLTensorUPtr> dlTensors;
for (auto tensor : tensors) {
auto dlMTensor = toDLPack(tensor);
dlTensors.push_back(tc::makeDLTensor(&(dlMTensor->dl_tensor)));
dlMTensor->deleter(dlMTensor);
}
return dlTensors;
}

// compile implementation
//
// Convert the singa inputs into DLPack views and hand them to the TC JIT,
// which returns a backend-specific executor for later runTC() calls.
template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType>
compileTC(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs,
const typename Backend::MappingOptionsType &options,
const tc::CompilerOptions &compilerOptions) {
  const auto dlInputs = makeDLConstTensors(inputs);
  const auto rawInputs = extractRawPtrs(dlInputs);
  return tc::compile<Backend>(tc, entryPoint, rawInputs, options,
                              compilerOptions);
}

// run implementation
//
// Marshal the inputs (read-only) and outputs (mutable) into DLPack form and
// launch the previously compiled executor; results land in `outputs`.
template <typename Executor>
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
std::vector<Tensor> &outputs) {
  const auto dlInputs = makeDLConstTensors(inputs);
  const auto dlOutputs = makeDLTensors(outputs);
  executor.run(extractRawPtrs(dlInputs), extractRawPtrs(dlOutputs));
}

/// tc integration end
#endif // USE_TC

} // namespace singa

#endif // SINGA_CORE_TENSOR_H_
113 changes: 113 additions & 0 deletions src/core/tensor/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@

#define Noaxis 9999

#ifdef USE_TC
// TC exposes its frontend under the `lang` namespace, which clashes with an
// existing `lang` name in singa; alias it so code below can refer to it
// unambiguously.
namespace tclang = lang;
#endif // USE_TC

namespace singa {

Tensor::~Tensor() {
Expand Down Expand Up @@ -1334,4 +1340,111 @@ Tensor Reshape(const Tensor &in, const Shape &s) {
return out.Reshape(s);
}


#ifdef USE_TC
/// tc integration start
// Bundle that keeps the source Tensor -- and the heap storage backing
// dl_tensor.shape / dl_tensor.strides -- alive for as long as the DLPack
// consumer holds the DLManagedTensor.
struct SingaDLManagedTensor {
  Tensor handle;
  std::vector<int64_t> shape;    // backing storage for dl_tensor.shape
  std::vector<int64_t> strides;  // backing storage for dl_tensor.strides
  DLManagedTensor tensor;
};

// DLPack deleter: frees the whole bundle, including shape/stride storage.
void deleter(DLManagedTensor *arg) {
  delete static_cast<SingaDLManagedTensor *>(arg->manager_ctx);
}

// Map a singa data type onto the DLPack type descriptor.
// Only kFloat32 is supported for now.
static DLDataType getDLDataType(const Tensor &t) {
  DLDataType dtype;
  dtype.lanes = 1;
  dtype.bits = SizeOf(t.data_type()) * 8;
  switch (t.data_type()) {
  case kFloat32:
    dtype.code = DLDataTypeCode::kDLFloat;
    break;
  default:
    throw std::logic_error("only kFloat32 is supported for dlpack conversion");
  }
  return dtype;
}

// Derive the DLPack device descriptor from the tensor's singa device.
static DLContext getDLContext(const Tensor &tensor, const int64_t &device_id) {
  DLContext ctx;
  ctx.device_id = device_id;
  if (tensor.device()->lang() == kCuda) {
    ctx.device_type = DLDeviceType::kDLGPU;
  } else {
    ctx.device_type = DLDeviceType::kDLCPU;
  }
  return ctx;
}

// Wrap a singa Tensor as a memory-managed DLPack tensor that shares the
// Tensor's data block.  Call the returned tensor's deleter to release it.
//
// Fix: the previous implementation `new`ed the shape and stride vectors and
// never freed them (the deleter only deleted the manager struct), leaking
// two heap allocations per conversion.  The vectors are now owned by
// SingaDLManagedTensor, so the deleter reclaims everything.
DLManagedTensor *toDLPack(const Tensor &src) {
  auto *managed = new SingaDLManagedTensor;
  managed->handle = src;
  managed->shape.assign(src.shape().begin(), src.shape().end());
  managed->strides.assign(src.stride().begin(), src.stride().end());

  managed->tensor.manager_ctx = managed;
  managed->tensor.deleter = &deleter;
  managed->tensor.dl_tensor.data = src.block()->mutable_data();
  const int64_t device_id = src.device()->id();
  managed->tensor.dl_tensor.ctx = getDLContext(src, device_id);
  managed->tensor.dl_tensor.ndim = src.nDim();
  managed->tensor.dl_tensor.dtype = getDLDataType(src);
  managed->tensor.dl_tensor.shape = managed->shape.data();
  managed->tensor.dl_tensor.strides = managed->strides.data();
  managed->tensor.dl_tensor.byte_offset = 0;
  return &(managed->tensor);
}

// prepare output
//
// Parse the TC source, locate `entryPoint`, and ask TC to infer the output
// tensor metadata (shape/dtype) implied by the given inputs — without
// compiling or running the kernel.  Throws if `entryPoint` is not defined
// in the parsed source.
std::vector<tc::DLTensorUPtr>
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs) {
auto parsedTcs = tc::detail::parse(tc);
if (parsedTcs.count(entryPoint) != 1u) {
// parse() should already have thrown if nothing was parsed at all.
TC_CHECK_GE(parsedTcs.size(), 1u)
<< "No TC was parsed, should have thrown earlier";
throw tclang::ErrorReport(parsedTcs.begin()->second)
<< "\nattempting to access undefined entryPoint: " << entryPoint;
}
auto inputDLTensors = makeDLConstTensors(inputs);
return makeDLTensorVector(tc::detail::inferOutputTensorInfo(
parsedTcs.at(entryPoint), extractRawPtrs(inputDLTensors)));
}

std::vector<Tensor> prepareOutputs(const std::string &tc,
const std::string &entryPoint,
const std::vector<Tensor> &inputs) {
std::vector<Tensor> outputs;
auto outTensorInfo = inferOutputTensorInfo(tc, entryPoint, inputs);
if (outTensorInfo.size() == 0) {
return outputs;
}
TC_CHECK_GE(inputs.size(), 1u)
<< "NYI: Need >= 1 input tensors to determine "
<< "backend and prepare ATen outputs. Add an overload with just an ATen "
<< "backend";

auto dev = inputs[0].device();
auto dtype = inputs[0].data_type();
for (size_t i = 0; i < outTensorInfo.size(); ++i) {
tc::TensorInfo info(outTensorInfo[i]);
Shape shape(info.shape.begin(), info.shape.end());

Tensor tmp(shape, dev, dtype);
outputs.push_back(tmp);
}
return outputs;
}
/// tc integration end
#endif // USE_TC


} // namespace singa
42 changes: 42 additions & 0 deletions src/model/operation/tc_fn.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*********************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
************************************************************/
#ifdef USE_TC
#include "./tc_fn.h"

namespace singa {

// Compile the TC definition `tcDefinition` at entry point `entryFn` for the
// CUDA backend, specialized to the shapes/types of `inputs`, with naive
// mapping options.  The compiled executor is cached in pExecutor so the
// kernel can be run repeatedly via tcExecute().
TcFnHandle::TcFnHandle(std::string tcDefinition, std::string entryFn,
                       const std::vector<Tensor> &inputs)
    : tc_string(std::move(tcDefinition)), tc_name(std::move(entryFn)) {
  auto naiveOptions =
      tc::CudaBackend::MappingOptionsType::makeNaiveMappingOptions();
  // note: the original had a stray ';' after the body (pedantic warning)
  // and assigned the members inside the body instead of initializing them.
  pExecutor = singa::compileTC<tc::CudaBackend>(tc_string, tc_name, inputs,
                                                {naiveOptions});
}

// Run the compiled TC kernel held by `tcFnhandle` on `inputs`.
// Output buffers are freshly allocated from the inferred output metadata;
// only the first output is returned (current callers expect exactly one).
Tensor tcExecute(const TcFnHandle &tcFnhandle, const std::vector<Tensor> &inputs)
{
  auto outputs = singa::prepareOutputs(tcFnhandle.tc_string, tcFnhandle.tc_name, inputs);
  singa::runTC(*(tcFnhandle.pExecutor), inputs, outputs);
  // .at(0) throws std::out_of_range instead of invoking undefined behaviour
  // if the TC defines no outputs (the original used unchecked outputs[0]).
  return outputs.at(0);
}

} // namespace singa
#endif // USE_TC
42 changes: 42 additions & 0 deletions src/model/operation/tc_fn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*********************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
************************************************************/
// Fix: the include guard was commented out, leaving the header unguarded;
// restore it so repeated inclusion is safe.
#ifndef SINGA_MODEL_OPERATION_TC_FN_H_
#define SINGA_MODEL_OPERATION_TC_FN_H_

#ifdef USE_TC

#include "singa/core/tensor.h"

namespace singa {

/// Handle holding a Tensor-Comprehensions kernel compiled for the CUDA
/// backend.  Construct once per (TC source, entry point, input shapes)
/// combination, then run it repeatedly via tcExecute().
class TcFnHandle {
 public:
  /// @param tcDefinition full TC-language source text
  /// @param entryFn      name of the TC def to compile
  /// @param inputs       sample inputs used to specialize the kernel
  TcFnHandle(std::string tcDefinition, std::string entryFn, const std::vector<Tensor> &inputs);
  std::string tc_string;  // TC source, kept to infer output shapes at run time
  std::string tc_name;    // entry point name
  std::unique_ptr<typename tc::CudaBackend::ExecutorType> pExecutor;  // compiled kernel
};

/// Run the compiled kernel in `smh` on `inputs`; returns the first output.
Tensor tcExecute(const TcFnHandle &smh, const std::vector<Tensor> &inputs);

} // namespace singa

#endif // USE_TC

#endif // SINGA_MODEL_OPERATION_TC_FN_H_
Loading