Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SINGA-482 tc comprehension integration #514

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ OPTION(ENABLE_DIST "Enable distributed training" OFF)
OPTION(DISABLE_WARNINGS "Disable warnings under windows" ON)
OPTION(USE_MODULES "Compile dependent libs as submodules together with singa" OFF)
OPTION(USE_MKLDNN "Use mkl-dnn libs" OFF)
OPTION(USE_TC "Use tensor comprehension" OFF)


# TODO: remove all USE_CBLAS in codes
Expand Down
47 changes: 45 additions & 2 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,11 @@ ENDIF()

IF(USE_PYTHON)
IF(USE_PYTHON3)
set(Python_ADDITIONAL_VERSIONS 3.6 3.5 3.4)
set(Python_ADDITIONAL_VERSIONS 3.6 3.5 3.4)
FIND_PACKAGE(PythonInterp 3 REQUIRED)
FIND_PACKAGE(PythonLibs 3 REQUIRED)
FIND_PACKAGE(SWIG 3.0.10 REQUIRED)
ELSE()
ELSE()
FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
FIND_PACKAGE(SWIG 3.0.8 REQUIRED)
Expand All @@ -142,10 +142,53 @@ IF(USE_JAVA)
FIND_PACKAGE(SWIG 3.0 REQUIRED)
ENDIF()


if(USE_MKLDNN)
  # mkl-dnn is located through the CMAKE_INCLUDE_PATH / CMAKE_LIBRARY_PATH
  # environment variables; both the header and the library must be found.
  find_path(MKLDNN_INCLUDE_DIR NAME "mkldnn.hpp" PATHS "$ENV{CMAKE_INCLUDE_PATH}")
  find_library(MKLDNN_LIBRARIES NAME "mkldnn" PATHS "$ENV{CMAKE_LIBRARY_PATH}")
  message(STATUS "Found MKLDNN at ${MKLDNN_INCLUDE_DIR}")
  include_directories(${MKLDNN_INCLUDE_DIR})
  list(APPEND SINGA_LINKER_LIBS ${MKLDNN_LIBRARIES})
endif()


IF(USE_TC)
    ### Tensor Comprehensions integration.
    # The defaults below match the reference docker build; override them on
    # the cmake command line, e.g. -DTC_ROOT=/path/to/TensorComprehensions.
    SET(TC_ROOT "/root/TensorComprehensions" CACHE PATH
        "Root of the Tensor Comprehensions source + build tree")
    SET(TC_CONDA_PREFIX "/root/conda/envs/tc_build" CACHE PATH
        "Conda environment providing Halide, LLVM and torch/ATen")

    INCLUDE_DIRECTORIES(${TC_ROOT})
    INCLUDE_DIRECTORIES(${TC_ROOT}/tc/version)
    INCLUDE_DIRECTORIES(${TC_ROOT}/build)
    # polyhedral model required
    INCLUDE_DIRECTORIES(${TC_ROOT}/isl_interface/include)
    # dlpack
    INCLUDE_DIRECTORIES(${TC_ROOT}/third-party/dlpack/include)
    # islpp
    INCLUDE_DIRECTORIES(${TC_ROOT}/third-party/islpp/include)
    # gflags
    INCLUDE_DIRECTORIES(${TC_ROOT}/build/third-party/googlelibraries/gflags/include)
    # glog
    INCLUDE_DIRECTORIES(${TC_ROOT}/build/third-party/googlelibraries/glog)
    # Halide
    INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/include/Halide)
    # llvm
    INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/include)
    # torch ATen headers
    INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/lib/python3.6/site-packages/torch/lib/include)

    # find Halide lib.
    # NOTE: find_library(... REQUIRED) is only supported from CMake 3.18 on
    # (older CMake silently treats REQUIRED as a library name), so the
    # "required" behaviour is enforced with an explicit check instead.
    find_library(HALIDE_LIBRARIES NAMES Halide PATHS ${TC_CONDA_PREFIX} PATH_SUFFIXES lib lib64 NO_DEFAULT_PATH)
    IF(NOT HALIDE_LIBRARIES)
        MESSAGE(FATAL_ERROR "Halide library not found under ${TC_CONDA_PREFIX}")
    ENDIF()
    message(STATUS "Found Halide.so file: ${HALIDE_LIBRARIES}")

    # find tc libs built inside the TC tree
    link_directories(${TC_ROOT}/build/tc/aten)
    link_directories(${TC_ROOT}/build/tc/lang)
    link_directories(${TC_ROOT}/build/tc/core)
    link_directories(${TC_ROOT}/build/tc/autotuner)
    link_directories(${TC_ROOT}/build/tc/proto)

    # torch(aten)
    link_directories(${TC_CONDA_PREFIX}/lib/python3.6/site-packages/torch/lib)

    LIST(APPEND SINGA_LINKER_LIBS ${HALIDE_LIBRARIES} tc_aten tc_lang tc_core_cpu tc_cuda tc_core_cuda_no_sdk tc_core tc_autotuner tc_proto ATen)
    ### Tensor comprehensions
ENDIF()
1 change: 1 addition & 0 deletions cmake/Templates/singa_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@
// #cmakedefine CUDNN_VERSION @CUDNN_VERSION@

#cmakedefine USE_MKLDNN
#cmakedefine USE_TC
93 changes: 93 additions & 0 deletions include/singa/core/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@
#include <tuple>
#include <memory>

#ifdef USE_TC
#include <dlpack/dlpack.h>
#include <tc/core/tensor.h>
#include <tc/utils/compiler_options.h>
#include <tc/core/compiler.h>
#include <tc/core/utils/time.h>
#include <tc/core/cuda/cuda_backend.h>
#include <tc/core/cuda/cuda_tc_executor.h>
#include <tc/core/cpu/cpu_backend.h>
#include <tc/core/cpu/cpu_tc_executor.h>
#include <tc/core/check.h>
#include <tc/core/tc_executor.h>
#endif // USE_TC

#include "singa/core/common.h"
#include "singa/core/device.h"
#include "singa/proto/core.pb.h"
Expand Down Expand Up @@ -603,6 +617,85 @@ Tensor ConcatRows(const vector<Tensor> &in);
Tensor ConcatenateColumns(const vector<Tensor> &in);
/// Alias name for function ConcatenateColumns
Tensor ConcatColumns(const vector<Tensor> &in);




#ifdef USE_TC
/// tc integration start

/// Wrap a singa Tensor as a DLPack managed tensor that shares the Tensor's
/// data block.  The caller must invoke the returned tensor's deleter when
/// done with it.
DLManagedTensor *toDLPack(const Tensor &src);

/// Convert a list of singa Tensors into mutable DLPack tensor wrappers
/// (defined inline below).
inline std::vector<tc::DLTensorUPtr>
makeDLTensors(const std::vector<Tensor> &tensors);

/// JIT-compile the TC function `entryPoint` found in the TC source `tc`,
/// specialized to the shapes/types of `inputs`, using the given mapping
/// options.  Returns the backend-specific executor to pass to runTC().
template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType>
compileTC(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs,
const typename Backend::MappingOptionsType &options,
const tc::CompilerOptions &compilerOptions = tc::CompilerOptions());

/// Infer the metadata (shape/dtype) of the outputs that `entryPoint` in the
/// TC source `tc` would produce for the given inputs, without running it.
std::vector<tc::DLTensorUPtr>
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs);

/// Allocate singa Tensors matching the inferred output metadata of
/// `entryPoint`, on the device / with the dtype of the first input.
std::vector<Tensor> prepareOutputs(const std::string &tc,
const std::string &entryPoint,
const std::vector<Tensor> &inputs);

/// Run a compiled TC executor on `inputs`, writing results into the
/// pre-allocated `outputs` (see prepareOutputs).
template <typename Executor>
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
std::vector<Tensor> &outputs);
// makeDLConstTensors implementation
inline std::vector<tc::DLConstTensorUPtr>
makeDLConstTensors(const std::vector<Tensor> &tensors) {
std::vector<tc::DLConstTensorUPtr> dlTensors;
for (auto tensor : tensors) {
auto dlMTensor = toDLPack(tensor);
dlTensors.push_back(tc::makeDLConstTensor(&(dlMTensor->dl_tensor)));
dlMTensor->deleter(dlMTensor);
}
return dlTensors;
}

// makeDLTensors implementation
inline std::vector<tc::DLTensorUPtr>
makeDLTensors(const std::vector<Tensor> &tensors) {
std::vector<tc::DLTensorUPtr> dlTensors;
for (auto tensor : tensors) {
auto dlMTensor = toDLPack(tensor);
dlTensors.push_back(tc::makeDLTensor(&(dlMTensor->dl_tensor)));
dlMTensor->deleter(dlMTensor);
}
return dlTensors;
}

// compile implementation
//
// Convert the singa inputs into DLPack views and hand them to the TC JIT,
// which returns a backend-specific executor for later runTC() calls.
template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType>
compileTC(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs,
const typename Backend::MappingOptionsType &options,
const tc::CompilerOptions &compilerOptions) {
  const auto dlInputs = makeDLConstTensors(inputs);
  const auto rawInputs = extractRawPtrs(dlInputs);
  return tc::compile<Backend>(tc, entryPoint, rawInputs, options,
                              compilerOptions);
}

// run implementation
//
// Marshal the inputs (read-only) and outputs (mutable) into DLPack form and
// launch the previously compiled executor; results land in `outputs`.
template <typename Executor>
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
std::vector<Tensor> &outputs) {
  const auto dlInputs = makeDLConstTensors(inputs);
  const auto dlOutputs = makeDLTensors(outputs);
  executor.run(extractRawPtrs(dlInputs), extractRawPtrs(dlOutputs));
}

/// tc integration end
#endif // USE_TC

} // namespace singa

#endif // SINGA_CORE_TENSOR_H_
113 changes: 113 additions & 0 deletions src/core/tensor/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@

#define Noaxis 9999

#ifdef USE_TC
// TC exposes its frontend under the `lang` namespace, which clashes with an
// existing `lang` name in singa; alias it so code below can refer to it
// unambiguously.
namespace tclang = lang;
#endif // USE_TC

namespace singa {

Tensor::~Tensor() {
Expand Down Expand Up @@ -1334,4 +1340,111 @@ Tensor Reshape(const Tensor &in, const Shape &s) {
return out.Reshape(s);
}


#ifdef USE_TC
/// tc integration start
// Bundle that keeps the source Tensor -- and the heap storage backing
// dl_tensor.shape / dl_tensor.strides -- alive for as long as the DLPack
// consumer holds the DLManagedTensor.
struct SingaDLManagedTensor {
  Tensor handle;
  std::vector<int64_t> shape;    // backing storage for dl_tensor.shape
  std::vector<int64_t> strides;  // backing storage for dl_tensor.strides
  DLManagedTensor tensor;
};

// DLPack deleter: frees the whole bundle, including shape/stride storage.
void deleter(DLManagedTensor *arg) {
  delete static_cast<SingaDLManagedTensor *>(arg->manager_ctx);
}

// Map a singa data type onto the DLPack type descriptor.
// Only kFloat32 is supported for now.
static DLDataType getDLDataType(const Tensor &t) {
  DLDataType dtype;
  dtype.lanes = 1;
  dtype.bits = SizeOf(t.data_type()) * 8;
  switch (t.data_type()) {
  case kFloat32:
    dtype.code = DLDataTypeCode::kDLFloat;
    break;
  default:
    throw std::logic_error("only kFloat32 is supported for dlpack conversion");
  }
  return dtype;
}

// Derive the DLPack device descriptor from the tensor's singa device.
static DLContext getDLContext(const Tensor &tensor, const int64_t &device_id) {
  DLContext ctx;
  ctx.device_id = device_id;
  if (tensor.device()->lang() == kCuda) {
    ctx.device_type = DLDeviceType::kDLGPU;
  } else {
    ctx.device_type = DLDeviceType::kDLCPU;
  }
  return ctx;
}

// Wrap a singa Tensor as a memory-managed DLPack tensor that shares the
// Tensor's data block.  Call the returned tensor's deleter to release it.
//
// Fix: the previous implementation `new`ed the shape and stride vectors and
// never freed them (the deleter only deleted the manager struct), leaking
// two heap allocations per conversion.  The vectors are now owned by
// SingaDLManagedTensor, so the deleter reclaims everything.
DLManagedTensor *toDLPack(const Tensor &src) {
  auto *managed = new SingaDLManagedTensor;
  managed->handle = src;
  managed->shape.assign(src.shape().begin(), src.shape().end());
  managed->strides.assign(src.stride().begin(), src.stride().end());

  managed->tensor.manager_ctx = managed;
  managed->tensor.deleter = &deleter;
  managed->tensor.dl_tensor.data = src.block()->mutable_data();
  const int64_t device_id = src.device()->id();
  managed->tensor.dl_tensor.ctx = getDLContext(src, device_id);
  managed->tensor.dl_tensor.ndim = src.nDim();
  managed->tensor.dl_tensor.dtype = getDLDataType(src);
  managed->tensor.dl_tensor.shape = managed->shape.data();
  managed->tensor.dl_tensor.strides = managed->strides.data();
  managed->tensor.dl_tensor.byte_offset = 0;
  return &(managed->tensor);
}

// prepare output
//
// Parse the TC source, locate `entryPoint`, and ask TC to infer the output
// tensor metadata (shape/dtype) implied by the given inputs — without
// compiling or running the kernel.  Throws if `entryPoint` is not defined
// in the parsed source.
std::vector<tc::DLTensorUPtr>
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
const std::vector<Tensor> &inputs) {
auto parsedTcs = tc::detail::parse(tc);
if (parsedTcs.count(entryPoint) != 1u) {
// parse() should already have thrown if nothing was parsed at all.
TC_CHECK_GE(parsedTcs.size(), 1u)
<< "No TC was parsed, should have thrown earlier";
throw tclang::ErrorReport(parsedTcs.begin()->second)
<< "\nattempting to access undefined entryPoint: " << entryPoint;
}
auto inputDLTensors = makeDLConstTensors(inputs);
return makeDLTensorVector(tc::detail::inferOutputTensorInfo(
parsedTcs.at(entryPoint), extractRawPtrs(inputDLTensors)));
}

std::vector<Tensor> prepareOutputs(const std::string &tc,
const std::string &entryPoint,
const std::vector<Tensor> &inputs) {
std::vector<Tensor> outputs;
auto outTensorInfo = inferOutputTensorInfo(tc, entryPoint, inputs);
if (outTensorInfo.size() == 0) {
return outputs;
}
TC_CHECK_GE(inputs.size(), 1u)
<< "NYI: Need >= 1 input tensors to determine "
<< "backend and prepare ATen outputs. Add an overload with just an ATen "
<< "backend";

auto dev = inputs[0].device();
auto dtype = inputs[0].data_type();
for (size_t i = 0; i < outTensorInfo.size(); ++i) {
tc::TensorInfo info(outTensorInfo[i]);
Shape shape(info.shape.begin(), info.shape.end());

Tensor tmp(shape, dev, dtype);
outputs.push_back(tmp);
}
return outputs;
}
/// tc integration end
#endif // USE_TC


} // namespace singa
42 changes: 42 additions & 0 deletions src/model/operation/tc_fn.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*********************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
************************************************************/
#ifdef USE_TC
#include "./tc_fn.h"

namespace singa {

// Compile the TC definition `tcDefinition` at entry point `entryFn` for the
// CUDA backend, specialized to the shapes/types of `inputs`, with naive
// mapping options.  The compiled executor is cached in pExecutor so the
// kernel can be run repeatedly via tcExecute().
TcFnHandle::TcFnHandle(std::string tcDefinition, std::string entryFn,
                       const std::vector<Tensor> &inputs)
    : tc_string(std::move(tcDefinition)), tc_name(std::move(entryFn)) {
  auto naiveOptions =
      tc::CudaBackend::MappingOptionsType::makeNaiveMappingOptions();
  // note: the original had a stray ';' after the body (pedantic warning)
  // and assigned the members inside the body instead of initializing them.
  pExecutor = singa::compileTC<tc::CudaBackend>(tc_string, tc_name, inputs,
                                                {naiveOptions});
}

// Run the compiled TC kernel held by `tcFnhandle` on `inputs`.
// Output buffers are freshly allocated from the inferred output metadata;
// only the first output is returned (current callers expect exactly one).
Tensor tcExecute(const TcFnHandle &tcFnhandle, const std::vector<Tensor> &inputs)
{
  auto outputs = singa::prepareOutputs(tcFnhandle.tc_string, tcFnhandle.tc_name, inputs);
  singa::runTC(*(tcFnhandle.pExecutor), inputs, outputs);
  // .at(0) throws std::out_of_range instead of invoking undefined behaviour
  // if the TC defines no outputs (the original used unchecked outputs[0]).
  return outputs.at(0);
}

} // namespace singa
#endif // USE_TC
42 changes: 42 additions & 0 deletions src/model/operation/tc_fn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*********************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
************************************************************/
// Fix: the include guard was commented out, leaving the header unguarded;
// restore it so repeated inclusion is safe.
#ifndef SINGA_MODEL_OPERATION_TC_FN_H_
#define SINGA_MODEL_OPERATION_TC_FN_H_

#ifdef USE_TC

#include "singa/core/tensor.h"

namespace singa {

/// Handle holding a Tensor-Comprehensions kernel compiled for the CUDA
/// backend.  Construct once per (TC source, entry point, input shapes)
/// combination, then run it repeatedly via tcExecute().
class TcFnHandle {
 public:
  /// @param tcDefinition full TC-language source text
  /// @param entryFn      name of the TC def to compile
  /// @param inputs       sample inputs used to specialize the kernel
  TcFnHandle(std::string tcDefinition, std::string entryFn, const std::vector<Tensor> &inputs);
  std::string tc_string;  // TC source, kept to infer output shapes at run time
  std::string tc_name;    // entry point name
  std::unique_ptr<typename tc::CudaBackend::ExecutorType> pExecutor;  // compiled kernel
};

/// Run the compiled kernel in `smh` on `inputs`; returns the first output.
Tensor tcExecute(const TcFnHandle &smh, const std::vector<Tensor> &inputs);

} // namespace singa

#endif // USE_TC

#endif // SINGA_MODEL_OPERATION_TC_FN_H_
Loading