Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ngraph_bridge/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ set(SRC
tf_graphcycles.cc
tf_deadness_analysis.cc
version.cc
ie_backend_engine.cc
ie_basic_engine.cc
ie_vadm_engine.cc
)

message(STATUS "NGRAPH_TF_USE_GRAPPLER_OPTIMIZER: ${NGRAPH_TF_USE_GRAPPLER_OPTIMIZER}")
Expand Down
57 changes: 38 additions & 19 deletions ngraph_bridge/executable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
#include "logging/ngraph_log.h"
#include "ngraph_bridge/default_opset.h"
#include "ngraph_bridge/executable.h"
#include "ngraph_bridge/ie_basic_engine.h"
#include "ngraph_bridge/ie_tensor.h"
#include "ngraph_bridge/ie_utils.h"
#include "ngraph_bridge/ie_vadm_engine.h"
#include "ngraph_bridge/ngraph_utils.h"

using namespace std;
Expand Down Expand Up @@ -138,22 +141,25 @@ Executable::Executable(shared_ptr<Function> func, string device)
"ie_" + m_device + "_" + name;
}

NGRAPH_VLOG(2) << "Loading IE CNN network to device " << m_device;

// Load network to the plugin (m_device) and create an infer request
InferenceEngine::ExecutableNetwork exe_network =
ie.LoadNetwork(m_network, m_device, options);
m_infer_req = exe_network.CreateInferRequest();
NGRAPH_VLOG(2) << "Creating IE Execution Engine";
if (m_device == "HDDL") {
m_ie_engine = make_shared<IE_VADM_Engine>(m_network);
} else {
m_ie_engine = make_shared<IE_Basic_Engine>(m_network, m_device);
}
}

bool Executable::call(const vector<shared_ptr<runtime::Tensor>>& inputs,
vector<shared_ptr<runtime::Tensor>>& outputs) {
vector<shared_ptr<runtime::Tensor>>& outputs,
bool multi_req_execution) {
if (m_trivial_fn) {
NGRAPH_VLOG(2) << "Calling trivial IE function with inputs="
<< inputs.size() << " outputs=" << outputs.size();
return call_trivial(inputs, outputs);
}

shared_ptr<ngraph::Function> func = m_ie_engine->get_func();

// Check if the number of inputs that the CNN network expects is equal to the
// sum of the
// inputs specified and the inputs we hoisted, if any.
Expand All @@ -166,7 +172,8 @@ bool Executable::call(const vector<shared_ptr<runtime::Tensor>>& inputs,
}

// Prepare input blobs
auto func = m_network.getFunction();
std::vector<std::shared_ptr<IETensor>> ie_inputs(inputs.size());
std::vector<std::string> input_names(inputs.size());
auto parameters = func->get_parameters();
int j = 0;
for (int i = 0; i < inputs.size(); i++) {
Expand All @@ -179,18 +186,23 @@ bool Executable::call(const vector<shared_ptr<runtime::Tensor>>& inputs,
NGRAPH_VLOG(1) << "Skipping unused input " << input_name;
continue;
}
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(inputs[i]);
m_infer_req.SetBlob(input_name, tv->get_blob());
ie_inputs[i] = nullptr;
ie_inputs[i] = static_pointer_cast<IETensor>(inputs[i]);
input_names[i] = input_name;
}

std::vector<std::shared_ptr<IETensor>> ie_hoisted_params(
m_hoisted_params.size());
std::vector<std::string> param_names(m_hoisted_params.size());
for (const auto& it : m_hoisted_params) {
auto input_name = it.first;
if (input_info.find(input_name) == input_info.end()) {
NGRAPH_VLOG(1) << "Skipping unused hoisted param " << input_name;
continue;
}
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(it.second);
m_infer_req.SetBlob(input_name, tv->get_blob());
ie_hoisted_params[j] = nullptr;
ie_hoisted_params[j] = static_pointer_cast<IETensor>(it.second);
param_names[j++] = input_name;
}

InferenceEngine::OutputsDataMap output_info = m_network.getOutputsInfo();
Expand All @@ -213,22 +225,29 @@ bool Executable::call(const vector<shared_ptr<runtime::Tensor>>& inputs,

// Prepare output blobs
auto results = func->get_results();
std::vector<std::shared_ptr<IETensor>> ie_outputs(outputs.size());
std::vector<std::string> output_names(outputs.size());
for (int i = 0; i < results.size(); i++) {
if (outputs[i] != nullptr) {
NGRAPH_VLOG(4) << "Executable::call() SetBlob()";
shared_ptr<IETensor> tv = static_pointer_cast<IETensor>(outputs[i]);
m_infer_req.SetBlob(get_output_name(results[i]), tv->get_blob());
ie_outputs[i] = static_pointer_cast<IETensor>(outputs[i]);
}
output_names[i] = get_output_name(results[i]);
}

if (multi_req_execution) {
m_ie_engine->enable_multi_req_execution();
} else {
m_ie_engine->disable_multi_req_execution();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this needed? Isn't it disabled by default?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's added in case batching might be disabled during consecutive executions. But we don't have advanced checks for now and there is no scenario to disable batching if it's already enabled. I updated it accordingly.

}

m_infer_req.Infer();
m_ie_engine->infer(ie_inputs, input_names, ie_outputs, output_names,
ie_hoisted_params, param_names);

// Set dynamic output blobs
for (int i = 0; i < results.size(); i++) {
if (outputs[i] == nullptr) {
NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
auto blob = m_infer_req.GetBlob(get_output_name(results[i]));
outputs[i] = make_shared<IETensor>(blob);
// NGRAPH_VLOG(4) << "Executable::call() GetBlob()";
outputs[i] = ie_outputs[i];
}
}

Expand Down
13 changes: 12 additions & 1 deletion ngraph_bridge/executable.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <ie_core.hpp>
#include "ngraph/ngraph.hpp"
#include "ngraph_bridge/ie_backend_engine.h"

using namespace std;

Expand All @@ -35,12 +36,21 @@ class Executable {
Executable(shared_ptr<ngraph::Function> func, string device);
~Executable() {}
bool call(const vector<shared_ptr<ngraph::runtime::Tensor>>& inputs,
vector<shared_ptr<ngraph::runtime::Tensor>>& outputs);
vector<shared_ptr<ngraph::runtime::Tensor>>& outputs,
bool multi_req_execution = false);

// Returns the result nodes of m_function by const reference.
const ngraph::ResultVector& get_results() {
return m_function->get_results();
};

// Returns the shape of the i-th output.
// When a trivial function is set (m_trivial_fn is non-null) the shape is read
// from the i-th result node of m_function; otherwise the query is forwarded
// to the IE execution engine.
const vector<size_t> get_output_shape(const int i) {
  if (!m_trivial_fn) {
    return m_ie_engine->get_output_shape(i);
  }
  return get_results()[i]->get_shape();
}

private:
bool call_trivial(const vector<shared_ptr<ngraph::runtime::Tensor>>& inputs,
vector<shared_ptr<ngraph::runtime::Tensor>>& outputs);
Expand All @@ -56,6 +66,7 @@ class Executable {
shared_ptr<ngraph::Function> m_trivial_fn;
// This is the original nGraph function corresponding to this executable
shared_ptr<ngraph::Function> m_function;
shared_ptr<IE_Backend_Engine> m_ie_engine;
};
}
}
103 changes: 103 additions & 0 deletions ngraph_bridge/ie_backend_engine.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*******************************************************************************
* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include "ngraph_bridge/ie_backend_engine.h"
#include <iostream>
#include "ngraph_bridge/ie_utils.h"
Copy link
Contributor

@kanvi-nervana kanvi-nervana Dec 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

General include-order guideline followed in the bridge:
1. standard includes
2. TF
3. OV
4. nGraph
5. bridge


namespace tensorflow {
namespace ngraph_bridge {

// Stores `ie_network`, its nGraph function and the target `device` name.
// The network is not loaded here: m_network_ready starts out false and
// multi request execution starts out disabled.
IE_Backend_Engine::IE_Backend_Engine(InferenceEngine::CNNNetwork ie_network,
                                     std::string device)
    : m_network(ie_network),
      m_func(ie_network.getFunction()),
      m_device(device),
      m_multi_req_execution(false),
      m_network_ready(false) {
  // Graph dumping is opt-in through the NGRAPH_TF_DUMP_GRAPHS environment
  // variable; nothing else to do when it is not set.
  if (std::getenv("NGRAPH_TF_DUMP_GRAPHS") == nullptr) {
    return;
  }

  // Serialize the network to "<network name>.xml" / "<network name>.bin".
  const std::string& network_name = m_network.getName();
  const std::string xml_path = network_name + ".xml";
  const std::string bin_path = network_name + ".bin";
  m_network.serialize(xml_path, bin_path);
}

// No custom teardown is needed; the members release themselves.
IE_Backend_Engine::~IE_Backend_Engine() = default;

void IE_Backend_Engine::load_network() {
if (m_network_ready) return;

std::map<std::string, std::string> config;

if (m_device == "MYRIAD") {
// Set MYRIAD configurations
if (IE_Utils::VPUConfigEnabled()) {
config["MYRIAD_DETECT_NETWORK_BATCH"] = "NO";
}

if (IE_Utils::VPUFastCompileEnabled()) {
config["MYRIAD_HW_INJECT_STAGES"] = "NO";
config["MYRIAD_COPY_OPTIMIZATION"] = "NO";
}
}

InferenceEngine::Core ie;
// Load network to the plugin (m_device)
m_exe_network = ie.LoadNetwork(m_network, m_device, config);
m_network_ready = true;
}

void IE_Backend_Engine::start_async_inference(const int req_id) {
// Start Async inference
try {
m_infer_reqs[req_id].StartAsync();
} catch (InferenceEngine::details::InferenceEngineException e) {
THROW_IE_EXCEPTION << "Couldn't start Inference: ";
} catch (...) {
THROW_IE_EXCEPTION << "Couldn't start Inference: ";
}
}

void IE_Backend_Engine::complete_async_inference(const int req_id) {
// Wait for Async inference completion
try {
m_infer_reqs[req_id].Wait(
InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
} catch (InferenceEngine::details::InferenceEngineException e) {
THROW_IE_EXCEPTION << " Exception with completing Inference: ";
} catch (...) {
THROW_IE_EXCEPTION << " Exception with completing Inference: ";
}
}

// Returns the batch size of m_network multiplied by the value
// IE_Utils::GetNumRequests reports for `inputBatchSize` on m_device.
size_t IE_Backend_Engine::getOutputBatchSize(size_t inputBatchSize) const {
  const auto network_batch = m_network.getBatchSize();
  const auto num_requests = IE_Utils::GetNumRequests(inputBatchSize, m_device);
  return network_batch * num_requests;
}

// Enables multi request execution if the execution engine supports it.
// This only sets the m_multi_req_execution flag; how (and whether) the flag
// is honored is up to the engine implementation that reads it.
void IE_Backend_Engine::enable_multi_req_execution() {
m_multi_req_execution = true;
}
// Disables multi request execution by clearing the m_multi_req_execution
// flag (the constructor also initializes the flag to false).
void IE_Backend_Engine::disable_multi_req_execution() {
m_multi_req_execution = false;
}

// Returns m_func, the nGraph function the constructor obtained from the
// CNNNetwork via getFunction().
std::shared_ptr<ngraph::Function> IE_Backend_Engine::get_func() {
return m_func;
}
}
}
72 changes: 72 additions & 0 deletions ngraph_bridge/ie_backend_engine.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*******************************************************************************
* Copyright 2017-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef IE_BACKEND_ENGINE_H_
#define IE_BACKEND_ENGINE_H_

#include <ie_core.hpp>
#include <memory>
#include <string>
#include <vector>
#include "ngraph_bridge/ie_tensor.h"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

namespace tensorflow {
namespace ngraph_bridge {

// Abstract base class for the Inference Engine execution engines.
// Concrete engines implement infer() and get_output_shape(); this base holds
// the CNNNetwork, its nGraph function, the device name and the infer
// requests.
class IE_Backend_Engine {
 public:
  IE_Backend_Engine(InferenceEngine::CNNNetwork ie_network, std::string device);
  // Virtual because the class is a polymorphic base: engines are created as
  // derived types and owned through pointers to IE_Backend_Engine, so
  // destruction through a base pointer must run the derived destructor.
  virtual ~IE_Backend_Engine();

  // Executes the inference
  virtual void infer(std::vector<std::shared_ptr<IETensor>>& inputs,
                     std::vector<std::string>& input_names,
                     std::vector<std::shared_ptr<IETensor>>& outputs,
                     std::vector<std::string>& output_names,
                     std::vector<std::shared_ptr<IETensor>>& hoisted_params,
                     std::vector<std::string>& param_names) = 0;

  // Returns output batch size based on the input batch size and the device
  // FIXME: This may not be needed
  virtual size_t getOutputBatchSize(size_t inputBatchSize) const;

  // Enables multi request execution if the execution engine supports it
  void enable_multi_req_execution();
  // Disables multi request execution
  void disable_multi_req_execution();

  // Returns the NGraph Function from the CNNNetwork
  std::shared_ptr<ngraph::Function> get_func();

  // Returns the shape of the i-th output; implemented by each engine
  virtual const std::vector<size_t> get_output_shape(const int i) = 0;

 protected:
  InferenceEngine::CNNNetwork m_network;
  std::shared_ptr<ngraph::Function> m_func;
  // Infer requests, addressed by index (req_id) in the async helpers below
  std::vector<InferenceEngine::InferRequest> m_infer_reqs;
  std::string m_device;
  bool m_multi_req_execution;
  InferenceEngine::ExecutableNetwork m_exe_network;
  // Set once load_network() has loaded m_network to the device
  bool m_network_ready;

  virtual void start_async_inference(const int req_id);
  virtual void complete_async_inference(const int req_id);
  virtual void load_network();
};
}
}

#endif // IE_BACKEND_ENGINE_H_
Loading