| | NCS | NCS2 | Notes |
|---|---|---|---|
| Vendor | Movidius | Intel | Movidius, an Israeli company, was acquired by Intel in 2016 |
| Architecture | Myriad 2 VPU | Myriad X VPU | The full name of the Myriad X VPU is Intel® Movidius™ Myriad™ X VPU |
| Compute cores | 12 SHAVE cores | 16 SHAVE cores | |
| Deep neural network inference | — | Dedicated hardware acceleration (Neural Compute Engine) | |
| | NCSDK (Movidius SDK) | OpenVINO | Notes |
|---|---|---|---|
| Developer | Originally developed by Movidius, later acquired by Intel | Intel | |
| NCS support | NCSDK v1/v2 support only the first-generation NCS | Supports both NCS and NCS2 | NCSDK v1 and v2 are mutually incompatible; NCSDK v1 is essentially deprecated. OpenVINO in effect subsumes the Movidius SDK |
| Functionality | Neural-network inference acceleration only | Integrates OpenCV and OpenVX support | |
| Supported frameworks | Caffe, TensorFlow | Caffe, TensorFlow, MXNet, Kaldi, ONNX | |
| API | C, Python | C++, Python | |
| Multiple NCS devices | Exposes a list of NCS devices; the user decides which device runs each inference | Dispatches inference tasks to NCS devices based on load; no manual device management needed | |
| Limitations | Currently supported ARM device: Raspberry Pi | | |
| Supported devices | NCS | Intel CPU, NCS, NCS2, Movidius VPU, Intel GPU, FPGA | |
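
The difference in multi-device handling shows up directly in the APIs. Below is a minimal sketch of the NCSDK side (assuming more than one stick is plugged in, using the same `mvncapi` calls as the listing that follows): the application enumerates the sticks and must pick one itself, whereas OpenVINO's MYRIAD plugin distributes requests across sticks on its own.

```python
from mvnc import mvncapi

# NCSDK: enumerate sticks and choose one explicitly.
device_list = mvncapi.enumerate_devices()
print('Found %d NCS device(s)' % len(device_list))

# Hypothetical policy: prefer the second stick when available.
device = mvncapi.Device(device_list[1 if len(device_list) > 1 else 0])
device.open()
# ... allocate a graph and run inference on this specific stick ...
device.close()
device.destroy()
```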
"""NCAPI v2"""
### initialization
from mvnc import mvncapi
######################## Device Initialization #######################
device_list = mvncapi.enumerate_devices()
device = mvncapi.Device(device_list[0])
device.open()
########################################################################
####################### Load Neural Network ########################
# Initialize a graph from file at some GRAPH_FILEPATH
GRAPH_FILEPATH = './graph'
with open(GRAPH_FILEPATH, mode='rb') as f:
graph_buffer = f.read()
graph = mvncapi.Graph('graph1')
# CONVENIENCE FUNCTION:
# Allocate the graph to the device and create input/output Fifos with default options in one call
input_fifo, output_fifo = graph.allocate_with_fifos(device, graph_buffer)
########################################################################
######################## Obtain Input Tensor #########################
# Read and pre-process input (data type must match input Fifo data type)
input_tensor = ...
########################################################################
######################### Start Inference #########################
# CONVENIENCE FUNCTION:
# Write the image to the input queue and queue the inference in one call
graph.queue_inference_with_fifo_elem(input_fifo, output_fifo, input_tensor, None)
########################################################################
######################### Get Inference result #######################
# Get the results from the output queue
output, user_obj = output_fifo.read_elem()
########################################################################
# Do something with the results...
############################# Clean up ###############################
input_fifo.destroy()
output_fifo.destroy()
graph.destroy()
device.close()
device.destroy()
########################################################################
```
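
The `input_tensor = ...` step above is deliberately abstract. Whatever produces it must match the input Fifo's data type, which is FP16 by default on the NCS. A minimal sketch with OpenCV and NumPy; the 224×224 size and the mean/scale values are placeholders that depend on your network:

```python
import cv2
import numpy as np

# Hypothetical preprocessing for a 224x224 RGB classification network.
image = cv2.imread('input_image.jpg')
image = cv2.resize(image, (224, 224))
image = image.astype(np.float32)
image = (image - 127.5) * 0.007843          # placeholder mean/scale normalization
input_tensor = image.astype(np.float16)     # match the Fifo's default FP16 type
```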
```python
# OpenVINO (Python)
import sys

import cv2
import numpy as np
from openvino.inference_engine import IENetwork, IEPlugin


def main():
    ####################### Device Initialization ########################
    # Plugin initialization for the specified target device
    plugin = IEPlugin(device="MYRIAD")
    #########################################################################
    ######################### Load Neural Network #########################
    # Read in Graph file (IR)
    net = IENetwork.from_ir(model="graph1.xml", weights="graph1.bin")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    # Record the input shape before the IENetwork object is released below
    n, c, h, w = net.inputs[input_blob]
    # Load network to the plugin
    exec_net = plugin.load(network=net)
    del net
    ########################################################################
    ######################### Obtain Input Tensor ########################
    # Read and pre-process the input image
    image = cv2.imread("input_image.jpg")
    # Preprocessing is network dependent
    image = cv2.resize(image, (w, h))
    image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
    image = image.reshape((n, c, h, w))
    ########################################################################
    ########################## Start Inference ##########################
    # Queue the inference asynchronously; wait() below makes it synchronous
    req_handle = exec_net.start_async(inputs={input_blob: image})
    ########################################################################
    ######################## Get Inference Result #########################
    status = req_handle.wait()
    res = req_handle.outputs[out_blob]
    # Do something with the results... (like print top 5)
    top_ind = np.argsort(res, axis=1)[0, -5:][::-1]
    for i in top_ind:
        print("%f #%d" % (res[0, i], i))
    ############################### Clean Up ############################
    del exec_net
    del plugin
    ########################################################################


if __name__ == '__main__':
    sys.exit(main() or 0)
```
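
With this IEPlugin-era Python API, the `start_async()` + `wait()` pair above behaves synchronously. The same API also exposes a direct blocking call; a minimal sketch reusing `exec_net`, `input_blob`, `out_blob`, and `image` from the listing above:

```python
# ExecutableNetwork.infer() blocks until completion and returns a dict
# mapping output blob names to numpy arrays.
res = exec_net.infer(inputs={input_blob: image})
top_ind = np.argsort(res[out_blob], axis=1)[0, -5:][::-1]
```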
```c
// NCAPI v2 (C)
// initialization
#include <stdio.h>
#include <stdlib.h>
#include <mvnc.h>
/////////////////////// Device Initialization ///////////////////////
struct ncDeviceHandle_t* deviceHandle;
int deviceIndex = 0;  // index of the stick to use
ncDeviceCreate(deviceIndex, &deviceHandle);
ncDeviceOpen(deviceHandle);
/////////////////////////////////////////////////////////////////////
////////////////////////// Load Neural Network //////////////////////
// Create the graph handle
struct ncGraphHandle_t* graphHandle = NULL;
ncGraphCreate("graph", &graphHandle);
// Initialize and read in a graph from some GRAPHFILE_PATH
FILE* fp = fopen(GRAPHFILE_PATH, "rb");
if (fp == NULL)
    return 0;
fseek(fp, 0, SEEK_END);
unsigned int graphLength = ftell(fp);
rewind(fp);
char* graphBuffer;
if (!(graphBuffer = (char*) malloc(graphLength))) {
    fclose(fp);
    return 0;
}
if (fread(graphBuffer, 1, graphLength, fp) != graphLength) {
    fclose(fp);
    free(graphBuffer);
    return 0;
}
fclose(fp);
struct ncFifoHandle_t* inputFifo = NULL;
struct ncFifoHandle_t* outputFifo = NULL;
// CONVENIENCE FUNCTION:
// Allocate and create input/output fifos in one call
ncGraphAllocateWithFifos(deviceHandle, graphHandle, graphBuffer, graphLength, &inputFifo, &outputFifo);
/////////////////////////////////////////////////////////////////////
/////////////////////// Obtain Input Tensor ///////////////////////
// Read and preprocess input from image file or camera etc.
// inputTensor must hold data matching the input Fifo's type (FP16 by
// default) and inputTensorLength its size in bytes
inputTensor = ...
/////////////////////////////////////////////////////////////////////
//////////////////////// Start Inference /////////////////////////
// CONVENIENCE FUNCTION:
// Write the image to the input queue and queue the inference in one call
ncGraphQueueInferenceWithFifoElem(graphHandle, inputFifo, outputFifo, inputTensor, &inputTensorLength, NULL);
/////////////////////////////////////////////////////////////////////
/////////////////////// Get Inference Result ////////////////////////
// Get the results from the output queue
// (outputData must point to a buffer of outputDataLength bytes, as reported
//  by ncFifoGetOption with NC_RO_FIFO_ELEMENT_DATA_SIZE)
ncFifoReadElem(outputFifo, outputData, &outputDataLength, NULL);
/////////////////////////////////////////////////////////////////////
// Do something with the results...
///////////////////////////// Clean up /////////////////////////////
ncFifoDestroy(&inputFifo);
ncFifoDestroy(&outputFifo);
ncGraphDestroy(&graphHandle);
ncDeviceClose(deviceHandle);
ncDeviceDestroy(&deviceHandle);
/////////////////////////////////////////////////////////////////////
```
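
In the C API the caller owns `outputData` and must size it to the Fifo's element size, queried via `ncFifoGetOption` with `NC_RO_FIFO_ELEMENT_DATA_SIZE`. For comparison, a sketch of the same query through the Python bindings (enum name per the NCSDK v2 `FifoOption` docs), where `read_elem()` otherwise handles sizing automatically:

```python
# Size in bytes of one output element; in C this value is used to malloc
# the outputData buffer passed to ncFifoReadElem().
element_size = output_fifo.get_option(mvncapi.FifoOption.RO_ELEMENT_DATA_SIZE)
print('output element size: %d bytes' % element_size)
```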
```cpp
// OpenVINO (C++)
#include <inference_engine.hpp>
#include <opencv2/opencv.hpp>

using namespace InferenceEngine;

///////////////////// Device Initialization //////////////////////
// Plugin initialization
InferenceEngine::PluginDispatcher dispatcher({"../../../lib/intel64", ""});
InferencePlugin plugin(dispatcher.getSuitablePlugin(TargetDevice::eMYRIAD));
/////////////////////////////////////////////////////////////////////
////////////////////// Load Neural Network //////////////////////
// Read in Graph file (IR); file names follow the Python example above
const std::string input_model = "graph1.xml";
CNNNetReader network_reader;
network_reader.ReadNetwork(input_model);
network_reader.ReadWeights(input_model.substr(0, input_model.size() - 4) + ".bin");
network_reader.getNetwork().setBatchSize(1);
CNNNetwork network = network_reader.getNetwork();
// Prepare input blobs
auto input_info = network.getInputsInfo().begin()->second;
auto input_name = network.getInputsInfo().begin()->first;
input_info->setPrecision(Precision::U8);
// Prepare output blobs
auto output_info = network.getOutputsInfo().begin()->second;
auto output_name = network.getOutputsInfo().begin()->first;
output_info->setPrecision(Precision::FP32);
// Load network to the plugin
auto executable_network = plugin.LoadNetwork(network, {});
auto infer_request = executable_network.CreateInferRequest();
auto input = infer_request.GetBlob(input_name);
auto input_data = input->buffer().as<PrecisionTrait<Precision::U8>::value_type*>();
//////////////////////////////////////////////////////////////////////
/////////////////////// Obtain input tensor ////////////////////////
// Obtain and preprocess input tensor (image)
cv::Mat image = cv::imread("input_image.jpg");  // as in the Python example
cv::resize(image, image, cv::Size(input_info->getDims()[0], input_info->getDims()[1]));
size_t channels_number = input->dims()[2];
size_t image_size = input->dims()[1] * input->dims()[0];
for (size_t pid = 0; pid < image_size; ++pid) {
for (size_t ch = 0; ch < channels_number; ++ch) {
input_data[ch * image_size + pid] = image.at<cv::Vec3b>(pid)[ch];
}
}
//////////////////////////////////////////////////////////////////////
///////////////////////// Start Inference /////////////////////////
// Start synchronous inference and get inference result
infer_request.Infer();
//////////////////////////////////////////////////////////////////////
////////////////////// Get Inference Result ///////////////////////
auto output = infer_request.GetBlob(output_name);
auto output_data = output->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
//////////////////////////////////////////////////////////////////////
// Do something with the results... (like print top 5)
// `results` is assumed to hold the output indices sorted by descending
// score; computing it (e.g. with std::partial_sort) is omitted here
const size_t results_to_display = 5;
std::cout << std::endl << "Top " << results_to_display << " results:" << std::endl << std::endl;
for (size_t id = 0; id < results_to_display; ++id) {
std::cout.precision(7);
auto result = output_data[results[id]];
std::cout << std::left << std::fixed << result << " label #" << results[id] << std::endl;
}
////////////////////////////// clean up //////////////////////////////
// clean up done in destructors, nothing explicit to do.
///////////////////////////////////////////////////////////////////////
```