NeverMoreH

TensorRT学习笔记5 - 运行fc_plugin_caffe_mnist

安装依赖

cuda（安过）
cudnn（安过）
cuBLAS（在cuda toolkit里）
pybind11（放到/home/user/下）：git clone -b v2.2.3 https://github.com/pybind/pybind11.git
Pillow：pip3 install Pillow
pycuda：pip3 install 'pycuda>=2017.1.1'
numpy：pip3 install numpy
argparse：pip3 install argparse

运行过程

cd /your/TensorRT/path/samples/python/fc_plugin_caffe_mnist
mkdir build && pushd build
cmake ..
make
popd
sudo python3 sample.py -d /home/user/tensorrt_tar/TensorRT/data/

代码解析

CMakeLists.txt

# Require at least CMake 3.2; FATAL_ERROR aborts configuration on older versions.
cmake_minimum_required(VERSION 3.2 FATAL_ERROR)
# Project name: FCPlugin. Languages: CXX and C (also what CMake enables when LANGUAGES is omitted).
project(FCPlugin LANGUAGES CXX C)

# set_ifndef(var val): give the variable named by `var` the value `val` when that
# variable currently evaluates to false in if() (for example unset or empty), then
# print the value it ends up with. Because this is a macro, the set() takes effect
# in the caller's scope. A value that is already set and truthy is left untouched.
macro(set_ifndef var val)
    if(NOT ${var})
        set(${var} ${val})
    endif()
    message(STATUS "Configurable variable ${var} set to ${${var}}")
endmacro()

# -------- Configuration --------
# Name of the Python module (default: fcplugin). It must match the module name used in pyFullyConnected.cpp.
set_ifndef(PY_MODULE_NAME fcplugin)
# Build the C++ sources as C++11.
set(CMAKE_CXX_STANDARD 11)
# Ask pybind11 for -std=c++11 as well (the note's author states pybind11 defaults to C++14).
set(PYBIND11_CPP_STANDARD -std=c++11)
# $ENV{HOME} is the HOME environment variable; default PYBIND11_DIR to the pybind11 folder beneath it.
set_ifndef(PYBIND11_DIR $ENV{HOME}/pybind11/)

# Defaults for the remaining configurable locations; each can be overridden with -D<NAME>=<value>.
set_ifndef(CUDA_VERSION 10.0)
set_ifndef(CUDA_ROOT /usr/local/cuda-${CUDA_VERSION})
set_ifndef(CUDNN_ROOT ${CUDA_ROOT})
set_ifndef(PYTHON_ROOT /usr/include)
set_ifndef(TRT_LIB_DIR /usr/lib/x86_64-linux-gnu)
set_ifndef(TRT_INC_DIR /usr/include/x86_64-linux-gnu)

# Tell the user that the variables below are derived from the ones above unless given explicitly.
message("\nThe following variables are derived from the values of the previous variables unless provided explicitly:\n")

# Find the directory containing cuda_runtime_api.h and store it in _CUDA_INC_DIR.
# HINTS ${CUDA_ROOT} adds ${CUDA_ROOT} as an extra location to search.
# PATH_SUFFIXES include also tries the "include" subdirectory of each searched location.
find_path(_CUDA_INC_DIR cuda_runtime_api.h HINTS ${CUDA_ROOT} PATH_SUFFIXES include)
set_ifndef(CUDA_INC_DIR ${_CUDA_INC_DIR})

# CUDA runtime library (cudart), searched under ${CUDA_ROOT}/lib and ${CUDA_ROOT}/lib64.
find_library(_CUDA_LIB cudart HINTS ${CUDA_ROOT} PATH_SUFFIXES lib lib64)
set_ifndef(CUDA_LIB ${_CUDA_LIB})

# cuBLAS library, searched in the same CUDA locations.
find_library(_CUBLAS_LIB cublas HINTS ${CUDA_ROOT} PATH_SUFFIXES lib lib64)
set_ifndef(CUBLAS_LIB ${_CUBLAS_LIB})

# cuDNN header directory (the directory containing cudnn.h).
find_path(_CUDNN_INC_DIR cudnn.h HINTS ${CUDNN_ROOT} PATH_SUFFIXES include x86_64-linux-gnu)
set_ifndef(CUDNN_INC_DIR ${_CUDNN_INC_DIR})

# cuDNN library.
find_library(_CUDNN_LIB cudnn HINTS ${CUDNN_ROOT} PATH_SUFFIXES lib lib64 x86_64-linux-gnu)
set_ifndef(CUDNN_LIB ${_CUDNN_LIB})

# Locate the directory containing the TensorRT header NvInfer.h. A header has to be
# looked up with find_path(); find_library() searches for library files and therefore
# never matches NvInfer.h. set_ifndef only assigns TRT_INC_DIR when it has no truthy
# value yet, so a TRT_INC_DIR that is already set (e.g. via -DTRT_INC_DIR=...) is kept.
find_path(_TRT_INC_DIR NvInfer.h HINTS ${TRT_INC_DIR} PATH_SUFFIXES include x86_64-linux-gnu)
set_ifndef(TRT_INC_DIR ${_TRT_INC_DIR})

# TensorRT core library (nvinfer), searched under ${TRT_LIB_DIR}.
find_library(_NVINFER_LIB nvinfer HINTS ${TRT_LIB_DIR} PATH_SUFFIXES lib lib64 x86_64-linux-gnu)
set_ifndef(NVINFER_LIB ${_NVINFER_LIB})

# TensorRT parsers library (nvparsers).
find_library(_NVPARSERS_LIB nvparsers HINTS ${TRT_LIB_DIR} PATH_SUFFIXES lib lib64 x86_64-linux-gnu)
set_ifndef(NVPARSERS_LIB ${_NVPARSERS_LIB})

# TensorRT plugin library (nvinfer_plugin).
find_library(_NVINFER_PLUGIN_LIB nvinfer_plugin HINTS ${TRT_LIB_DIR} PATH_SUFFIXES lib lib64 x86_64-linux-gnu)
set_ifndef(NVINFER_PLUGIN_LIB ${_NVINFER_PLUGIN_LIB})

# Python 2 headers: the directory containing Python.h, tried under ${PYTHON_ROOT}/python2.7.
find_path(_PYTHON2_INC_DIR Python.h HINTS ${PYTHON_ROOT} PATH_SUFFIXES python2.7)
set_ifndef(PYTHON2_INC_DIR ${_PYTHON2_INC_DIR})

# Python 3 headers: the python3.7, python3.6, python3.5 and python3.4 subdirectories are tried.
find_path(_PYTHON3_INC_DIR Python.h HINTS ${PYTHON_ROOT} PATH_SUFFIXES python3.7 python3.6 python3.5 python3.4)
set_ifndef(PYTHON3_INC_DIR ${_PYTHON3_INC_DIR})

# -------- 构建 --------

# Add the TensorRT, CUDA, cuDNN and pybind11 header directories to the include path.
include_directories(${TRT_INC_DIR} ${CUDA_INC_DIR} ${CUDNN_INC_DIR} ${PYBIND11_DIR}/include/)

# Add pybind11 as a subdirectory so that pybind11_add_module becomes available.
add_subdirectory(${PYBIND11_DIR} ${CMAKE_BINARY_DIR}/pybind11)

# file(GLOB ...) collects the files matching a pattern into a list variable.
# GLOB_RECURSE also descends into subdirectories: every .cpp file under
# ${CMAKE_SOURCE_DIR}/plugin/ is gathered into SOURCE_FILES.
file(GLOB_RECURSE SOURCE_FILES ${CMAKE_SOURCE_DIR}/plugin/*.cpp)

# Bindings library. The module name MUST MATCH the module name specified in the .cpp
# Built only when a Python 3 include directory is available and was not disabled by
# passing the literal value "None". The expansion is quoted so that an empty value
# cannot produce a malformed if() and the path is never re-read as a variable name.
if(PYTHON3_INC_DIR AND NOT ("${PYTHON3_INC_DIR}" STREQUAL "None"))
    pybind11_add_module(${PY_MODULE_NAME} SHARED THIN_LTO ${SOURCE_FILES})
    target_include_directories(${PY_MODULE_NAME} BEFORE PUBLIC ${PYTHON3_INC_DIR})
    target_link_libraries(${PY_MODULE_NAME} PRIVATE ${CUDNN_LIB} ${CUDA_LIB} ${CUBLAS_LIB} ${NVINFER_LIB} ${NVPARSERS_LIB} ${NVINFER_PLUGIN_LIB})
endif()

# Python 2 bindings, built only when a Python 2 include directory is available and was
# not disabled by passing the literal value "None". The expansion is quoted so that an
# empty value cannot produce a malformed if() and the path is never re-read as a variable name.
if(PYTHON2_INC_DIR AND NOT ("${PYTHON2_INC_DIR}" STREQUAL "None"))
    # Suffix the cmake target name with a 2 to differentiate from the Python 3 bindings target.
    pybind11_add_module(${PY_MODULE_NAME}2 SHARED THIN_LTO ${SOURCE_FILES})
    target_include_directories(${PY_MODULE_NAME}2 BEFORE PUBLIC ${PYTHON2_INC_DIR})
    target_link_libraries(${PY_MODULE_NAME}2 PRIVATE ${CUDNN_LIB} ${CUDA_LIB} ${CUBLAS_LIB} ${NVINFER_LIB} ${NVPARSERS_LIB} ${NVINFER_PLUGIN_LIB})
    # Rename to remove the .cpython-35... extension.
    set_target_properties(${PY_MODULE_NAME}2 PROPERTIES OUTPUT_NAME ${PY_MODULE_NAME} SUFFIX ".so")
    # Python 2 requires an empty __init__ file to be able to import.
    file(WRITE ${CMAKE_BINARY_DIR}/__init__.py "")
endif()

FullyConnected.h

#ifndef _FULLY_CONNECTED_H_
#define _FULLY_CONNECTED_H_

#include <cassert>
#include <cstring>
#include <cuda_runtime_api.h>
#include <cudnn.h>
#include <cublas_v2.h>
#include <stdexcept>

#include "NvInfer.h"
#include "NvCaffeParser.h"

// CHECK(status): evaluate `status` exactly once and throw std::runtime_error when it is
// non-zero. The message is "<file>:<line> CUDA Error: <code>". The file name is wrapped
// in std::string before concatenation, because `__FILE__ + __LINE__` would be pointer
// arithmetic on the string literal rather than text concatenation. The braces keep the
// macro usable exactly as before (`CHECK(call);`).
#define CHECK(status) { const auto checkStatus_ = (status); if (checkStatus_ != 0) throw std::runtime_error(std::string{__FILE__} + ":" + std::to_string(__LINE__) + " CUDA Error: " + std::to_string(static_cast<int>(checkStatus_))); }

// Helpers to move data to/from the GPU.
//
// copyToDevice: allocate a device buffer of `count` floats, copy `count` floats from
// `hostData` into it and return a Weights struct (kFLOAT) that points at the device
// buffer. Throws std::runtime_error (via CHECK) if the allocation or the copy fails.
// Declared inline because the definition lives in a header.
inline nvinfer1::Weights copyToDevice(const void* hostData, int count)
{
	void* deviceData;
	CHECK(cudaMalloc(&deviceData, count * sizeof(float)));
	const cudaError_t copyStatus = cudaMemcpy(deviceData, hostData, count * sizeof(float), cudaMemcpyHostToDevice);
	if (copyStatus != cudaSuccess)
	{
		// Release the buffer before CHECK throws so a failed copy does not leak it.
		cudaFree(deviceData);
	}
	CHECK(copyStatus);
	return nvinfer1::Weights{nvinfer1::DataType::kFLOAT, deviceData, count};
}

// Serialize one weights blob into `hostBuffer`: first the element count as an int,
// then `count` floats copied back from device memory. Returns the number of bytes
// written (sizeof(int) + count * sizeof(float)). Throws std::runtime_error (via CHECK)
// if the device-to-host copy fails. Declared inline because the definition lives in a header.
inline int copyFromDevice(char* hostBuffer, nvinfer1::Weights deviceWeights)
{
	*reinterpret_cast<int*>(hostBuffer) = deviceWeights.count;
	CHECK(cudaMemcpy(hostBuffer + sizeof(int), deviceWeights.values, deviceWeights.count * sizeof(float), cudaMemcpyDeviceToHost));
	return static_cast<int>(sizeof(int) + deviceWeights.count * sizeof(float));
}

class FCPlugin: public nvinfer1::IPluginExt
{
public:
	// In this simple case we're going to infer the number of output channels from the bias weights.
	// The knowledge that the kernel weights are weights[0] and the bias weights are weights[1] was
	// divined from the caffe innards
	FCPlugin(const nvinfer1::Weights* weights, int nbWeights)
	{
		assert(nbWeights == 2);
		mKernelWeights = copyToDevice(weights[0].values, weights[0].count);
		mBiasWeights = copyToDevice(weights[1].values, weights[1].count);
	}

	// Create the plugin at runtime from a byte stream.
	FCPlugin(const void* data, size_t length)
	{
		const char* d = reinterpret_cast(data);
		const char* check = d;
		// Deserialize kernel.
		const int kernelCount = reinterpret_cast(d)[0];
		mKernelWeights = copyToDevice(d + sizeof(int), kernelCount);
		d += sizeof(int) + mKernelWeights.count * sizeof(float);
		// Deserialize bias.
		const int biasCount = reinterpret_cast(d)[0];
		mBiasWeights = copyToDevice(d + sizeof(int), biasCount);
		d += sizeof(int) + mBiasWeights.count * sizeof(float);
		// Check that the sizes are what we expected.
		assert(d == check + length);
	}

	virtual int getNbOutputs() const override { return 1; }

	virtual nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) override
	{
		assert(index == 0 && nbInputDims == 1 && inputs[0].nbDims == 3);
		return nvinfer1::DimsCHW{static_cast(mBiasWeights.count), 1, 1};
	}

	virtual int initialize() override
	{
		CHECK(cudnnCreate(&mCudnn));
		CHECK(cublasCreate(&mCublas));
		// Create cudnn tensor descriptors for bias addition.
		CHECK(cudnnCreateTensorDescriptor(&mSrcDescriptor));
		CHECK(cudnnCreateTensorDescriptor(&mDstDescriptor));
		return 0;
	}

	virtual void terminate() override
	{
		CHECK(cudnnDestroyTensorDescriptor(mSrcDescriptor));
		CHECK(cudnnDestroyTensorDescriptor(mDstDescriptor));
		CHECK(cublasDestroy(mCublas));
		CHECK(cudnnDestroy(mCudnn));
	}

    // This plugin requires no workspace memory during build time.
	virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }

	virtual int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override
	{
		int nbOutputChannels = mBiasWeights.count;
		int nbInputChannels = mKernelWeights.count / nbOutputChannels;
		constexpr float kONE = 1.0f, kZERO = 0.0f;
		// Do matrix multiplication.
		cublasSetStream(mCublas, stream);
		cudnnSetStream(mCudnn, stream);
		CHECK(cublasSgemm(mCublas, CUBLAS_OP_T, CUBLAS_OP_N, nbOutputChannels, batchSize, nbInputChannels, &kONE,
				reinterpret_cast(mKernelWeights.values), nbInputChannels,
				reinterpret_cast(inputs[0]), nbInputChannels, &kZERO,
				reinterpret_cast(outputs[0]), nbOutputChannels));
        // Add bias.
		CHECK(cudnnSetTensor4dDescriptor(mSrcDescriptor, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, nbOutputChannels, 1, 1));
		CHECK(cudnnSetTensor4dDescriptor(mDstDescriptor, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batchSize, nbOutputChannels, 1, 1));
		CHECK(cudnnAddTensor(mCudnn, &kONE, mSrcDescriptor, mBiasWeights.values, &kONE, mDstDescriptor, outputs[0]));
		return 0;
	}

	// For this sample, we'll only support float32 with NCHW.
	virtual bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const override
	{
		return (type == nvinfer1::DataType::kFLOAT && format == nvinfer1::PluginFormat::kNCHW);
	}

	void configureWithFormat(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize)
	{
		assert(nbInputs == 1 && inputDims[0].d[1] == 1 && inputDims[0].d[2] == 1);
		assert(nbOutputs == 1 && outputDims[0].d[1] == 1 && outputDims[0].d[2] == 1);
		assert(mKernelWeights.count == inputDims[0].d[0] * inputDims[0].d[1] * inputDims[0].d[2] * mBiasWeights.count);
	}

	virtual size_t getSerializationSize() override
	{
		return sizeof(int) * 2 + mKernelWeights.count * sizeof(float) + mBiasWeights.count * sizeof(float);
	}

	virtual void serialize(void* buffer) override
	{
		char* d = reinterpret_cast(buffer);
		const char* check = d;
		d += copyFromDevice(d, mKernelWeights);
		d += copyFromDevice(d, mBiasWeights);
		assert(d == check + getSerializationSize());
	}

	// Free buffers.
	virtual ~FCPlugin()
	{
		cudaFree(const_cast(mKernelWeights.values));
		mKernelWeights.values = nullptr;
		cudaFree(const_cast(mBiasWeights.values));
		mBiasWeights.values = nullptr;
	}

private:
	cudnnHandle_t mCudnn;
	cublasHandle_t mCublas;
	nvinfer1::Weights mKernelWeights{nvinfer1::DataType::kFLOAT, nullptr}, mBiasWeights{nvinfer1::DataType::kFLOAT, nullptr};
	cudnnTensorDescriptor_t mSrcDescriptor, mDstDescriptor;
};

// Plugin factory used both while parsing the Caffe model (nvcaffeparser1::IPluginFactoryExt)
// and while deserializing an engine (nvinfer1::IPluginFactory). Only the layer named "ip2"
// is replaced by an FCPlugin.
class FCPluginFactory : public nvcaffeparser1::IPluginFactoryExt, public nvinfer1::IPluginFactory
{
public:
	bool isPlugin(const char* name) override { return isPluginExt(name); }

	// True only for the layer named "ip2".
	bool isPluginExt(const char* name) override { return !strcmp(name, "ip2"); }

	// Create a plugin using provided weights.
	virtual nvinfer1::IPlugin* createPlugin(const char* layerName, const nvinfer1::Weights* weights, int nbWeights) override
	{
		assert(isPluginExt(layerName) && nbWeights == 2);
		assert(mPlugin == nullptr);
		// This plugin will need to be manually destroyed after parsing the network, by calling destroyPlugin.
		mPlugin = new FCPlugin{weights, nbWeights};
		return mPlugin;
	}

	// Create a plugin from serialized data.
	virtual nvinfer1::IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override
	{
		assert(isPlugin(layerName));
		// This will be automatically destroyed when the engine is destroyed.
		return new FCPlugin{serialData, serialLength};
	}

	// User application destroys plugin when it is safe to do so.
	// Should be done after consumers of plugin (like ICudaEngine) are destroyed.
	// The pointer is reset afterwards so that a repeated call is a harmless no-op
	// and the factory can create a new plugin again.
	void destroyPlugin()
	{
		delete mPlugin;
		mPlugin = nullptr;
	}

	// Plugin created from weights during parsing; owned by this factory until destroyPlugin().
	FCPlugin* mPlugin{ nullptr };
};

#endif //_FULLY_CONNECTED_H

pyFullyConnected.cpp

#include "FullyConnected.h"
#include "NvInfer.h"
#include "NvCaffeParser.h"
#include <pybind11/pybind11.h>

// Python bindings for the plugin. The module name given here (fcplugin) must match
// the name of the module target produced by the build.
PYBIND11_MODULE(fcplugin, m)
{
    namespace py = pybind11;

    // This allows us to use the bindings exposed by the tensorrt module.
    py::module::import("tensorrt");

    // Note that we only need to bind the constructors manually. Since all other methods override IPlugin functionality, they will be automatically available in the python bindings.
    // The `std::unique_ptr` holder with `py::nodelete` specifies that Python is not responsible for destroying the object;
    // plugins are destroyed on the C++ side (FCPluginFactory::destroyPlugin or together with the engine).
    py::class_<FCPlugin, nvinfer1::IPluginExt, std::unique_ptr<FCPlugin, py::nodelete>>(m, "FCPlugin")
        // Bind the normal constructor as well as the one which deserializes the plugin
        .def(py::init<const nvinfer1::Weights*, int>())
        .def(py::init<const void*, size_t>())
    ;

    // Our custom plugin factory derives from both nvcaffeparser1::IPluginFactoryExt and nvinfer1::IPluginFactory
    py::class_<FCPluginFactory, nvcaffeparser1::IPluginFactoryExt, nvinfer1::IPluginFactory>(m, "FCPluginFactory")
        // Bind the default constructor.
        .def(py::init<>())
        // The destroy_plugin function does not override either of the base classes, so we must bind it explicitly.
        .def("destroy_plugin", &FCPluginFactory::destroyPlugin)
    ;
}

sample.py

# This sample uses a Caffe model along with a custom plugin to create a TensorRT engine.
from random import randint
from PIL import Image
import numpy as np
import tempfile

import pycuda.driver as cuda
import pycuda.autoinit

import tensorrt as trt

try:
    from build import fcplugin
except ImportError as err:
    raise ImportError("""ERROR: Failed to import module ({})
Please build the FullyConnected sample plugin.
For more information, see the included README.md
Note that Python 2 requires the presence of `__init__.py` in the build folder""".format(err))

# Allows us to import from common.
import sys, os
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Define some global constants about the model.
class ModelData(object):
    # Name of the network input tensor (not referenced elsewhere in this listing).
    INPUT_NAME = "input"
    # Input shape; presumably (channels, height, width) of one MNIST image — TODO confirm.
    INPUT_SHAPE = (1, 28, 28)
    # Name of the tensor that build_engine marks as the network output.
    OUTPUT_NAME = "prob"
    # Output shape; presumably one score per digit class — TODO confirm.
    OUTPUT_SHAPE = (10, )
    # Data type passed to the Caffe parser when the model is parsed.
    DTYPE = trt.float32

# Parses the binaryproto file at `mean_proto` with a short-lived CaffeParser and
# returns whatever parse_binary_proto produces for it (the mean data).
def retrieve_mean(mean_proto):
    with trt.CaffeParser() as caffe_parser:
        mean_data = caffe_parser.parse_binary_proto(mean_proto)
        return mean_data

# Create the parser's plugin factory. The factory is global because it has
# to be destroyed after the engine is destroyed.
fc_factory = fcplugin.FCPluginFactory()

# Builds a TensorRT engine from a Caffe deploy file and model file, routing plugin
# layers through the global fc_factory. For more information on TRT basics, refer
# to the introductory parser samples.
def build_engine(deploy_file, model_file):
    with trt.Builder(TRT_LOGGER) as trt_builder:
        with trt_builder.create_network() as net:
            with trt.CaffeParser() as caffe_parser:
                trt_builder.max_workspace_size = common.GiB(1)

                # Hand the parser the global plugin factory. The factory is kept in a
                # module-level name so it can be destroyed later
                # (parser.plugin_factory_ext is a write-only attribute).
                caffe_parser.plugin_factory_ext = fc_factory

                # Parse the model, mark the output tensor, then build the engine.
                tensors = caffe_parser.parse(deploy=deploy_file, model=model_file, network=net, dtype=ModelData.DTYPE)
                output_tensor = tensors.find(ModelData.OUTPUT_NAME)
                net.mark_output(output_tensor)
                return trt_builder.build_cuda_engine(net)

# Tries to load an engine from the provided engine_path, or builds and saves an engine to the engine_path.
#
# deploy_file / model_file: Caffe files handed to build_engine when no cached engine can be loaded.
# engine_path: location of the serialized engine cache (read first, written after a build).
# Returns the loaded or newly built engine.
def get_engine(deploy_file, model_file, engine_path):
    engine = None
    try:
        with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            # Note that we have to provide the plugin factory when deserializing an engine built with an IPlugin or IPluginExt.
            engine = runtime.deserialize_cuda_engine(f.read(), fc_factory)
    except Exception:
        # Any ordinary failure (missing or unreadable cache, deserialization error) falls
        # back to building. KeyboardInterrupt and SystemExit are deliberately not caught.
        engine = None
    # Treat a missing (None) result the same as a failed load.
    if engine is None:
        engine = build_engine(deploy_file, model_file)
        with open(engine_path, "wb") as f:
            f.write(engine.serialize())
    return engine

# Picks a random digit in 0..9, opens "<digit>.pgm" from data_path, flattens the image
# to a 1D array and subtracts `mean` from it. Returns (normalized_image, digit).
def load_normalized_test_case(data_path, mean):
    case_num = randint(0, 9)
    pgm_name = str(case_num) + ".pgm"
    pixels = np.array(Image.open(os.path.join(data_path, pgm_name)))
    # Flatten first, then normalize by subtracting the mean.
    normalized = pixels.ravel() - mean
    return normalized, case_num

def main():
    """Run one randomly chosen MNIST test image through the plugin-based engine and print the result.

    The engine is loaded from (or built and cached in) the system temporary directory.
    The plugin held by fc_factory is destroyed only after the engine's `with` block has exited.
    """
    # Get data files for the model.
    data_path, [deploy_file, model_file, mean_proto] = common.find_sample_data(description="Runs an MNIST network using a Caffe model file", subfolder="mnist", find_files=["mnist.prototxt", "mnist.caffemodel", "mnist_mean.binaryproto"])

    # Cache the engine in a temporary directory.
    engine_path = os.path.join(tempfile.gettempdir(), "mnist.engine")
    with get_engine(deploy_file, model_file, engine_path) as engine, engine.create_execution_context() as context:
        # Build an engine, allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        mean = retrieve_mean(mean_proto)
        # For more information on performing inference, refer to the introductory samples.
        inputs[0].host, case_num = load_normalized_test_case(data_path, mean)
        # The common.do_inference function will return a list of outputs - we only have one in this case.
        [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        # The index of the largest output value is reported as the prediction.
        pred = np.argmax(output)
        print("Test Case: " + str(case_num))
        print("Prediction: " + str(pred))

    # After the engine is destroyed, we destroy the plugin. This function is exposed through the binding code in plugin/pyFullyConnected.cpp.
    fc_factory.destroy_plugin()

if __name__ == "__main__":
    main()

遇到的问题

在cmake时出现variables NOTFOUND，如下图所示:
解决办法：在cmake时对他们的目录进行指定
指令：cmake .. -DNVINFER_LIB=/home/user/tensorrt_tar/TensorRT-5.1.5.0/lib/libnvinfer.so
-D_NVINFER_PLUGIN_LIB=/home/user/tensorrt_tar/TensorRT-5.1.5.0/lib/
-D_NVPARSERS_LIB=/home/user/tensorrt_tar/TensorRT-5.1.5.0/lib/
在make的时候出现fatal error: NvInfer.h: No such file or directory，如下图所示：
解决办法：在cmake时指定TRT_INC_DIR的目录
指令：cmake .. -DNVINFER_LIB=/home/user/tensorrt_tar/TensorRT-5.1.5.0/lib/libnvinfer.so
   -D_NVINFER_PLUGIN_LIB=/home/user/tensorrt_tar/TensorRT-5.1.5.0/lib/
   -D_NVPARSERS_LIB=/home/user/tensorrt_tar/TensorRT-5.1.5.0/lib/
   -DTRT_INC_DIR=/home/user/tensorrt_tar/TensorRT-5.1.5.0/include/
在python sample.py时出现/usr/src/tensorrt/data/mnist does not exist，如下图：
解决办法：指定data目录
指令：sudo python3 sample.py -d /home/user/tensorrt_tar/TensorRT-5.1.5.0/data/
在python sample.py时出现段错误
解决方法：重启电脑
ImportError: libnvonnxparser.so.0: cannot open shared object file: No such file or directory
解决办法：将这个文件从TensorRT的lib目录中拷贝至/usr/lib
指令：sudo cp /home/user/tensorrt_tar/TensorRT-5.1.5.0/targets/x86_64-linux-gnu/lib/libnvonnxparser.so.0 /usr/lib/
ImportError: libnvonnxparser_runtime.so.0: cannot open shared object file: No such file or directory
解决方法：同上
指令：sudo cp /home/user/tensorrt_tar/TensorRT-5.1.5.0/targets/x86_64-linux-gnu/lib/libnvonnxparser_runtime.so.0 /usr/lib/
ImportError: libnvinfer_plugin.so.5: cannot open shared object file: No such file or directory
解决方法：同上
指令：sudo cp /home/user/tensorrt_tar/TensorRT-5.1.5.0/targets/x86_64-linux-gnu/lib/libnvinfer_plugin.so.5 /usr/lib/

模型实战（21）之 C++ - tensorRT部署yolov8-det 目标检测明月醉窗台 #深度学习实战例程人工智能 c++YOLO 目标检测计算机视觉人工智能
C++-tensorRT部署yolov8-det目标检测python环境下如何直接调用推理模型转换并导出：pt->onnx->.engineC++tensorrt部署检测模型不写废话了，直接上具体实现过程+all代码1.Python环境下推理直接命令行推理，巨简单yolodetectpredictmodel=yolov8n.ptsource='https
【深度学习】大模型GLM-4-9B Chat ，微调与部署(3) TensorRT-LLM、TensorRT量化加速、Triton部署 XD742971636 深度学习机器学习深度学习人工智能
文章目录获取TensorRT-LLM代码：构建docker镜像并安装TensorRT-LLM：运行docker镜像：安装依赖魔改下部分package代码：量化：构建图：全局参数插件配置常用配置参数测试推理是否可以代码推理CLI推理性能测试小结验证是否严重退化使用NVIDIATriton部署在线推理服务器代码弄下来编译镜像启动容器安装依赖量化构建trtengines图Triton模板说明实操发起Tr
Jetson Orin NX Super安装TensorRT-LLM u013250861 #LLM/部署&推理 elasticsearch 大数据搜索引擎
根据图片中显示的JetsonOrinNXSuper系统环境（JetPack6.2+CUDA12.6+TensorRT10.7），以下是针对该平台的TensorRT-LLM安装优化方案：一、环境适配调整基于你的实际配置：JetPack6.2（含CUDA12.6,TensorRT10.7）Python3.10.12aarch64架构需选择适配的TensorRT-LLM版本。由于官方预编译包可能未覆盖此
TensorRT-LLM：大模型推理加速引擎的架构与实践
前言：技术背景与发展历程：随着GPT-4、LLaMA等千亿级参数模型的出现，传统推理框架面临三大瓶颈：显存占用高（单卡可达80GB）、计算延迟大（生成式推理需迭代处理）、硬件利用率低（Transformer结构存在计算冗余）。根据MLPerf基准测试，原始PyTorch推理的token生成速度仅为12.3tokens/s（A100显卡）。一、TensorRT-LLM介绍：TensorRT-LLM是
【TensorRT】TensorRT及加速原理浩瀚之水_csdn tensorrt
一、TensorRT架构概览TensorRT是NVIDIA推出的高性能推理优化器，专为GPU加速设计。其核心架构分为三层：前端解析器支持ONNX/UFF/Caffe等格式的模型解析执行格式验证和初步结构优化优化引擎核心优化层（层融合、精度校准、内存优化等）生成优化后的计算图（OptimizedGraph）运行时环境管理GPU内存分配执行优化后的计算图二、核心加速原理（8大关键技术）1.层融合（La
使用numpy或pytorch校验两个张量是否相等
文章目录1、numpy2、pytorch做算法过程中，如果涉及到模型落地，那必然会将原始的深度学习的框架训练好的模型转换成目标硬件模型的格式，如onnx,tensorrt,openvino,tflite;那么就有对比不同格式模型输出的一致性，从而判断模型转换是否成功。1、numpy用到的核心代码就一行，就是：importnumpyasnpnp.testing.assert_allclose(act
YOLOV10的tensorrt C++部署 dddccc1234 YOLO
根据博客进行python版本安装YOLOv10最全使用教程（含ONNX和TensorRT推理）-CSDN博客并将pt转为onnx：yoloexportmodel=yolov10s.ptformat=onnxopset=13simplify然后采用：https://github.com/hamdiboukamcha/yolov10-tensorrt.git进行c++编译配置好cuda11.7tens
tensorRT 与 torchserve-GPU性能对比 joker-G 计算机视觉 pytorch python
实验对比前端时间搭建了TensorRT、Torchserve-GPU，最近抽时间将这两种方案做一个简单的实验对比。实验数据Cuda11.0、Xeon®62423.1*80、RTX309024G、Resnet50TensorRT、Torchserve-GPU各自一张卡搭建10进程接口，感兴趣的可以查看我个人其他文章。30进程并发、2000张1200*720像素图像的总量数据TensorRT的部署使用
YOLOv8模型在RDK5开发板上的部署指南：.pt到.bin转换与优化实践 pk_xz123456 python 算法仿真模型 YOLO 人工智能 rnn 深度学习开发语言 lstm
以下是针对在RDK5开发板（基于NVIDIAJetsonOrin平台）部署YOLOv8模型的详细技术指南，涵盖从模型转换、优化到部署的全流程：YOLOv8模型在RDK5开发板上的部署指南：.pt到.bin转换与优化实践——基于TensorRT的高性能嵌入式部署方案第一章：技术背景与核心概念1.1RDK5开发板硬件架构NVIDIAJetsonOrinNX核心参数：1024-coreAmpereGPU
Pytorch模型安卓部署 python&java pytorch 人工智能 python
Pytorch是一种流行的深度学习框架，用于算法开发，而Android是一种广泛应用的操作系统，多应用于移动设备当中。目前多数的研究都是在于算法上，个人觉得把算法落地是一件很有意思的事情，因此本人准备分享一些模型落地的文章(后续可能分享微信小程序部署，PyQt部署以及exe打包，ncnn部署，tensorRT部署，MNN部署)。本篇文章主要分享Pytorch的Android端部署。看这篇文章的读者
昇腾AI生态组件全解析：与英伟达生态的深度对比
随着人工智能技术的快速发展，国产AI芯片的崛起正在改变全球计算产业的格局。华为昇腾（Ascend）系列AI处理器凭借自主创新的达芬奇架构，构建了完整的软硬件生态体系。本文将从核心组件对比、显卡性能对标两个维度，深入剖析昇腾与英伟达（NVIDIA）生态的技术差异与适用场景。一、昇腾核心组件与英伟达对标分析1.推理引擎：MindIEvsTensorRT昇腾MindIE1.0.0基于昇腾芯片的深度学习推
【推理加速】TensorRT C++ 部署YOLO11全系模型 gloomyfish c++开发语言
YOLO11YOLO11C++推理YOLO11是Ultralytics最新发布的目标检测、实例分割、姿态评估的系列模型视觉轻量化框架，基于前代YOLO8版本进行了多项改进和优化。YOLO11在特征提取、效率和速度、准确性以及环境适应性方面都有显著提升，达到SOTA。TensorRTC++SDK最新版本的TensorRT10.x版本已经修改了推理的接口函数与查询输入输出层的函数，其中以YOLO11对
Java全栈AI平台实战：从模型训练到部署的革命性突破——Spring AI+Deeplearning4j+TensorFlow Java API深度解析墨夶 Java学习资料3 java 人工智能 spring
一、背景与需求：为什么需要Java驱动的AI平台？某医疗影像公司面临以下挑战：多语言开发混乱：Python训练模型，C++部署推理，Java调用服务，导致维护成本高昂部署效率低下：PyTorch模型需手动转换ONNX格式，TensorRT优化耗时2小时/模型实时性不足：视频流分析延迟达3秒，无法满足急诊场景需求通过Java全栈AI平台，我们实现了：端到端开发：Java调用PyTorch训练模型，直
【Bug】Could not locate zlibwapi.dll. Please make sure it is in your library path!
报错信息：使用tensort加速，cmake编译失败，提示缺少zlibwapi.dll文件Couldnotlocatezlibwapi.dll.Pleasemakesureitisinyourlibrarypath!解决方案：从以下链接下载zlibwapi.dllhttp://www.winimage.com/zLibDll/我是在windows10系统下进行的TensorRT加速下载得到的压缩包
win10安装wsl2(ubuntu20.04)并安装 TensorRT-8.6.1.6、cuda_11.6、cudnn 狄龙疤 wsl wsl2 win10 tensorrt cuda cudnn ubuntu
参考博客：1.CUDA】如何在windows上安装Ollama3+openwebui（docker+WSL2+ubuntu+nvidia-container）：https://blog.csdn.net/smileyan9/article/details/1403916672.在Windows10上安装WSL2：https://download.csdn.net/blog/column/10991
【代码分析】TensorRT sampleINT8 详解 HaoBBNuanMM
目录前言代码分析Main入口构建(Build)网络BatchStream推理(Infer)过程资源释放前言TensorRT可以通过INT8量化处理网络，然后大幅加速网络推理速度，本文旨在详细分析MNISTINT8Sample的代码，解释如何使用TensorRT对网络做INT8量化处理。关于INT8量化的背景知识可以参考博文TensorRTINT8校准与量化原理代码分析sampleINT8的gith
TensorRT × TVM 联合优化实战：多架构异构平台的统一推理加速与性能调优全流程观熵大模型高阶优化技术专题架构人工智能
TensorRT×TVM联合优化实战：多架构异构平台的统一推理加速与性能调优全流程关键词TensorRT、TVM、异构推理优化、跨平台部署、GPU加速、NPU融合、自动调度、深度学习推理引擎、性能调优摘要在深度学习模型推理部署场景中，面对GPU、NPU、CPU等多架构异构平台的并存，如何实现统一的高性能推理优化成为企业工程落地的关键挑战。本文聚焦TensorRT与TVM的联合优化策略，从平台结构适
retinaface在ubuntu20.04(wsl2)下使用tensorrt(c++)部署狄龙疤 c++retinaface tensorrt cuda opencv 人脸识别神经网络模型
1.参考博客：1.RetinafaceTensorrtPython/C++部署：https://blog.csdn.net/weixin_45747759/article/details/1245340792.B站视频教程：https://www.bilibili.com/video/BV1Nv4y1K727/3.Retinaface_Tensorrtgithub仓库：https://github
独家首发！低照度环境下YOLOv8的增强方案——从理论到TensorRT部署向哆哆 YOLO 架构 yolov8
文章目录引言一、低照度图像增强技术现状1.1传统低照度增强方法局限性1.2深度学习-based方法进展二、Retinexformer网络原理2.1Retinex理论回顾2.2Retinexformer创新架构2.2.1光照感知Transformer2.2.2多尺度Retinex分解2.2.3自适应特征融合三、YOLOv8-Retinexformer实现3.1网络架构修改3.2联合训练策略四、实验与
win10 环境进行 python + pytorch + yolov8 + tensorRT( c++版 ) 测试过程记录狄龙疤 python pytorch c++cuda tensorRT yolov8 计算机视觉
参考博客：1.YOLOv8模型转换pt-＞onnx(附上代码)：https://blog.csdn.net/2303_80018785/article/details/1381949612.yolov8的TensorRT部署（C++版本）：https://blog.csdn.net/liujiahao123987/article/details/133892746test.cpp就是使用此博客的d
【实战分享】TensorRT+LLM：大模型推理性能优化初探 fengbeely java
TensorRT-LLM初体验千呼万唤始出来，备受期待的Tensorrt-LLM终于发布，发布版本0.5.0。github:https://github.com/NVIDIA/TensorRT-LLM/tree/main1.介绍TensorRT-LLM可以视为TensorRT和FastTransformer的结合体，旨为大模型推理加速而生。1.1丰富的优化特性除了FastTransformer对T
NIPS-2013《Distributed PCA and $k$-Means Clustering》 Christo3 机器学习 kmeans 算法大数据人工智能
推荐深蓝学院的《深度神经网络加速：cuDNN与TensorRT》，课程面向就业，细致讲解CUDA运算的理论支撑与实践，学完可以系统化掌握CUDA基础编程知识以及TensorRT实战，并且能够利用GPU开发高性能、高并发的软件系统，感兴趣可以直接看看链接：深蓝学院《深度神经网络加速：cuDNN与TensorRT》核心思想该论文的核心思想是将主成分分析（PCA）与分布式kkk-均值聚类相结合，提出一种
NVIDIA 实现通义千问 Qwen3 的生产级应用集成和部署【2025年 5月 2日】 u013250861 #LLM/部署&推理 jetson
阿里巴巴近期发布了其开源的混合推理大语言模型（LLM）通义千问Qwen3，此次Qwen3开源模型系列包含两款混合专家模型(MoE)235B-A22B（总参数2,350亿，激活参数220亿）和30B-A3B，以及六款稠密（Dense）模型0.6B、1.7B、4B、8B、14B、32B。现在，开发者能够基于NVIDIAGPU，使用NVIDIATensorRT-LLM、Ollama、SGLang、vLL
YOLO学习笔记｜ YOLO11对象检测，实例分割，姿态评估的TensorRT部署c++ 单北斗SLAMer YOLO学习从零到1 YOLO 机器学习深度学习 c++python
以下是YOLOv11在TensorRT上部署的步骤指南，涵盖对象检测、实例分割和姿态评估：1.模型导出与转换1.1导出ONNX模型importtorchfrommodels.experimentalimportattempt_loadmodel=attempt_load('yolov11s.pt',fuse=True)model.eval
✅ TensorRT Python 安装精简流程（适用于 Ubuntu 20.04+） dbcccccsds python ubuntu 开发语言
安装TensorRTPython轮子的步骤确保pip和wheel模块已更新并安装：参考链接python3-mpipinstall--upgradepippython3-mpipinstallwheel1.确认环境要求Python：版本3.8-3.13OS：Ubuntu20.04+或Windows10+CPU：x86_64或ARMSBSA架构安装前确保pip、wheel是最新的：python3-mp
TensorRT-LLM——优化大型语言模型推理以实现最大性能的综合指南知来者逆 LLM 语言模型人工智能自然语言处理 TensorRT LLM 大语言模型深度学习
引言随着对大型语言模型(LLM)的需求不断增长，确保快速、高效和可扩展的推理变得比以往任何时候都更加重要。NVIDIA的TensorRT-LLM通过提供一套专为LLM推理设计的强大工具和优化，TensorRT-LLM可以应对这一挑战。TensorRT-LLM提供了一系列令人印象深刻的性能改进，例如量化、内核融合、动态批处理和多GPU支持。这些改进使推理速度比传统的基于CPU的方法快8倍，从而改变了
tensorrt部署yolov8 张张张子 YOLO python 边缘计算
记录一下部署过程遇到的问题，我是要再jstson上部署，首先导出onnx文件，没什么问题，然后又两种方案转为engine文件1：trtexec.exe--onnx=best.onnx--saveEngine=best.engine--fp16tensorrt库命令转换，过程中会遇到一些问题，这里不细讲了，可以查。2：用yolov8官方版本转换，较为容易，官方库写的比较好最后会得到trt文件或eng
YOLOv8 TensorRT 部署（Python 推理）保姆级教程码农的日常搅屎棍 YOLO python
本教程手把手教你如何在NVIDIAGPU或RK3588上部署YOLOv8TensorRT推理，让你从零基础到高性能AI推理！1.部署前的准备1.1硬件要求NVIDIAGPU（如RTX3060/4090、Jetson系列）或RK3588NPU（支持TensorRT）CUDA（如11.x）、cuDNN、TensorRT已正确安装可运行nvcc--version、dpkg-l|grepTensorRT检
深度学习部署包含哪些步骤？不学习怎么给老板打工？深度学习
深度学习部署包含哪些步骤？阶段说明示例工具模型导出把.pt、.h5等格式模型导出为通用格式（如ONNX）PyTorch,TensorFlow,ONNX推理优化减小模型体积、加速推理（量化、剪枝）TensorRT,ONNXRuntime系统集成将模型嵌入业务系统中运行（桌面、服务器、边缘设备）C++/C#/Python接口，Flask/Qt/WebApi上线运行打包运行环境，部署在云端、本地或设备上
TensorRT-YOLOv9：高效实时目标检测的利器幸生朋Margot
TensorRT-YOLOv9：高效实时目标检测的利器tensorrt-yolov9CppandpythonimplementationofYOLOv9usingTensorRTAPI项目地址:https://gitcode.com/gh_mirrors/te/tensorrt-yolov9项目介绍TensorRT-YOLOv9是一个基于YOLOv9的高性能目标检测模型实现，通过NVIDIA的Te
TOMCAT在POST方法提交参数丢失问题 357029540 java tomcat jsp
摘自http://my.oschina.net/luckyi/blog/213209 昨天在解决一个BUG时发现一个奇怪的问题，一个AJAX提交数据在之前都是木有问题的，突然提交出错影响其他处理流程。检查时发现页面处理数据较多，起初以为是提交顺序不正确修改后发现不是由此问题引起。于是删除掉一部分数据进行提交，较少数据能够提交成功。恢复较多数据后跟踪提交FORM DATA ，发现数
在MyEclipse中增加JSP模板删除-2008-08-18 ljy325 jsp xml MyEclipse
在D:\Program Files\MyEclipse 6.0\myeclipse\eclipse\plugins\com.genuitec.eclipse.wizards_6.0.1.zmyeclipse601200710\templates\jsp 目录下找到Jsp.vtl，复制一份，重命名为jsp2.vtl,然后把里面的内容修改为自己想要的格式，保存。然后在 D:\Progr
JavaScript常用验证脚本总结 eksliang JavaScript javaScript表单验证
转载请出自出处：http://eksliang.iteye.com/blog/2098985 下面这些验证脚本，是我在这几年开发中的总结，今天把他放出来，也算是一种分享吧，现在在我的项目中也在用！包括日期验证、比较，非空验证、身份证验证、数值验证、Email验证、电话验证等等...! &nb
微软BI（4） 18289753290 微软BI SSIS
1） Q:查看ssis里面某个控件输出的结果： A MessageBox.Show(Dts.Variables["v_lastTimestamp"].Value.ToString()); 这是我们在包里面定义的变量 2):在关联目的端表的时候如果是一对多的关系，一定要选择唯一的那个键作为关联字段。 3) Q：ssis里面如果将多个数据源的数据插入目的端一
定时对大数据量的表进行分表对数据备份酷的飞上天空大数据量
工作中遇到数据库中一个表的数据量比较大，属于日志表。正常情况下是不会有查询操作的，但如果不进行分表数据太多，执行一条简单sql语句要等好几分钟。。分表工具：linux的shell + mysql自身提供的管理命令原理：使用一个和原表数据结构一样的表，替换原表。 linux shell内容如下： =======================开始
本质的描述与因材施教永夜-极光感想随笔
不管碰到什么事,我都下意识的想去探索本质,找寻一个最形象的描述方式。我坚信,世界上对一件事物的描述和解释,肯定有一种最形象,最贴近本质,最容易让人理解 &
很迷茫。。。随便小屋随笔
小弟我今年研一，也是从事的咱们现在最流行的专业（计算机）。本科三流学校，为了能有个更好的跳板，进入了考研大军，非常有幸能进入研究生的行业（具体学校就不说了，怕把学校的名誉给损了）。先说一下自身的条件，本科专业软件工程。主要学习就是软件开发，几乎和计算机没有什么区别。因为学校本身三流，也就是让老师带着学生学点东西，然后让学生毕业就行了。对专业性的东西了解的非常浅。就那学的语言来说
23种设计模式的意图和适用范围 aijuans 设计模式
Factory Method 意图定义一个用于创建对象的接口，让子类决定实例化哪一个类。Factory Method 使一个类的实例化延迟到其子类。　　适用性当一个类不知道它所必须创建的对象的类的时候。　　当一个类希望由它的子类来指定它所创建的对象的时候。　　当类将创建对象的职责委托给多个帮助子类中的某一个，并且你希望将哪一个帮助子类是代理者这一信息局部化的时候。 Abstr
Java中的synchronized和volatile aoyouzi java volatile synchronized
说到Java的线程同步问题肯定要说到两个关键字synchronized和volatile。说到这两个关键字，又要说道JVM的内存模型。JVM里内存分为main memory和working memory。 Main memory是所有线程共享的，working memory则是线程的工作内存，它保存有部分main memory变量的拷贝，对这些变量的更新直接发生在working memo
js数组的操作和this关键字百合不是茶 js 数组操作 this关键字
js数组的操作; 一:数组的创建: 1、数组的创建 var array = new Array();　//创建一个数组 var array = new Array([size]);　//创建一个数组并指定长度，注意不是上限，是长度 var arrayObj = new Array([element0[, element1[, ...[, elementN]]]
别人的阿里面试感悟 bijian1013 面试分享工作感悟阿里面试
原文如下：http://greemranqq.iteye.com/blog/2007170 一直做企业系统，虽然也自己一直学习技术，但是感觉还是有所欠缺，准备花几个月的时间，把互联网的东西，以及一些基础更加的深入透析，结果这次比较意外，有点突然，下面分享一下感受吧！ &nb
淘宝的测试框架Itest Bill_chen spring maven 框架单元测试 JUnit
Itest测试框架是TaoBao测试部门开发的一套单元测试框架，以Junit4为核心，集合DbUnit、Unitils等主流测试框架，应该算是比较好用的了。近期项目中用了下，有关itest的具体使用如下： 1.在Maven中引入itest框架： <dependency> <groupId>com.taobao.test</groupId&g
【Java多线程二】多路条件解决生产者消费者问题 bit1129 java多线程
package com.tom; import java.util.LinkedList; import java.util.Queue; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.locks.Condition; import java.util.concurrent.loc
汉字转拼音pinyin4j 白糖_ pinyin4j
以前在项目中遇到汉字转拼音的情况，于是在网上找到了pinyin4j这个工具包，非常有用，别的不说了，直接下代码： import java.util.HashSet; import java.util.Set; import net.sourceforge.pinyin4j.PinyinHelper; import net.sourceforge.pinyin
org.hibernate.TransactionException: JDBC begin failed解决方案 bozch ssh 数据库异常 DBCP
org.hibernate.TransactionException: JDBC begin failed: at org.hibernate.transaction.JDBCTransaction.begin(JDBCTransaction.java:68) at org.hibernate.impl.SessionImp
java-并查集（Disjoint-set）-将多个集合合并成没有交集的集合 bylijinnan java
import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.ut
Java PrintWriter打印乱码 chenbowen00 java
一个小程序读写文件，发现PrintWriter输出后文件存在乱码，解决办法主要统一输入输出流编码格式。读文件： BufferedReader 从字符输入流中读取文本，缓冲各个字符，从而提供字符、数组和行的高效读取。可以指定缓冲区的大小，或者可使用默认的大小。大多数情况下，默认值就足够大了。通常，Reader 所作的每个读取请求都会导致对基础字符或字节流进行相应的读取请求。因
[天气与气候]极端气候环境 comsci 环境
如果空间环境出现异变...外星文明并未出现,而只是用某种气象武器对地球的气候系统进行攻击,并挑唆地球国家间的战争,经过一段时间的准备...最大限度的削弱地球文明的整体力量,然后再进行入侵...... 那么地球上的国家应该做什么样的防备工作呢? &n
oracle order by与union一起使用的用法 daizj UNION oracle order by
当使用union操作时，排序语句必须放在最后面才正确，如下：只能在union的最后一个子查询中使用order by，而这个order by是针对整个unioning后的结果集的。So：如果unoin的几个子查询列名不同，如 Sql代码 select supplier_id, supplier_name from suppliers UNI
zeus持久层读写分离单元测试 deng520159 单元测试
本文是zeus读写分离单元测试,距离分库分表,只有一步了.上代码: 1.ZeusMasterSlaveTest.java package com.dengliang.zeus.webdemo.test; import java.util.ArrayList; import java.util.List; import org.junit.Assert; import org.j
Yii 截取字符串(UTF-8) 使用组件 dcj3sjt126com yii
1.将Helper.php放进protected\components文件夹下。 2.调用方法： Helper::truncate_utf8_string($content,20,false); //不显示省略号 Helper::truncate_utf8_string($content,20); //显示省略号 &n
安装memcache及php扩展 dcj3sjt126com PHP
安装memcache tar zxvf memcache-2.2.5.tgz cd memcache-2.2.5/ /usr/local/php/bin/phpize (?) ./configure --with-php-confi
JsonObject 处理日期 feifeilinlin521 java json JsonOjbect JsonArray JSONException
写这边文章的初衷就是遇到了json在转换日期格式出现了异常 net.sf.json.JSONException: java.lang.reflect.InvocationTargetException 原因是当你用Map接收数据库返回了java.sql.Date 日期的数据进行json转换出的问题话不多说直接上代码 &n
Ehcache（06）——监听器 234390216 监听器 listener ehcache
监听器 Ehcache中监听器有两种，监听CacheManager的CacheManagerEventListener和监听Cache的CacheEventListener。在Ehcache中，Listener是通过对应的监听器工厂来生产和发生作用的。下面我们将来介绍一下这两种类型的监听器。
activiti 自带设计器中chrome 34版本不能打开bug的解决 jackyrong Activiti
在acitivti modeler中，如果是chrome 34，则不能打开该设计器，其他浏览器可以，经证实为bug，参考 http://forums.activiti.org/content/activiti-modeler-doesnt-work-chrome-v34 修改为，找到 oryx.debug.js 在最头部增加 if (!Document.
微信收货地址共享接口-终极解决 laotu5i0 微信开发
最近要接入微信的收货地址共享接口，总是不成功，折腾了好几天，实在没办法网上搜到的帖子也是骂声一片。我把我碰到并解决问题的过程分享出来，希望能给微信的接口文档起到一个辅助作用，让后面进来的开发者能快速的接入，而不需要像我们一样苦逼的浪费好几天，甚至一周的青春。各种羞辱、谩骂的话就不说了，本人还算文明。如果你能搜到本贴，说明你已经碰到了各种 ed
关于人才 netkiller.github.com 工作面试招聘 netkiller 人才
关于人才每个月我都会接到许多猎头的电话，有些猎头比较专业，但绝大多数在我看来与猎头二字还是有很大差距的。与猎头接触多了，自然也了解了他们的工作，包括操作手法，总体上国内的猎头行业还处在初级阶段。总结就是“盲目推荐，以量取胜”。目前现状许多从事人力资源工作的人，根本不懂得怎么找人才。处在人才找不到企业，企业找不到人才的尴尬处境。企业招聘，通常是需要用人的部门提出招聘条件，由人
搭建 CentOS 6 服务器 - 目录 rensanning centos
(1) 安装CentOS ISO（desktop/minimal）、Cloud（AWS/阿里云）、Virtualization（VMWare、VirtualBox）详细内容 (2) Linux常用命令 cd、ls、rm、chmod...... 详细内容 (3) 初始环境设置用户管理、网络设置、安全设置...... 详细内容 (4) 常驻服务Daemon
【求助】mongoDB无法更新主键 toknowme mongodb
Query query = new Query(); query.addCriteria(new Criteria("_id").is(o.getId())); &n
jquery 页面滚动到底部自动加载插件集合 xp9802 jquery
很多社交网站都使用无限滚动的翻页技术来提高用户体验，当你页面滑到列表底部时候无需点击就自动加载更多的内容。下面为你推荐 10 个 jQuery 的无限滚动的插件： 1. jQuery ScrollPagination jQuery ScrollPagination plugin 是一个 jQuery 实现的支持无限滚动加载数据的插件。 2. jQuery Screw S

TensorRT学习笔记5 - 运行fc_plugin_caffe_mnist

目录

安装依赖

运行过程

代码解析

遇到的问题

你可能感兴趣的:(TensorRT)