Task: study the monocular depth-estimation model AdaBins and, starting from its Python source code, integrate it into the existing OpenCV model collection.
AdaBins paper: AdaBins: Depth Estimation using Adaptive Bins
AdaBins source: AdaBins GitHub
Analysis:
1) Understand the basic principles of AdaBins and read through its code.
2) Convert the model into a more convenient and efficient LibTorch (TorchScript) model, run inference in OpenCV, and verify the result (see the verification sketches below).
import torch
# model definition and checkpoint loader come from the AdaBins repository
import model_io
from models import UnetAdaptiveBins

def torch2LibTorch():
    MIN_DEPTH = 1e-3
    MAX_DEPTH_NYU = 10
    N_BINS = 256
    pretrained_path = "./pretrained/AdaBins_nyu.pt"
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = 'cpu'
    # load the model and the pretrained NYU checkpoint
    model = UnetAdaptiveBins.build(n_bins=N_BINS, min_val=MIN_DEPTH, max_val=MAX_DEPTH_NYU)
    model, _, _ = model_io.load_checkpoint(pretrained_path, model)
    # set the model to inference mode
    model.to(device)
    model.eval()
    # trace the model with a dummy 1x3x480x640 input and save the TorchScript module
    example = torch.rand(1, 3, 480, 640).to(device)
    traced_script_module = torch.jit.trace(model, example)
    traced_script_module.save('adabins_trace.pt')
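To verify that the traced module matches the original network (point 2 above), a quick numeric comparison can be run on a random input. This is a minimal sketch: the checkpoint and trace file paths follow the conversion code above, and the function name and printout are purely illustrative.
import torch
import model_io
from models import UnetAdaptiveBins

def verify_trace(pretrained_path="./pretrained/AdaBins_nyu.pt", traced_path="adabins_trace.pt"):
    # rebuild the original model exactly as in torch2LibTorch()
    model = UnetAdaptiveBins.build(n_bins=256, min_val=1e-3, max_val=10)
    model, _, _ = model_io.load_checkpoint(pretrained_path, model)
    model.eval()
    # load the traced TorchScript module
    traced = torch.jit.load(traced_path)
    traced.eval()
    x = torch.rand(1, 3, 480, 640)
    with torch.no_grad():
        # UnetAdaptiveBins.forward returns (bin_edges, pred); index 1 is the depth map
        _, pred_ref = model(x)
        _, pred_trc = traced(x)
    print("max abs diff:", (pred_ref - pred_trc).abs().max().item())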
On the C++ side, the traced module is wrapped in a small class (adabins.cpp) that loads the TorchScript file and exposes a depth() method:
#include <string>
#include <opencv2/opencv.hpp>
#include <torch/script.h>
#include <torch/torch.h>
using namespace std;
using namespace cv;

class adabins
{
public:
    adabins(int h, int w, const string& model_file = "model/adabins_trace.pt") {
        this->inHeight = h;
        this->inWidth = w;
        // load the traced TorchScript module exported by torch2LibTorch()
        net = torch::jit::load(model_file);
    }
    Mat depth(const Mat& frame);
private:
    torch::jit::script::Module net;
    int inWidth;
    int inHeight;
};
Mat adabins::depth(const Mat& frame) {
    torch::NoGradGuard no_grad;
    Mat img = frame.clone();
    int img_h = img.rows;
    int img_w = img.cols;
    // cv::Size is (width, height)
    resize(img, img, Size(this->inWidth, this->inHeight));
    // NOTE: the frame is kept in BGR order and only scaled to [0,1];
    // the original AdaBins Python pipeline feeds RGB normalized with ImageNet mean/std.
    torch::Tensor tensor_image = torch::from_blob(img.data, {1, img.rows, img.cols, 3}, torch::kU8).to(torch::kFloat) / 255.0;
    tensor_image = tensor_image.permute({0, 3, 1, 2});  // NHWC -> NCHW
    tensor_image = tensor_image.to(at::kCPU);
    net.to(at::kCPU);
    net.eval();
    // the traced model returns (bin_edges, pred); element 1 is the predicted depth map
    torch::Tensor out_tensor = net.forward({tensor_image}).toTuple()->elements()[1].toTensor();
    out_tensor = out_tensor.detach().permute({0, 2, 3, 1}).squeeze();
    // clamp to the NYU depth range and rescale to 8-bit for visualization
    auto min_t = 0.1;
    auto max_t = 10;
    out_tensor = out_tensor.clamp(min_t, max_t);
    out_tensor = (out_tensor - min_t) / (max_t - min_t);
    out_tensor = out_tensor.mul(255.0).clamp(0, 255).to(torch::kU8);
    out_tensor = out_tensor.to(at::kCPU).contiguous();
    // the network predicts at half the input resolution; wrap the tensor memory without copying
    Mat resultImg((int)out_tensor.size(0), (int)out_tensor.size(1), CV_8UC1, out_tensor.data_ptr());
    resize(resultImg, resultImg, Size(img_w, img_h));
    // imwrite("inference/test.png", resultImg);
    applyColorMap(resultImg, resultImg, COLORMAP_JET);
    return resultImg;
}
// module test
// int main()
// {
// Mat frame = imread("inference/classroom.jpg", 1);
// if (frame.empty()) {
// printf("could not load image...\n");
// return -1;
// }
// int h = 480, w = 640;
// adabins net(h, w);
// Mat depth = net.depth(frame);
// Mat res = viewer({frame, depth});
//     static const string kWinName = "Deep learning depth estimation ADABINS in OpenCV";
// namedWindow(kWinName, WINDOW_NORMAL);
// imshow(kWinName, res);
// waitKey(0);
// destroyAllWindows();
// return 0;
// }
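For a visual cross-check of the C++ output, the same pre- and post-processing can be reproduced in Python with the traced module. This is a sketch that assumes the test image from the module test above; the output file name is illustrative.
import cv2
import numpy as np
import torch

net = torch.jit.load("adabins_trace.pt").eval()
img = cv2.imread("inference/classroom.jpg", cv2.IMREAD_COLOR)
h, w = img.shape[:2]
# same preprocessing as adabins::depth(): resize to 640x480, scale to [0,1], NHWC -> NCHW
x = cv2.resize(img, (640, 480)).astype(np.float32) / 255.0
x = torch.from_numpy(x).permute(2, 0, 1).unsqueeze(0)
with torch.no_grad():
    _, pred = net(x)  # (bin_edges, pred)
# same post-processing: clamp to [0.1, 10], scale to 8-bit, resize back, apply color map
depth = pred.squeeze().numpy()
depth = np.clip(depth, 0.1, 10.0)
depth = ((depth - 0.1) / (10.0 - 0.1) * 255.0).astype(np.uint8)
depth = cv2.resize(depth, (w, h))
cv2.imwrite("inference/classroom_depth_py.png", cv2.applyColorMap(depth, cv2.COLORMAP_JET))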
The new model is then wired into the existing demo's model dispatch as an additional branch:
else if (config.model_name == "adabins") {
    int h = 480, w = 640;
    adabins net(h, w);
    Mat depth = net.depth(srcimg);
    Mat res = viewer({srcimg, depth});
    kWinName = "Deep learning depth estimation ADABINS in OpenCV";
    namedWindow(kWinName, WINDOW_NORMAL);
    imshow(kWinName, res);
    waitKey(0);
    destroyAllWindows();
}
For tracing, one line of the AdaBins source is also changed: in models/layers.py (PatchTransformerEncoder.forward), the positional-encoding addition is written with an explicit transpose(1, 0) in place of the original .T shorthand:
embeddings = embeddings + self.positional_encodings[:embeddings.shape[2], :].transpose(1, 0).unsqueeze(0)
Finally, the new source file is added to the project's CMake build and linked against OpenCV and LibTorch:
# ${TORCH_LIBRARIES} assumes Torch has already been found, e.g. find_package(Torch REQUIRED) with CMAKE_PREFIX_PATH pointing at libtorch
add_library(adabins adabins.cpp)
target_link_libraries(adabins opencv_world460 ${TORCH_LIBRARIES})