http://software-dl.ti.com/dsps/dsps_public_sw/c6000/web/c6accel/latest/index_FDS.html 下载地址
http://software-dl.ti.com/dsps/dsps_public_sw/ezsdk/index.html
For C6Accel 2.01.00.11 and later
C6Accel 2.01.00.11 integrates OpenCV functionality on the DSP along with other libraries. This release adds a test application and a new build target 'opencv_app' that builds it as part of the package. Building opencv_app has a prerequisite: users must first build OpenCV 2.x for the ARM as described here and place the OpenCV shared libraries on the target filesystem under the path $TARGETFS/usr/lib
After the prerequisite step is complete, execute make to build and install the OpenCV test application:
make opencv_app
make opencv_app_install
从以上的文字可以看出,需要交叉编译 OpenCV,因为这个工具会对 ARM 和 DSP 的运算速度做比较。其实这一步貌似不是很有必要;对于有些环节,把 OpenCV 移植到 A8 本身就有困难。
我本人也在 DM3730 上把 OpenCV 1.0 完整移植到了 Codec Engine 中,改天把思路和代码一起贴出来。
c6accel 做的工作,很方便,不过如果要做优化,也要改动一部分的代码,貌似,源码都是打包好的
下面是在 DM8148 上比较 ARM 和 DSP 执行 OpenCV 函数耗时的源码:
/*================================================================================*/ /* Copyright (c) 2010, Texas Instruments Incorporated */ /* All rights reserved. */ /* */ /* Name: C6Accel_testfxns.c */ /* */ /* Descriptions: */ /* File contains code to test kernels in the C6Accel codec */ /* */ /* Version: 0.0.1 */ /*================================================================================*/ /* This define uses the new frame based (ie row and col parameters) that are optimised for C6Accel as they only request one operation on all rows rather than row operations*/ #define USE_NEW_FRAME_APIS /*XDC and codec engine includes*/ #include <xdc/std.h> #include <ti/sdo/ce/osal/Memory.h> /* Run Time lib include files: */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <stdarg.h> #include <math.h> //#include "precomp.hpp" #include <ti/sdo/linuxutils/cmem/src/interface/cmem.h> /* Declare MACROS that will be used for benchmarking*/ #include "benchmark.h" /* Include C6ACCEL headers*/ #include "../../c6accelw/c6accelw.h" #include "../../c6accelw/c6accelw_opencv.h" // extra headers for OpenCV /*#include <time.h> #include <sys/types.h> #include <sys/time.h> #include <sys/stat.h>*/ #include "opencv/highgui.h" #define CVX_GRAY50 cvScalar(100,0,0,0) #define CVX_WHITE cvScalar(255,0,0,0) /* Create default heap memory configuration for test functions */ static Memory_AllocParams testfxnsMemParams = { Memory_CONTIGHEAP, Memory_CACHED, Memory_DEFAULTALIGNMENT, 0 }; extern CMEM_AllocParams cvCmemParams;// = {CMEM_HEAP, CMEM_CACHED, 8}; /* Test for Floating point kernels */ /* * Test function for arithmetic rts single precision functions in this function */ // helper function - get overhead time static int get_overhead_time(void) { struct timeval startTime, endTime; gettimeofday(&startTime, NULL); gettimeofday(&endTime, NULL); return endTime.tv_usec - startTime.tv_usec; } Int c6accel_test_cvSobel(C6accel_Handle hC6accel, char *input_file_name, int n) { 
IplImage *inputImg, *outputImg_arm, *outputImg_dsp, *scaleImg_arm, *scaleImg_dsp; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; printf("cvSobel Test (%s, %i iterations)\n", input_file_name, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input image from file inputImg = cvLoadImage( input_file_name, CV_LOAD_IMAGE_GRAYSCALE); // 2. Check image depth; require 8-bit if (inputImg->depth != IPL_DEPTH_8U && inputImg->depth != IPL_DEPTH_8S) { printf("C6accel_cvSobel test failed; input image must have 8-bit depth.\n"); return 0; } // 3. Allocate output images (must have 16- and 8-bit depth; output MUST be 16S) outputImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_16S, 1); outputImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_16S, 1); scaleImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1); scaleImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1); //printf("outputImage: %x\n", CMEM_getPhys(outputImg_arm)); // printf("outputImagedata: %x\n", CMEM_getPhys(outputImg_arm->imageData)); // 4.a Apply ARM algorithm cvSobel(inputImg, outputImg_arm, 1, 1, 3); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvSobel(inputImg, outputImg_arm, 1, 1, 3); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Sobel function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4.b Apply DSP algorithm C6accel_cvSobel(hC6accel, inputImg, outputImg_dsp, 1, 1, 3); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvSobel(hC6accel, inputImg, outputImg_dsp, 1, 1, 3); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + 
endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP Sobel function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5.a Apply scale conversion on ARM cvConvertScale(outputImg_arm, scaleImg_arm, 0.5, 128); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvConvertScale(outputImg_arm, scaleImg_arm, 0.5, 128); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM ConvertScale function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5.b Apply scale conversion on DSP C6accel_cvConvertScale(hC6accel, outputImg_dsp, scaleImg_dsp, 0.5, 128); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvConvertScale(hC6accel, outputImg_dsp, scaleImg_dsp, 0.5, 128); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP ConvertScale function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 6. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(scaleImg_arm, scaleImg_dsp, CV_L2, NULL)); // 7. 
Save outputs to filesystem cvSaveImage("./output_arm.png", scaleImg_arm, 0); cvSaveImage("./output_dsp.png", scaleImg_dsp, 0); //Free memory as cvFree was not patched during the openCv build // printf("outputImage: %x\n", CMEM_getPhys(outputImg_arm)); // printf("outputImagedata: %x\n", CMEM_getPhys(outputImg_arm->imageData)); // Freeing memory cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); cvReleaseImage(&scaleImg_arm); cvReleaseImage(&scaleImg_dsp); cvReleaseImage(&inputImg); printf("C6accel_cvSobel test completed successfully; outputs saved to filesystem\n"); return 1; } Int c6accel_test_cvFlip(C6accel_Handle hC6accel, char *input_file_name, int n) { IplImage *inputImg, *outputImg_arm, *outputImg_dsp, *copyImg_arm, *copyImg_dsp; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; printf("cvFlip Test (%s, %i iterations)\n", input_file_name, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input image from file inputImg = cvLoadImage( input_file_name, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels); outputImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels); copyImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels); copyImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels); // 3.a Apply ARM algorithm cvFlip(inputImg, outputImg_arm, -1); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvFlip(inputImg, outputImg_arm, -1); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Flip function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 3.b Apply DSP algorithm C6accel_cvFlip(hC6accel, inputImg, outputImg_dsp, -1); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvFlip(hC6accel, inputImg, outputImg_dsp, -1); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP Flip function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4.a Copy image on ARM cvCopy(outputImg_arm, copyImg_arm, NULL); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvCopy(outputImg_arm, copyImg_arm, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Copy function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4.b Copy image on DSP C6accel_cvCopy(hC6accel, outputImg_dsp, copyImg_dsp, NULL); // run once before timing 
gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvCopy(hC6accel, outputImg_dsp, copyImg_dsp, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP Copy function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(copyImg_arm, copyImg_dsp, CV_L2, NULL)); // 6. Save outputs to filesystem cvSaveImage("./output_arm.png", copyImg_arm, 0); cvSaveImage("./output_dsp.png", copyImg_dsp, 0); //OpenCV way of Freeing memory cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); cvReleaseImage(©Img_arm); cvReleaseImage(©Img_dsp); cvReleaseImage(&inputImg); printf("C6accel_cvFlip test completed successfully; outputs saved to filesystem\n"); return 1; } Int c6accel_test_cvCircle(C6accel_Handle hC6accel, char *input_file_name, int n) { IplImage *armImg, *dspImg; struct timeval startTime, endTime; int t_overhead, t_algo, radius, i; CvScalar orange = { 0, 128, 255, 255 }, blue = {255, 0, 0, 255}; // BGRA CvPoint center, pt1, pt2; float t_avg; printf("cvCircle Test (%s, %i iterations)\n", input_file_name, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input image from file armImg = cvLoadImage( input_file_name, CV_LOAD_IMAGE_COLOR); dspImg = cvLoadImage( input_file_name, CV_LOAD_IMAGE_COLOR); // 2. Compute circle and rectangle parameters (center, radius, pt1, pt2) center.x = armImg->width / 2; center.y = armImg->height / 2; radius = (center.x >= center.y) ? 
center.y : center.x; pt1.x = pt1.y = 0; pt2.x = center.x; pt2.y = armImg->height; // 3.a Zero out ARM image cvSetZero(armImg); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvSetZero(armImg); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM SetZero function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 3.b Zero out DSP image C6accel_cvSetZero(hC6accel, dspImg); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvSetZero(hC6accel, dspImg); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP SetZero function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4.a Draw rectangle on ARM image cvRectangle(armImg, pt1, pt2, blue, 1, 8, 0); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvRectangle(armImg, pt1, pt2, blue, -1, 8, 0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Rectangle function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4.b Draw rectangle on DSP image C6accel_cvRectangle(hC6accel, dspImg, pt1, pt2, blue, 1, 8, 0); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvRectangle(hC6accel, dspImg, pt1, pt2, blue, -1, 8, 0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP Rectangle function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5.a Draw circle on ARM image cvCircle(armImg, center, radius, orange, 1, 8, 
0); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvCircle(armImg, center, radius, orange, -1, 8, 0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Circle function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5.b Draw circle on DSP image C6accel_cvCircle(hC6accel, dspImg, center, radius, orange, 1, 8, 0); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvCircle(hC6accel, dspImg, center, radius, orange, -1, 8, 0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP Circle function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 6. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(armImg, dspImg, CV_L2, NULL)); // 7. Save outputs to filesystem cvSaveImage("./output_arm.png", armImg, 0); cvSaveImage("./output_dsp.png", dspImg, 0); // Free memory cvReleaseImage(&armImg); cvReleaseImage(&dspImg); printf("C6accel_cvCircle test completed successfully; outputs saved to filesystem\n"); return 1; } Int c6accel_test_cvResize(C6accel_Handle hC6accel, char *input_file_name, int n) { IplImage *inputImg, *colorImg_arm, *colorImg_dsp, *resizeImg_arm, *resizeImg_dsp; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; printf("cvResize Test (%s, %i iterations)\n", input_file_name, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input image from file inputImg = cvLoadImage( input_file_name, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate recolor images (must same depth, channels, size as input) // and resize images (must have same depth, channels, as input with half size) colorImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels); colorImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), inputImg->depth, inputImg->nChannels); resizeImg_arm = cvCreateImage(cvSize(inputImg->width / 2, inputImg->height / 2), inputImg->depth, inputImg->nChannels); resizeImg_dsp = cvCreateImage(cvSize(inputImg->width / 2, inputImg->height / 2), inputImg->depth, inputImg->nChannels); // 3.a Apply ARM algorithm cvCvtColor(inputImg, colorImg_arm, CV_BGR2YCrCb); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvCvtColor(inputImg, colorImg_arm, CV_BGR2YCrCb); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM CvtColor function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 3.b Apply DSP algorithm C6accel_cvCvtColor(hC6accel, inputImg, colorImg_dsp, CV_BGR2YCrCb); // run once before timing gettimeofday(&startTime, NULL); //for (i = 0; i < 1000; i++) C6accel_cvCvtColor(hC6accel, inputImg, colorImg_dsp, CV_BGR2YCrCb); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP CvtColor function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4.a Resize image on ARM cvResize(colorImg_arm, resizeImg_arm, CV_INTER_LINEAR); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvResize(colorImg_arm, resizeImg_arm, CV_INTER_LINEAR); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called 
ARM Resize function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4.b Resize image on DSP C6accel_cvResize(hC6accel, colorImg_dsp, resizeImg_dsp, CV_INTER_LINEAR); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvResize(hC6accel, colorImg_dsp, resizeImg_dsp, CV_INTER_LINEAR); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP Resize function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(resizeImg_arm, resizeImg_dsp, CV_L2, NULL)); // 6. Save outputs to filesystem cvSaveImage("./output_arm.png", resizeImg_arm, 0); cvSaveImage("./output_dsp.png", resizeImg_dsp, 0); // Free memory cvReleaseImage(&resizeImg_arm); cvReleaseImage(&resizeImg_dsp); cvReleaseImage(&colorImg_arm); cvReleaseImage(&colorImg_dsp); cvReleaseImage(&inputImg); printf("C6accel_cvResize test completed successfully; outputs saved to filesystem\n"); return 1; } Int c6accel_test_cvEqualizeHist(C6accel_Handle hC6accel, char *input_file_name, int n) { IplImage *inputImg, *outputImg_arm, *outputImg_dsp; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; printf("cvEqualizeHist Test (%s, %i iterations)\n", input_file_name, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input image from file inputImg = cvLoadImage( input_file_name, CV_LOAD_IMAGE_GRAYSCALE); // 2. Check image depth; require 8-bit if (inputImg->depth != IPL_DEPTH_8U && inputImg->depth != IPL_DEPTH_8S) { printf("C6accel_cvEqualizeHist test failed; input image must have 8-bit depth.\n"); return 0; } // 3. 
Allocate output images outputImg_arm = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1); outputImg_dsp = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1); // 4.a Apply ARM algorithm cvEqualizeHist(inputImg, outputImg_arm); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvEqualizeHist(inputImg, outputImg_arm); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM EqualizeHist function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4.b Apply DSP algorithm C6accel_cvEqualizeHist(hC6accel, inputImg, outputImg_dsp); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvEqualizeHist(hC6accel, inputImg, outputImg_dsp); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP EqualizeHist function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 6. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); // 7. 
Save outputs to filesystem cvSaveImage("./output_arm.png", outputImg_arm, 0); cvSaveImage("./output_dsp.png", outputImg_dsp, 0); // Free memory cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); cvReleaseImage(&inputImg); printf("C6accel_cvEqualizeHist test completed successfully; outputs saved to filesystem\n"); return 1; } // TEMP: function to traverse classifier cascade typedef int sumtype; typedef double sqsumtype; typedef struct CvHidHaarFeature { struct { sumtype *p0, *p1, *p2, *p3; float weight; } rect[CV_HAAR_FEATURE_MAX]; } CvHidHaarFeature; typedef struct CvHidHaarTreeNode { CvHidHaarFeature feature; float threshold; int left; int right; } CvHidHaarTreeNode; typedef struct CvHidHaarClassifier { int count; //CvHaarFeature* orig_feature; CvHidHaarTreeNode* node; float* alpha; } CvHidHaarClassifier; typedef struct CvHidHaarStageClassifier { int count; float threshold; CvHidHaarClassifier* classifier; int two_rects; struct CvHidHaarStageClassifier* next; struct CvHidHaarStageClassifier* child; struct CvHidHaarStageClassifier* parent; } CvHidHaarStageClassifier; struct CvHidHaarClassifierCascade { int count; int is_stump_based; int has_tilted_features; int is_tree; double inv_window_area; CvMat sum, sqsum, tilted; CvHidHaarStageClassifier* stage_classifier; sqsumtype *pq0, *pq1, *pq2, *pq3; sumtype *p0, *p1, *p2, *p3; void** ipp_stages; }; void traverse_and_translate_hid_cascade(CvHidHaarClassifierCascade *cascade, FILE *fp) { CvHidHaarStageClassifier *hid_stage; CvHidHaarClassifier *hid_classifier; CvHidHaarTreeNode *hid_node; int stage_count, classifier_count, feature_count; int i, j, k, l; if (cascade == NULL) { fprintf(fp, "//\tHidden cascade pointer is NULL; no traversal required\n"); return; } else { stage_count = cascade->count; fprintf(fp, "//\tHidden cascade at 0x%08X has %i stages\n", (unsigned int)cascade, stage_count); for (i = 0; i < cascade->count; i++) { hid_stage = cascade->stage_classifier + i; classifier_count = hid_stage->count; 
fprintf(fp, "//\t\tHidden stage %i at 0x%08X has %i classifiers\n", i, (unsigned int)hid_stage, classifier_count); for (j = 0; j < classifier_count; j++) { hid_classifier = hid_stage->classifier + j; feature_count = hid_classifier->count; fprintf(fp, "//\t\t\tHidden classifier %i at 0x%08X has %i nodes/features\n", j, (unsigned int)hid_classifier, feature_count); for (k = 0; k < feature_count; k++) { hid_node = hid_classifier->node + k; // apply CMEM_getPhys to node contents (none) // apply CMEM_getPhys to feature contents (rect[0].p0, .p1, .p2, .p3, rect[1].p0, etc.) for (l = 0; l < CV_HAAR_FEATURE_MAX; l++) { hid_node->feature.rect[l].p0 = hid_node->feature.rect[l].p0 ? (void *)CMEM_getPhys(hid_node->feature.rect[l].p0) : NULL; hid_node->feature.rect[l].p1 = hid_node->feature.rect[l].p1 ? (void *)CMEM_getPhys(hid_node->feature.rect[l].p1) : NULL; hid_node->feature.rect[l].p2 = hid_node->feature.rect[l].p2 ? (void *)CMEM_getPhys(hid_node->feature.rect[l].p2) : NULL; hid_node->feature.rect[l].p3 = hid_node->feature.rect[l].p3 ? (void *)CMEM_getPhys(hid_node->feature.rect[l].p3) : NULL; fprintf(fp, "//\t\t\t\tHidden node %i feature rect %i pointers translated to physical addresses (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n", k, l, (unsigned int)hid_node->feature.rect[l].p0, (unsigned int)hid_node->feature.rect[l].p1, hid_node->feature.rect[l].p2, hid_node->feature.rect[l].p3); } } // apply CMEM_getPhys to classifier contents (node, alpha) hid_classifier->node = hid_classifier->node ? (void *)CMEM_getPhys(hid_classifier->node) : NULL; hid_classifier->alpha = hid_classifier->alpha ? (void *)CMEM_getPhys(hid_classifier->alpha) : NULL; fprintf(fp, "//\t\t\tHidden classifier %i pointers translated to physical addresses (0x%08X, 0x%08X)\n", j, (unsigned int)hid_classifier->node, (unsigned int)hid_classifier->alpha); } // apply CMEM_getPhys to stage contents (classifier, next, child, parent) hid_stage->classifier = hid_stage->classifier ? 
(void *)CMEM_getPhys(hid_stage->classifier) : NULL; hid_stage->next = hid_stage->next ? (void *)CMEM_getPhys(hid_stage->next) : NULL; hid_stage->child = hid_stage->child ? (void *)CMEM_getPhys(hid_stage->child) : NULL; hid_stage->parent = hid_stage->parent ? (void *)CMEM_getPhys(hid_stage->parent) : NULL; fprintf(fp, "//\t\tHidden stage %i pointers translated to physical addresses (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n", i, (unsigned int)hid_stage->classifier, (unsigned int)hid_stage->next, (unsigned int)hid_stage->child, (unsigned int)hid_stage->parent); } // apply CMEM_getPhys to hid_cascade contents (stage_classifier, sum.refcount, sum.data.i, // sqsum.refcount, sqsum.data.db, tilted.refcount, tilted.data.i, pq0, pq1, pq2, pq3, // p0, p1, p2, p3, ipp_stages) cascade->stage_classifier = cascade->stage_classifier ? (void *)CMEM_getPhys(cascade->stage_classifier) : NULL; cascade->sum.refcount = cascade->sum.refcount ? (void *)CMEM_getPhys(cascade->sum.refcount) : NULL; cascade->sum.data.i = cascade->sum.data.i ? (void *)CMEM_getPhys(cascade->sum.data.i) : NULL; cascade->sqsum.refcount = cascade->sqsum.refcount ? (void *)CMEM_getPhys(cascade->sqsum.refcount) : NULL; cascade->sqsum.data.db = cascade->sqsum.data.db ? (void *)CMEM_getPhys(cascade->sqsum.data.db) : NULL; cascade->tilted.refcount = cascade->tilted.refcount ? (void *)CMEM_getPhys(cascade->tilted.refcount) : NULL; cascade->tilted.data.i = cascade->tilted.data.i ? 
(void *)CMEM_getPhys(cascade->tilted.data.i) : NULL; fprintf(fp, "//\tHidden cascade pointers translated to physical addresses (1 of 4) (0x%08X, 0x%08X, 0x%08X)\n", (unsigned int)cascade->stage_classifier, (unsigned int)cascade->sum.refcount, (unsigned int)cascade->sum.data.i); fprintf(fp, "//\tHidden cascade pointers translated to physical addresses (2 of 4) (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n", (unsigned int)cascade->sqsum.refcount, (unsigned int)cascade->sqsum.data.db, (unsigned int)cascade->tilted.refcount, (unsigned int)cascade->tilted.data.i); cascade->pq0 = cascade->pq0 ? (void *)CMEM_getPhys(cascade->pq0) : NULL; cascade->pq1 = cascade->pq1 ? (void *)CMEM_getPhys(cascade->pq1) : NULL; cascade->pq2 = cascade->pq2 ? (void *)CMEM_getPhys(cascade->pq2) : NULL; cascade->pq3 = cascade->pq3 ? (void *)CMEM_getPhys(cascade->pq3) : NULL; cascade->p0 = cascade->p0 ? (void *)CMEM_getPhys(cascade->p0) : NULL; cascade->p1 = cascade->p1 ? (void *)CMEM_getPhys(cascade->p1) : NULL; cascade->p2 = cascade->p2 ? (void *)CMEM_getPhys(cascade->p2) : NULL; cascade->p3 = cascade->p3 ? (void *)CMEM_getPhys(cascade->p3) : NULL; fprintf(fp, "//\tHidden cascade pointers translated to physical addresses (3 of 4) (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n", (unsigned int)cascade->pq0, (unsigned int)cascade->pq1, (unsigned int)cascade->pq2, (unsigned int)cascade->pq3); fprintf(fp, "//\tHidden cascade pointers translated to physical addresses (4 of 4) (0x%08X, 0x%08X, 0x%08X, 0x%08X)\n", (unsigned int)cascade->p0, (unsigned int)cascade->p1, (unsigned int)cascade->p2, (unsigned int)cascade->p3); if (cascade->ipp_stages == NULL) { fprintf(fp, "//\tHidden cascade ipp stage array pointer is NULL; sub-array not traversed\n"); } else { for (i = 0; i < cascade->count; i++) { cascade->ipp_stages[i] = cascade->ipp_stages[i] ? 
(void *)CMEM_getPhys(cascade->ipp_stages[i]) : NULL; fprintf(fp, "//\tHidden cascade ipp stage %i translated to physical address (0x%08X)\n", i, (unsigned int)cascade->ipp_stages[i]); } } cascade->ipp_stages = cascade->ipp_stages ? (void *)CMEM_getPhys(cascade->ipp_stages) : NULL; fprintf(fp, "//\tHidden cascade ipp stage array pointer translated to physical address (0x%08X)\n", (unsigned int)cascade->ipp_stages); } } void LOCAL_restore_hid_cascade(CvHidHaarClassifierCascade *cascade) { CvHidHaarStageClassifier *hid_stage; CvHidHaarClassifier *hid_classifier; CvHidHaarTreeNode *hid_node; int stage_count, classifier_count, feature_count; int i, j, k, l; if (cascade == NULL) { return; } else { stage_count = cascade->count; // apply Memory_getBufferVirtualAddress to hid_cascade contents (stage_classifier, sum.refcount, sum.data.i, // sqsum.refcount, sqsum.data.db, tilted.refcount, tilted.data.i, pq0, pq1, pq2, pq3, // p0, p1, p2, p3, ipp_stages) cascade->stage_classifier = cascade->stage_classifier ? (void *)Memory_getBufferVirtualAddress((int)cascade->stage_classifier,sizeof(CvHidHaarStageClassifier)) : NULL; cascade->sum.refcount = cascade->sum.refcount ? (void *)Memory_getBufferVirtualAddress((int)cascade->sum.refcount,sizeof(int)) : NULL; cascade->sum.data.i = cascade->sum.data.i ? (void *)Memory_getBufferVirtualAddress((int)cascade->sum.data.i,sizeof(int)) : NULL; cascade->sqsum.refcount = cascade->sqsum.refcount ? (void *)Memory_getBufferVirtualAddress((int)cascade->sqsum.refcount,sizeof(int)) : NULL; cascade->sqsum.data.db = cascade->sqsum.data.db ? (void *)Memory_getBufferVirtualAddress((int)cascade->sqsum.data.db,sizeof(double *)) : NULL; cascade->tilted.refcount = cascade->tilted.refcount ? (void *)Memory_getBufferVirtualAddress((int)cascade->tilted.refcount,sizeof(int)) : NULL; cascade->tilted.data.i = cascade->tilted.data.i ? (void *)Memory_getBufferVirtualAddress((int)cascade->tilted.data.i,sizeof(int)) : NULL; cascade->pq0 = cascade->pq0 ? 
(void *)Memory_getBufferVirtualAddress((int)cascade->pq0,sizeof(sqsumtype)) : NULL; cascade->pq1 = cascade->pq1 ? (void *)Memory_getBufferVirtualAddress((int)cascade->pq1,sizeof(sqsumtype)) : NULL; cascade->pq2 = cascade->pq2 ? (void *)Memory_getBufferVirtualAddress((int)cascade->pq2,sizeof(sqsumtype)) : NULL; cascade->pq3 = cascade->pq3 ? (void *)Memory_getBufferVirtualAddress((int)cascade->pq3,sizeof(sqsumtype)) : NULL; cascade->p0 = cascade->p0 ? (void *)Memory_getBufferVirtualAddress((int)cascade->p0,sizeof(sumtype)) : NULL; cascade->p1 = cascade->p1 ? (void *)Memory_getBufferVirtualAddress((int)cascade->p1,sizeof(sumtype)) : NULL; cascade->p2 = cascade->p2 ? (void *)Memory_getBufferVirtualAddress((int)cascade->p2,sizeof(sumtype)) : NULL; cascade->p3 = cascade->p3 ? (void *)Memory_getBufferVirtualAddress((int)cascade->p3,sizeof(sumtype)) : NULL; for (i = 0; i < cascade->count; i++) { hid_stage = cascade->stage_classifier + i; classifier_count = hid_stage->count; // apply Memory_getBufferVirtualAddress to stage contents (classifier, next, child, parent) hid_stage->classifier = hid_stage->classifier ? (void *)Memory_getBufferVirtualAddress((int)hid_stage->classifier,sizeof(CvHidHaarClassifier)) : NULL; hid_stage->next = hid_stage->next ? (void *)Memory_getBufferVirtualAddress((int)hid_stage->next,sizeof(CvHidHaarStageClassifier)) : NULL; hid_stage->child = hid_stage->child ? (void *)Memory_getBufferVirtualAddress((int)hid_stage->child,sizeof(CvHidHaarStageClassifier)) : NULL; hid_stage->parent = hid_stage->parent ? (void *)Memory_getBufferVirtualAddress((int)hid_stage->parent,sizeof(CvHidHaarStageClassifier)) : NULL; for (j = 0; j < classifier_count; j++) { hid_classifier = hid_stage->classifier + j; feature_count = hid_classifier->count; // apply Memory_getBufferVirtualAddress to classifier contents (node, alpha) hid_classifier->node = hid_classifier->node ? 
(void *)Memory_getBufferVirtualAddress((int)hid_classifier->node,sizeof(CvHidHaarTreeNode)) : NULL; hid_classifier->alpha = hid_classifier->alpha ? (void *)Memory_getBufferVirtualAddress((int)hid_classifier->alpha,sizeof(float)) : NULL; for (k = 0; k < feature_count; k++) { hid_node = hid_classifier->node + k; // apply Memory_getBufferVirtualAddress to node contents (none) // apply Memory_getBufferVirtualAddress to feature contents (rect[0].p0, .p1, .p2, .p3, rect[1].p0, etc.) for (l = 0; l < CV_HAAR_FEATURE_MAX; l++) { hid_node->feature.rect[l].p0 = hid_node->feature.rect[l].p0 ? (void *)Memory_getBufferVirtualAddress((int)hid_node->feature.rect[l].p0,sizeof(sumtype)) : NULL; hid_node->feature.rect[l].p1 = hid_node->feature.rect[l].p1 ? (void *)Memory_getBufferVirtualAddress((int)hid_node->feature.rect[l].p1,sizeof(sumtype)) : NULL; hid_node->feature.rect[l].p2 = hid_node->feature.rect[l].p2 ? (void *)Memory_getBufferVirtualAddress((int)hid_node->feature.rect[l].p2,sizeof(sumtype)) : NULL; hid_node->feature.rect[l].p3 = hid_node->feature.rect[l].p3 ? (void *)Memory_getBufferVirtualAddress((int)hid_node->feature.rect[l].p3,sizeof(sumtype)) : NULL; } } } } if (cascade->ipp_stages == NULL) { printf( "//\tHidden cascade ipp stage array pointer is NULL; sub-array not traversed\n"); } else { for (i = 0; i < cascade->count; i++) { cascade->ipp_stages[i] = cascade->ipp_stages[i] ? (void *)Memory_getBufferVirtualAddress((int)cascade->ipp_stages[i],sizeof(void *)) : NULL; } } cascade->ipp_stages = cascade->ipp_stages ? 
(void *)Memory_getBufferVirtualAddress((int)cascade->ipp_stages,sizeof(void *)) : NULL; } }

/*
 * traverse_and_translate_cascade
 *
 * Walks every stage, classifier and feature of a loaded Haar cascade, logs
 * the structure to "dsp_cascade_traversal_log.txt", and replaces each
 * embedded pointer with the value returned by
 * Memory_getBufferPhysicalAddress().  Every rewritten structure is written
 * back / invalidated in the cache with Memory_cacheWbInv(), and
 * Memory_cacheWbInvAll() is called once at the end.
 *
 * The hidden cascade is handed to traverse_and_translate_hid_cascade()
 * before the top-level pointers themselves are rewritten.
 *
 * NOTE(review): after this call the cascade holds translated addresses;
 * LOCAL_restore_cascade() applies Memory_getBufferVirtualAddress() to the
 * same fields and is presumably required before the cascade is used or
 * released on the ARM side again - confirm against callers.
 *
 * cascade: cascade to translate in place.  A NULL cascade is reported on
 *          stderr and ignored.
 */
void traverse_and_translate_cascade(CvHaarClassifierCascade *cascade )
{
    CvHaarStageClassifier *stage;
    CvHaarClassifier *classifier;
    CvHaarFeature *feature;
    int stage_count, classifier_count, feature_count;
    int i, j, k;
    int close_log = 1;
    FILE *fp;

    if (cascade == NULL) {
        fprintf(stderr, "traverse_and_translate_cascade: NULL cascade, nothing to translate\n");
        return;
    }

    /* The log is only a diagnostic aid: if it cannot be created, keep going
     * and send the trace to stderr instead of passing a NULL stream to
     * fprintf() and to the hidden-cascade helper. */
    fp = fopen("dsp_cascade_traversal_log.txt", "w+");
    if (fp == NULL) {
        perror("dsp_cascade_traversal_log.txt");
        fp = stderr;
        close_log = 0;
    }

    stage_count = cascade->count;
    fprintf(fp, "Cascade at 0x%08X has %i stages\n", (unsigned int)cascade, stage_count);

    for (i = 0; i < stage_count; i++) {
        stage = cascade->stage_classifier + i;
        classifier_count = stage->count;
        fprintf(fp, "\tStage %i at 0x%08X has %i classifiers\n", i, (unsigned int)stage, classifier_count);

        for (j = 0; j < classifier_count; j++) {
            classifier = stage->classifier + j;
            feature_count = classifier->count;
            fprintf(fp, "\t\tClassifier %i at 0x%08X has %i features\n", j, (unsigned int)classifier, feature_count);

            /* Features are only logged: feature->rect is an array stored
             * inside the feature, not a pointer, so there is nothing to
             * translate at this level. */
            for (k = 0; k < feature_count; k++) {
                feature = classifier->haar_feature + k;
                fprintf(fp, "\t\t\tFeature %i at 0x%08X rect array that begins at 0x%08X\n", k, (unsigned int)feature, (unsigned int)feature->rect);
            }

            /* Translate the classifier's own pointers (haar_feature,
             * threshold, left, right, alpha); NULL pointers stay NULL. */
            classifier->haar_feature = classifier->haar_feature ? (void *)Memory_getBufferPhysicalAddress(classifier->haar_feature, sizeof(CvHaarFeature),NULL) : NULL;
            classifier->threshold = classifier->threshold ? (void *)Memory_getBufferPhysicalAddress(classifier->threshold,sizeof(float),NULL) : NULL;
            classifier->left = classifier->left ? (void *)Memory_getBufferPhysicalAddress(classifier->left,sizeof(int),NULL) : NULL;
            classifier->right = classifier->right ? (void *)Memory_getBufferPhysicalAddress(classifier->right,sizeof(int),NULL) : NULL;
            classifier->alpha = classifier->alpha ? (void *)Memory_getBufferPhysicalAddress(classifier->alpha,sizeof(float),NULL) : NULL;
            fprintf(fp, "\t\tClassifier %i pointers translated to physical addresses (0x%08X, 0x%08X, 0x%08X, 0x%08X, 0x%08X)\n", j, (unsigned int)classifier->haar_feature, (unsigned int)classifier->threshold, (unsigned int)classifier->left, (unsigned int)classifier->right, (unsigned int)classifier->alpha);
            Memory_cacheWbInv( (void *)classifier, sizeof(CvHaarClassifier));
        }

        /* The stage's classifier pointer is translated only after all of
         * its classifiers have been visited through the original pointer. */
        fprintf(fp, "\tBefore translation :Stage %i pointers translated to physical addresses (0x%08X)\n", i, (unsigned int)stage->classifier);
        stage->classifier = stage->classifier ? (void *)Memory_getBufferPhysicalAddress(stage->classifier,sizeof(CvHaarClassifier),NULL) : NULL;
        Memory_cacheWbInv( (void *)stage,sizeof(CvHaarStageClassifier));
        fprintf(fp, "\tStage %i pointers translated to physical addresses (0x%08X)\n", i, (unsigned int)stage->classifier);
    }

    // traverse "hidden" cascade, too
    traverse_and_translate_hid_cascade(cascade->hid_cascade, fp);

    /* Finally translate the cascade's own pointers (stage_classifier,
     * hid_cascade); this must come last because the loops above still
     * dereference them. */
    cascade->stage_classifier = cascade->stage_classifier ? (void *)Memory_getBufferPhysicalAddress(cascade->stage_classifier,sizeof(CvHaarStageClassifier),NULL) : NULL;
    cascade->hid_cascade = cascade->hid_cascade ? (void *)Memory_getBufferPhysicalAddress(cascade->hid_cascade,sizeof(CvHidHaarClassifierCascade),NULL) : NULL;
    fprintf(fp, "Cascade pointers translated to physical addresses (0x%08X, 0x%08X)\n", (unsigned int)cascade->stage_classifier, (unsigned int)cascade->hid_cascade);

    Memory_cacheWbInvAll();

    if (close_log) {
        fclose(fp);
    }
}

void LOCAL_restore_cascade(CvHaarClassifierCascade *cascade) { CvHaarStageClassifier *stage; CvHaarClassifier *classifier; CvHaarFeature *feature; int stage_count, classifier_count, feature_count; unsigned int new_thresh, new_left, new_right, new_alpha; int i, j, k; stage_count = cascade->count; // printf("Stage_classifier %x\n",(int)cascade->stage_classifier); cascade->stage_classifier = (void *)Memory_getBufferVirtualAddress(((int)cascade->stage_classifier),sizeof(CvHaarStageClassifier)+sizeof(int)); // printf("Stage_classifier %x\n",(int)cascade->stage_classifier); cascade->hid_cascade = cascade->hid_cascade ? (void *)Memory_getBufferVirtualAddress((int)cascade->hid_cascade,sizeof(CvHidHaarClassifierCascade)): NULL; for (i = 0; i < stage_count; i++) { // printf("In stage loop\n"); stage = cascade->stage_classifier + i; // printf("stage %x\n",(int)stage ); classifier_count = stage->count; // apply CMEM_getPhys to stage contents (classifier) stage->classifier = (void *)Memory_getBufferVirtualAddress((int)stage->classifier,sizeof(CvHaarClassifier)); for (j = 0; j < classifier_count; j++) { // printf("In classifier loop\n"); classifier = stage->classifier + j; // printf(" classifier %x\n", classifier); feature_count = classifier->count; // apply CMEM_getPhys to classifier contents (haar_feature, threshold, left, right, alpha) classifier->haar_feature = (void *)Memory_getBufferVirtualAddress((int)(classifier->haar_feature),sizeof(CvHaarFeature)); classifier->threshold = (void *)Memory_getBufferVirtualAddress((int)classifier->threshold,sizeof(float)) ; classifier->left = (void *)Memory_getBufferVirtualAddress((int)classifier->left,sizeof(int)); 
classifier->right = (void *)Memory_getBufferVirtualAddress((int)classifier->right,sizeof(int)); classifier->alpha = (void *)Memory_getBufferVirtualAddress((int)classifier->alpha,sizeof(float)); // Memory_cacheWbInv( (void *)classifier, sizeof(CvHaarClassifier)); for (k = 0; k < feature_count; k++) { // printf("In Feature loop\n"); feature = classifier->haar_feature + k; // apply CMEM_getPhys to feature contents (rect) // NOT NECESSARY (array pointer doesn't actually exist; feature->rect == (char *)feature + 4 //feature->rect = (unsigned int)CMEM_getPhys(feature->rect); //fprintf(fp, "0x%08X\n", (unsigned int)feature->rect); } } } LOCAL_restore_hid_cascade(cascade->hid_cascade); } Int c6accel_test_cvHaarDetectObjects(C6accel_Handle hC6accel, char *image_file_name, char *cascade_file_name) { IplImage *image, *image_color; CvSeq *arm_sequence = NULL, *dsp_sequence = NULL; CvHaarClassifierCascade *cascade; CvMemStorage *storage; CvRect *r; CvPoint c1, c2; CvScalar red = {0, 0, 255, 255}, black = {0, 0, 0, 255}; struct timeval startTime, endTime; int t_overhead, t_algo, i; void *temp_ptr; printf("cvHaarDetectObjects Test (%s, %s)\n", image_file_name, cascade_file_name); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); printf("Memory for images,cascade and \n"); // 1. Read input image and cascade files image = cvLoadImage(image_file_name, CV_LOAD_IMAGE_GRAYSCALE); image_color = cvLoadImage(image_file_name, CV_LOAD_IMAGE_COLOR); // 2. 
Create memory storage space; use dummy allocation to prime for DSP storage = cvCreateMemStorage(0); printf("Memory allocation for images and storage done\n"); cascade = (CvHaarClassifierCascade *)cvLoad(cascade_file_name, 0, 0, 0); printf("Reading of Cascade complete\n"); // 3.a Apply ARM algorithm arm_sequence = cvHaarDetectObjects(image_color, cascade, storage, 1.1, 2, CV_HAAR_DO_CANNY_PRUNING, cvSize(30, 30)); gettimeofday(&startTime, NULL); printf("top= %x\n", storage->top); printf("bottom= %x\n", storage->bottom); for (i = 0; i < 1; i++) arm_sequence = cvHaarDetectObjects(image_color, cascade, storage, 1.1, 2, CV_HAAR_DO_CANNY_PRUNING, cvSize(30, 30)); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM HaarDetectObjects function (time: %f ms)\n", t_algo / 1000.0 / 1.0); // 4.a Mark and print list of matches detected by ARM if (arm_sequence == NULL) { printf("ARM sequence returned NULL\n"); } else { printf("ARM sequence contains %i elements:\n", arm_sequence->total); for (i = 0; i < arm_sequence->total; i++){ r = (CvRect *)cvGetSeqElem(arm_sequence, i); printf("%4i: %4i, %4i (%ix%i)\n", i, r->x, r->y, r->width, r->height); // mark with thick black rectangle c1.x = r->x; c1.y = r->y; c2.x = r->x + r->width; c2.y = r->y + r->height; cvRectangle(image_color, c1, c2, black, 2, 8, 0); } } cvReleaseHaarClassifierCascade(&cascade); cvReleaseMemStorage(&storage); // 3.b Apply DSP algorithm storage = cvCreateMemStorage(0); temp_ptr = cvMemStorageAlloc(storage, 64); printf("Memory allocation for images and storage done\n"); cascade = (CvHaarClassifierCascade *)cvLoad(cascade_file_name, 0, 0, 0); traverse_and_translate_cascade(cascade); gettimeofday(&startTime, NULL); C6accel_cvHaarDetectObjects(hC6accel, image_color, cascade, storage, 1.1, 2, CV_HAAR_DO_CANNY_PRUNING, cvSize(30, 30), &dsp_sequence); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - 
startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP HaarDetectObjects function (time: %f ms)\n", t_algo / 1000.0); // 4.b Mark and print list of matches detected by DSP if (dsp_sequence == NULL) { printf("DSP sequence returned NULL\n"); } else { printf("DSP sequence contains %i elements:\n", dsp_sequence->total); for (i = 0; i < dsp_sequence->total; i++) { r = (CvRect *)cvGetSeqElem(dsp_sequence, i); printf("%4i: %4i, %4i (%ix%i)\n", i, r->x, r->y, r->width, r->height); // mark with thin red rectangle c1.x = r->x; c1.y = r->y; c2.x = r->x + r->width; c2.y = r->y + r->height; cvRectangle(image_color, c1, c2, red, 1, 8, 0); } } // 7. Save marked image to filesystem cvSaveImage("./output.png", image_color, 0); cvReleaseImage(&image); cvReleaseImage(&image_color); LOCAL_restore_cascade(cascade); cvReleaseHaarClassifierCascade(&cascade); cvReleaseMemStorage(&storage); printf("C6accel_cvHaarDetectObjects test completed successfully\n"); return 1; } Int c6accel_test_Cascade(char *cascade_file_name) { CvHaarClassifierCascade *cascade; printf("Reading Cascade\n"); cascade = (CvHaarClassifierCascade *)cvLoad("opencv_images/haarcascade_frontalface_alt2.xml", 0, 0, 0); printf("Reading of Cascade complete\n"); traverse_and_translate_cascade(cascade); printf("Traverse and translate complete\n"); printf("count: %x\n", cascade->count); LOCAL_restore_cascade(cascade); cvReleaseHaarClassifierCascade(&cascade); printf("C6accel_Cascade test completed successfully\n"); return 1; } Int c6accel_test_cvGoodFeaturesToTrack(C6accel_Handle hC6accel, char *input_file_name, int n) { IplImage *input_image, *output_image, *eig_image, *temp_image; CvPoint2D32f *arm_corners, *dsp_corners, *arm_corners_rough, *dsp_corners_rough; int arm_cornerCount = 256, dsp_cornerCount = 256; CvScalar red = {0, 0, 255, 255}, black = {0, 0, 0, 255}; CvPoint c1, c2; struct timeval startTime, endTime; int t_overhead, t_algo, i; printf("cvGoodFeaturesToTrack Test (%s, 
%i)\n", input_file_name, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input image and create working images input_image = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE); output_image = cvLoadImage(input_file_name, CV_LOAD_IMAGE_COLOR); eig_image = cvCreateImage(cvGetSize(input_image), 32, 1); temp_image = cvCreateImage(cvGetSize(input_image), 32, 1); // 2. Allocate output buffers arm_corners = (CvPoint2D32f *)cvAlloc(256 * sizeof(CvPoint2D32f)); dsp_corners = (CvPoint2D32f *)cvAlloc(256 * sizeof(CvPoint2D32f)); arm_corners_rough = (CvPoint2D32f *)cvAlloc(256 * sizeof(CvPoint2D32f)); dsp_corners_rough = (CvPoint2D32f *)cvAlloc(256 * sizeof(CvPoint2D32f)); // 3.a Apply ARM algorithm to find features cvGoodFeaturesToTrack(input_image, eig_image, temp_image, arm_corners, &arm_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvGoodFeaturesToTrack(input_image, eig_image, temp_image, arm_corners, &arm_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM GoodFeaturesToTrack function (time: %f ms)\n", t_algo / 1000.0 / n); // 3.b Apply DSP algorithm to find features C6accel_cvGoodFeaturesToTrack(hC6accel, input_image, eig_image, temp_image, dsp_corners, &dsp_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvGoodFeaturesToTrack(hC6accel, input_image, eig_image, temp_image, dsp_corners, &dsp_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP GoodFeaturesToTrack function (time: %f ms)\n", t_algo / 1000.0 / n); // 4. 
Save rough corner locations so we can accurately benchmark the refinement functions memcpy(arm_corners_rough, arm_corners, arm_cornerCount * sizeof(CvPoint2D32f)); memcpy(dsp_corners_rough, dsp_corners, dsp_cornerCount * sizeof(CvPoint2D32f)); // printf("I finished memcpy\n"); // 4.a Apply ARM algorithm to refine features //cvFindCorner SubPix issue to be resolved cvFindCornerSubPix(input_image, dsp_corners, dsp_cornerCount, cvSize(10, 10), cvSize(-1, -1), cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03)); t_algo = 0; for (i = 0; i < n; i++) { memcpy(arm_corners, arm_corners_rough, arm_cornerCount * sizeof(CvPoint2D32f)); printf("ARM Benchmark begin \n"); gettimeofday(&startTime, NULL); cvFindCornerSubPix(input_image, arm_corners, arm_cornerCount, cvSize(10, 10), cvSize(-1, -1), cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03)); gettimeofday(&endTime, NULL); t_algo += (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; } printf("Called ARM FindCornerSubPix function (time: %f ms)\n", t_algo / 1000.0 / n); // 4.b Apply DSP algorithm to refine features C6accel_cvFindCornerSubPix(hC6accel, input_image, dsp_corners, dsp_cornerCount, cvSize(10, 10), cvSize(-1, -1), cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03)); t_algo = 0; for (i = 0; i < n; i++) { memcpy(dsp_corners, dsp_corners_rough, dsp_cornerCount * sizeof(CvPoint2D32f)); //printf("DSP Benchmark begin \n"); gettimeofday(&startTime, NULL); C6accel_cvFindCornerSubPix(hC6accel, input_image, dsp_corners, dsp_cornerCount, cvSize(10, 10), cvSize(-1, -1), cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03)); gettimeofday(&endTime, NULL); t_algo += (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; } printf("Called DSP FindCornerSubPix function (time: %f ms)\n", t_algo / 1000.0 / n); // 5.a Mark features detected by ARM and report number of elements /* printf("ARM list contains %i 
features.\n", arm_cornerCount); for (i = 0; i < arm_cornerCount; i++) { //printf("%3i: (%10f,%10f) -> (%10f,%10f)\n", i, // arm_corners_rough[i].x, arm_corners_rough[i].y, // arm_corners[i].x, arm_corners[i].y); // mark with large black square c1.x = cvRound(arm_corners[i].x) - 2; c1.y = cvRound(arm_corners[i].y) - 2; c2.x = cvRound(arm_corners[i].x) + 2; c2.y = cvRound(arm_corners[i].y) + 2; cvRectangle(output_image, c1, c2, black, -1, 8, 0); }*/ // 5.b Mark and print list of matches detected by DSP printf("DSP list contains %i features.\n", dsp_cornerCount); for (i = 0; i < dsp_cornerCount; i++) { //printf("%3i: (%10f,%10f) -> (%10f,%10f)\n", i, // dsp_corners_rough[i].x, dsp_corners_rough[i].y, // dsp_corners[i].x, dsp_corners[i].y); // mark with small red square c1.x = cvRound(dsp_corners[i].x) - 1; c1.y = cvRound(dsp_corners[i].y) - 1; c2.x = cvRound(dsp_corners[i].x) + 1; c2.y = cvRound(dsp_corners[i].y) + 1; cvRectangle(output_image, c1, c2, red, -1, 8, 0); } // 5. Save marked image to filesystem cvSaveImage("./output.png", output_image, 0); printf("Saved image\n"); //6. 
Free memory allocated for the images cvReleaseImage(&output_image); cvReleaseImage(&eig_image); cvReleaseImage(&input_image); cvReleaseImage(&temp_image); cvFree(&arm_corners); cvFree(&dsp_corners); printf("dsp_corners free\n"); printf("dsp_corner_phys: %x\n",CMEM_getPhys(dsp_corners_rough)); cvFree(&dsp_corners_rough); cvFree(&arm_corners_rough); printf("C6accel_cvGoodFeaturesToTrack test completed successfully\n"); return 1; } Int c6accel_test_cvCalcOpticalFlowPyrLK(C6accel_Handle hC6accel, char *input_file_name_1, char *input_file_name_2, int n) { IplImage *prev_input_image, *curr_input_image, *eig_image, *temp_image, *prev_pyramid, *curr_pyramid, *output_image; CvPoint2D32f *prev_corners, *arm_curr_corners, *dsp_curr_corners; char *arm_status, *dsp_status; int arm_cornerCount = 1024, dsp_cornerCount = 1024; CvScalar red = {0, 0, 255, 255}, black = {0, 0, 0, 255}; CvPoint c1, c2; struct timeval startTime, endTime; int t_overhead, t_algo, i; printf("cvCalcOpticalFlowPyrLK Test (%s -> %s, %i)\n", input_file_name_1, input_file_name_2, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input image and create working images prev_input_image = cvLoadImage(input_file_name_1, CV_LOAD_IMAGE_GRAYSCALE); curr_input_image = cvLoadImage(input_file_name_2, CV_LOAD_IMAGE_GRAYSCALE); output_image = cvLoadImage(input_file_name_2, CV_LOAD_IMAGE_COLOR); eig_image = cvCreateImage(cvGetSize(prev_input_image), 32, 1); temp_image = cvCreateImage(cvGetSize(prev_input_image), 32, 1); prev_pyramid = cvCreateImage(cvGetSize(prev_input_image), 8, 1); curr_pyramid = cvCreateImage(cvGetSize(prev_input_image), 8, 1); // 2. 
Allocate output buffers prev_corners = (CvPoint2D32f *)cvAlloc(arm_cornerCount * sizeof(CvPoint2D32f)); arm_curr_corners = (CvPoint2D32f *)cvAlloc(arm_cornerCount * sizeof(CvPoint2D32f)); dsp_curr_corners = (CvPoint2D32f *)cvAlloc(dsp_cornerCount * sizeof(CvPoint2D32f)); arm_status = (char *)cvAlloc(arm_cornerCount * sizeof(char)); dsp_status = (char *)cvAlloc(dsp_cornerCount * sizeof(char)); // 3. Apply ARM algorithm to find features cvGoodFeaturesToTrack(prev_input_image, eig_image, temp_image, prev_corners, &arm_cornerCount, 0.01, 10.0, NULL, 3, 0, 0.04); dsp_cornerCount = arm_cornerCount; // 4.a Apply ARM algorithm to refine features cvCalcOpticalFlowPyrLK(prev_input_image, curr_input_image, prev_pyramid, curr_pyramid, prev_corners, arm_curr_corners, arm_cornerCount, cvSize(10, 10), 3, arm_status, NULL, cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03), 0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) { cvCalcOpticalFlowPyrLK(prev_input_image, curr_input_image, prev_pyramid, curr_pyramid, prev_corners, arm_curr_corners, arm_cornerCount, cvSize(10, 10), 3, arm_status, NULL, cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03), 0); } gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM cvCalcOpticalFlowPyrLK function (time: %f ms)\n", t_algo / 1000.0 / n); // 4.b Apply DSP algorithm to refine features C6accel_cvCalcOpticalFlowPyrLK(hC6accel, prev_input_image, curr_input_image, prev_pyramid, curr_pyramid, prev_corners, dsp_curr_corners, dsp_cornerCount, cvSize(10, 10), 3, dsp_status, NULL, cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03), 0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) { C6accel_cvCalcOpticalFlowPyrLK(hC6accel, prev_input_image, curr_input_image, prev_pyramid, curr_pyramid, prev_corners, dsp_curr_corners, dsp_cornerCount, cvSize(10, 10), 3, dsp_status, NULL, cvTermCriteria(CV_TERMCRIT_ITER | 
CV_TERMCRIT_EPS, 20, 0.03), 0); } gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP cvCalcOpticalFlowPyrLK function (time: %f ms)\n", t_algo / 1000.0 / n); // 5.a Mark motion detected by ARM and report number of elements printf("ARM list contains %i features.\n", arm_cornerCount); for (i = 0; i < arm_cornerCount; i++) { //printf("%3i: (%10f,%10f) -> (%10f,%10f)\n", i, // arm_corners_rough[i].x, arm_corners_rough[i].y, // arm_corners[i].x, arm_corners[i].y); // mark with thick black line c1.x = cvRound(prev_corners[i].x); c1.y = cvRound(prev_corners[i].y); c2.x = cvRound(arm_curr_corners[i].x); c2.y = cvRound(arm_curr_corners[i].y); cvCircle( output_image, c1, 2, CVX_GRAY50, -1,8,0 ); //cvLine(output_image, c1, c2, black, 2, 8, 0); } // 5.b Mark and print list of matches detected by DSP printf("DSP list contains %i features.\n", dsp_cornerCount); for (i = 0; i < dsp_cornerCount; i++) { //printf("%3i: (%10f,%10f) -> (%10f,%10f)\n", i, // dsp_corners_rough[i].x, dsp_corners_rough[i].y, // dsp_corners[i].x, dsp_corners[i].y); // mark with thin red line c1.x = cvRound(prev_corners[i].x); c1.y = cvRound(prev_corners[i].y); c2.x = cvRound(dsp_curr_corners[i].x); c2.y = cvRound(dsp_curr_corners[i].y); cvLine(output_image, c1, c2, red, 1, 8, 0); } // 5. Save marked image to filesystem cvSaveImage("./output.png", output_image, 0); cvReleaseImage(&prev_input_image); cvReleaseImage(&curr_input_image); cvReleaseImage(&output_image); cvReleaseImage(&eig_image); cvReleaseImage(&temp_image); cvReleaseImage(&prev_pyramid); cvReleaseImage(&curr_pyramid); // 2. 
Allocate output buffers prev_corners = cvFree(&prev_corners); arm_curr_corners = cvFree(&arm_curr_corners); dsp_curr_corners = cvFree(&dsp_curr_corners); arm_status = cvFree(&arm_status); dsp_status = cvFree(&dsp_status); printf("C6accel_cvCalcOpticalFlowPyrLK test completed successfully\n"); return 1; } Int c6accel_test_cvMatchTemplate(C6accel_Handle hC6accel, char *input_file_name, char *template_file_name, int n) { IplImage *input_image, *template_image, *arm_output_image, *dsp_output_image, *arm_scale_image, *dsp_scale_image; CvSize output_size; struct timeval startTime, endTime; int t_overhead, t_algo, i; printf("cvMatchTemplate Test (%s, %s, %i)\n", input_file_name, template_file_name, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input and template images input_image = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE); template_image = cvLoadImage(template_file_name, CV_LOAD_IMAGE_GRAYSCALE); // 2. 
Allocate output images output_size = cvSize(input_image->width - template_image->width + 1, input_image->height - template_image->height + 1); arm_output_image = cvCreateImage(output_size, 32, 1); arm_scale_image = cvCreateImage(output_size, 8, 1); dsp_output_image = cvCreateImage(output_size, 32, 1); dsp_scale_image = cvCreateImage(output_size, 8, 1); cvSetZero(arm_output_image); cvSetZero(dsp_output_image); // 3.a Apply ARM algorithm to match template cvMatchTemplate(input_image, template_image, arm_output_image, CV_TM_SQDIFF); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvMatchTemplate(input_image, template_image, arm_output_image, CV_TM_SQDIFF); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM cvMatchTemplate function (time: %f ms)\n", t_algo / 1000.0 / n); // 4.b Apply DSP algorithm to refine features C6accel_cvMatchTemplate(hC6accel, input_image, template_image, dsp_output_image, CV_TM_SQDIFF); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvMatchTemplate(hC6accel, input_image, template_image, dsp_output_image, CV_TM_SQDIFF); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP cvMatchTemplate function (time: %f ms)\n", t_algo / 1000.0 / n); // 5. Normalize output images cvNormalize(arm_output_image, arm_scale_image, 0, 255, CV_MINMAX, NULL); cvNormalize(dsp_output_image, dsp_scale_image, 0, 255, CV_MINMAX, NULL); // 6. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(arm_scale_image, dsp_scale_image, CV_L2, NULL)); // 7. Save output images to filesystem cvSaveImage("./output_arm.png", arm_scale_image, 0); cvSaveImage("./output_dsp.png", dsp_scale_image, 0); // 8. 
Free memory allocated to images cvReleaseImage(&template_image); cvReleaseImage(&input_image); cvReleaseImage(&arm_output_image); cvReleaseImage(&arm_scale_image); cvReleaseImage(&dsp_output_image); cvReleaseImage(&dsp_scale_image); printf("C6accel_cvMatchTemplate test completed successfully\n"); return 1; } Int c6accel_test_cvMulSpectrums(C6accel_Handle hC6accel, char *input_file_name_1, char *input_file_name_2, int n) { IplImage *input_image_1, *input_image_2, *input_image_1f, *input_image_2f, *dft_image_1, *dft_image_2, *arm_mult_image, *dsp_mult_image, *arm_idft_image, *dsp_idft_image, *arm_norm_image, *dsp_norm_image; CvSize dft_size; struct timeval startTime, endTime; int t_overhead, t_algo, i; printf("cvMulSpectrums Test (%s, %s, %i)\n", input_file_name_1, input_file_name_2, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input images and check that sizes match input_image_1 = cvLoadImage(input_file_name_1, CV_LOAD_IMAGE_GRAYSCALE); input_image_2 = cvLoadImage(input_file_name_2, CV_LOAD_IMAGE_GRAYSCALE); if ((input_image_1->width != input_image_2->width) || (input_image_1->height != input_image_2->height)) { printf("Image size mismatch; cvMulSpectrums Test aborted!\n"); return -1; } // 2. 
Allocate working and output images (and convert input images to floating point) dft_size = cvSize(input_image_1->width, input_image_1->height); input_image_1f = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); input_image_2f = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); dft_image_1 = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); dft_image_2 = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); arm_mult_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); arm_idft_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); arm_norm_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); dsp_mult_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); dsp_idft_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); dsp_norm_image = cvCreateImage(dft_size, IPL_DEPTH_32F, 1); cvConvertScale(input_image_1, input_image_1f, 1.0f / 255.0f, 0); cvConvertScale(input_image_2, input_image_2f, 1.0f / 255.0f, 0); // 3. Apply ARM algorithm to calculate DFT for images 1, 2 (don't time) cvDFT(input_image_1f, dft_image_1, CV_DXT_FORWARD, 0); cvDFT(input_image_2f, dft_image_2, CV_DXT_FORWARD, 0); // 4.a Apply ARM algorithm to multiply spectrums cvMulSpectrums(dft_image_1, dft_image_2, arm_mult_image, 0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvMulSpectrums(dft_image_1, dft_image_2, arm_mult_image, 0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM cvMulSpectrums function (time: %f ms)\n", t_algo / 1000.0 / n); // 4.b Apply DSP algorithm to multiply spectrums C6accel_cvMulSpectrums(hC6accel, dft_image_1, dft_image_2, dsp_mult_image, 0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvMulSpectrums(hC6accel, dft_image_1, dft_image_2, dsp_mult_image, 0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP cvMulSpectrums function (time: %f ms)\n", t_algo / 1000.0 / n); // 5.a Apply ARM 
IDFT algorithm cvDFT(arm_mult_image, arm_idft_image, CV_DXT_INVERSE, 0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvDFT(arm_mult_image, arm_idft_image, CV_DXT_INVERSE, 0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM cvDFT function (time: %f ms)\n", t_algo / 1000.0 / n); // 5.b Apply DSP IDFT algorithm C6accel_cvDFT(hC6accel, dsp_mult_image, dsp_idft_image, CV_DXT_INVERSE, 0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvDFT(hC6accel, dsp_mult_image, dsp_idft_image, CV_DXT_INVERSE, 0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP cvDFT function (time: %f ms)\n", t_algo / 1000.0 / n); // 6.a Apply ARM algorithm to normalize image cvNormalize(arm_idft_image, arm_norm_image, 0, 255, CV_MINMAX, NULL); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvNormalize(arm_idft_image, arm_norm_image, 0, 255, CV_MINMAX, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM cvNormalize function (time: %f ms)\n", t_algo / 1000.0 / n); // 6.b Apply DSP algorithm to normalize image C6accel_cvNormalize(hC6accel, dsp_idft_image, dsp_norm_image, 0, 255, CV_MINMAX, NULL); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvNormalize(hC6accel, dsp_idft_image, dsp_norm_image, 0, 255, CV_MINMAX, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP cvNormalize function (time: %f ms)\n", t_algo / 1000.0 / n); // 7. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(arm_norm_image, dsp_norm_image, CV_L2, NULL)); // 8. 
Save output images to filesystem cvSaveImage("./output_arm.png", arm_norm_image, 0); cvSaveImage("./output_dsp.png", dsp_norm_image, 0); //9. Free memory allocated to the images cvReleaseImage(&input_image_1); cvReleaseImage(&input_image_2); cvReleaseImage(&dft_image_1); cvReleaseImage(&dft_image_2); cvReleaseImage(&input_image_1f); cvReleaseImage(&input_image_2f); cvReleaseImage(&arm_mult_image); cvReleaseImage(&arm_idft_image); cvReleaseImage(&arm_norm_image); cvReleaseImage(&dsp_mult_image); cvReleaseImage(&dsp_idft_image); cvReleaseImage(&dsp_norm_image); printf("C6accel_cvMulSpectrums test completed successfully\n"); return 1; } Int c6accel_test_cvNorm(C6accel_Handle hC6accel, char *input_file_name, int n) { IplImage *input_image; double arm_min_val, arm_max_val, dsp_min_val, dsp_max_val, arm_norm, dsp_norm; CvPoint arm_min_loc, arm_max_loc, dsp_min_loc, dsp_max_loc; struct timeval startTime, endTime; int t_overhead, t_algo, i; printf("cvNorm Test (%s, %i)\n", input_file_name, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. 
Read input image input_image = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE); // 2.a Apply ARM algorithm to find min/max pixels cvMinMaxLoc(input_image, &arm_min_val, &arm_max_val, &arm_min_loc, &arm_max_loc, NULL); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvMinMaxLoc(input_image, &arm_min_val, &arm_max_val, &arm_min_loc, &arm_max_loc, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM cvMinMaxLoc function (time: %f ms)\n", t_algo / 1000.0 / n); // 2.b Apply DSP algorithm to find min/max pixels C6accel_cvMinMaxLoc(hC6accel, input_image, &dsp_min_val, &dsp_max_val, &dsp_min_loc, &dsp_max_loc, NULL); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvMinMaxLoc(hC6accel, input_image, &dsp_min_val, &dsp_max_val, &dsp_min_loc, &dsp_max_loc, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP cvMinMaxLoc function (time: %f ms)\n", t_algo / 1000.0 / n); // 3.a Apply ARM algorithm to find norm arm_norm = cvNorm(input_image, NULL, CV_L2, NULL); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) arm_norm = cvNorm(input_image, NULL, CV_L2, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM cvNorm function (time: %f ms)\n", t_algo / 1000.0 / n); // 3.b Apply DSP algorithm to find norm C6accel_cvNorm(hC6accel, input_image, NULL, CV_L2, NULL, &dsp_norm); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvNorm(hC6accel, input_image, NULL, CV_L2, NULL, &dsp_norm); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP cvNorm function (time: %f ms)\n", t_algo / 1000.0 / n); // 4. 
Compare outputs printf("ARM image statistics:\n\tL2 Norm:\t%f\n\tMin Val:\t%f\t(%i, %i)\n\tMax Val:\t%f\t(%i, %i)\n", arm_norm, arm_min_val, arm_min_loc.x, arm_min_loc.y, arm_max_val, arm_max_loc.x, arm_max_loc.y); printf("DSP image statistics:\n\tL2 Norm:\t%f\n\tMin Val:\t%f\t(%i, %i)\n\tMax Val:\t%f\t(%i, %i)\n", dsp_norm, dsp_min_val, dsp_min_loc.x, dsp_min_loc.y, dsp_max_val, dsp_max_loc.x, dsp_max_loc.y); cvReleaseImage(&input_image); printf("C6accel_cvNorm test completed successfully\n"); return 1; } Int c6accel_test_cvIntegral(C6accel_Handle hC6accel, char *input_file_name, int n) { IplImage *input_image, *arm_sum_image, *arm_norm_image, *dsp_sum_image, *dsp_norm_image; CvSize integral_size; struct timeval startTime, endTime; int t_overhead, t_algo, i; printf("cvIntegral Test (%s, %i)\n", input_file_name, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input image input_image = cvLoadImage(input_file_name, CV_LOAD_IMAGE_GRAYSCALE); // 2. 
Create output images integral_size = cvSize(input_image->width + 1, input_image->height + 1); arm_sum_image = cvCreateImage(integral_size, IPL_DEPTH_32S, 1); arm_norm_image = cvCreateImage(integral_size, IPL_DEPTH_32S, 1); dsp_sum_image = cvCreateImage(integral_size, IPL_DEPTH_32S, 1); dsp_norm_image = cvCreateImage(integral_size, IPL_DEPTH_32S, 1); // 3.a Apply ARM integral algorithm cvIntegral(input_image, arm_sum_image, NULL, NULL); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvIntegral(input_image, arm_sum_image, NULL, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called ARM cvIntegral function (time: %f ms)\n", t_algo / 1000.0 / n); // 3.b Apply DSP integral algorithm C6accel_cvIntegral(hC6accel, input_image, dsp_sum_image, NULL, NULL); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvIntegral(hC6accel, input_image, dsp_sum_image, NULL, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; printf("Called DSP cvIntegral function (time: %f ms)\n", t_algo / 1000.0 / n); // 4. Normalize output images cvNormalize(arm_sum_image, arm_norm_image, 0, 255, CV_MINMAX, NULL); cvNormalize(dsp_sum_image, dsp_norm_image, 0, 255, CV_MINMAX, NULL); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(arm_norm_image, dsp_norm_image, CV_L2, NULL)); // 6. Save output images to filesystem cvSaveImage("./output_arm.png", arm_norm_image, 0); cvSaveImage("./output_dsp.png", dsp_norm_image, 0); // 7. 
Free memory allocated to images cvReleaseImage(&input_image); cvReleaseImage(&arm_sum_image); cvReleaseImage(&arm_norm_image); cvReleaseImage(&dsp_sum_image); cvReleaseImage(&dsp_norm_image); printf("C6accel_cvIntegral test completed successfully\n"); return 1; } Int c6accel_test_cvAdd(C6accel_Handle hC6accel, char *input_file_name1,char *input_file_name2, int n) { IplImage *inputImg1,*inputImg2, *outputImg_arm, *outputImg_dsp; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; printf("cvAdd Test (%s, %i iterations)\n", input_file_name1, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input images from file inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); inputImg2 = cvLoadImage( input_file_name2, CV_LOAD_IMAGE_COLOR); // 2. Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); // 3.a Apply ARM algorithm cvAdd(inputImg1,inputImg2,outputImg_arm,NULL); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvAdd(inputImg1,inputImg2, outputImg_arm, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Add function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 3.b Apply DSP algorithm C6accel_cvAdd(hC6accel, inputImg1,inputImg2,outputImg_dsp,NULL); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvAdd(hC6accel, inputImg1, inputImg2, outputImg_dsp,NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg 
= (float)t_algo / (float)n; printf("Called DSP Add function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); // 5. Save outputs to filesystem cvSaveImage("./output_arm.png", outputImg_arm, 0); cvSaveImage("./output_dsp.png", outputImg_dsp, 0); // 6. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&inputImg2); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); printf("C6accel_cvAdd test completed successfully; outputs saved to filesystem\n"); return 1; } Int c6accel_test_cvAddS(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; CvScalar red = {0, 0, 255, 255}; printf("cvAddS Test (%s, %i iterations)\n", input_file_name1, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input images from file inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); // 3.a Apply ARM algorithm cvAddS(inputImg1,red,outputImg_arm,NULL); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvAddS(inputImg1,red, outputImg_arm, NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Add function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 3.b Apply DSP algorithm C6accel_cvAddS(hC6accel, inputImg1,red,outputImg_dsp,NULL); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvAddS(hC6accel, inputImg1, red, outputImg_dsp,NULL); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP AddS function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); // 6. Save outputs to filesystem cvSaveImage("./output_arm.png", outputImg_arm, 0); cvSaveImage("./output_dsp.png", outputImg_dsp, 0); //7. 
Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); printf("C6accel_cvAddS test completed successfully; outputs saved to filesystem\n"); return 1; } Int c6accel_test_cvAbsDiff(C6accel_Handle hC6accel, char *input_file_name1,char *input_file_name2, int n) { IplImage *inputImg1,*inputImg2, *outputImg_arm, *outputImg_dsp; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; printf("cvAbsDiff Test (%s, %i iterations)\n", input_file_name1, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input images from file inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); inputImg2 = cvLoadImage( input_file_name2, CV_LOAD_IMAGE_COLOR); // 2. Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); // 3.a Apply ARM algorithm cvAbsDiff(inputImg1,inputImg2,outputImg_arm); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvAbsDiff(inputImg1,inputImg2, outputImg_arm); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM AbsDiff function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 3.b Apply DSP algorithm C6accel_cvAbsDiff(hC6accel, inputImg1,inputImg2,outputImg_dsp); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvAbsDiff(hC6accel, inputImg1, inputImg2, outputImg_dsp); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; 
printf("Called DSP AbsDiff function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); // 6. Save outputs to filesystem cvSaveImage("./output_arm.png", outputImg_arm, 0); cvSaveImage("./output_dsp.png", outputImg_dsp, 0); //7. Free memeory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&inputImg2); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); printf("C6accel_cvAbsDiff test completed successfully; outputs saved to filesystem\n"); return 1; } Int c6accel_test_cvAbsDiffS(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; CvScalar value = cvScalarAll(0.0); printf("cvAbsDiffS Test (%s, %i iterations)\n", input_file_name1, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input images from file inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); // 3.a Apply ARM algorithm cvAbsDiffS(inputImg1,outputImg_arm,value); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvAbsDiffS(inputImg1, outputImg_arm,value); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM AbsDiffS function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 3.b Apply DSP algorithm C6accel_cvAbsDiffS(hC6accel, inputImg1,outputImg_arm,value); // run once before timing gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvAbsDiffS(hC6accel, inputImg1, outputImg_dsp,value); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP AbsDiffS function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 4. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); // 5. Save outputs to filesystem cvSaveImage("./output_arm.png", outputImg_arm, 0); cvSaveImage("./output_dsp.png", outputImg_dsp, 0); //6. 
Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); printf("C6accel_cvAbsDiffS test completed successfully; outputs saved to filesystem\n"); return 1; } Int C6accel_test_contours(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *inputImg2, *outputImg_arm, *outputImg_dsp; IplImage* g_gray = NULL; double g_thresh = 100.0; CvSeq* contours = 0, *contour2; CvMemStorage* g_storage = NULL; int status; struct timeval startTime, endTime; int t_overhead=0, t_algo, i; double t_br_arm=0.0, t_dc_arm=0.0, t_ca_arm=0.0,t_br_dsp=0.0, t_dc_dsp=0.0, t_ca_dsp=0.0; float t_avg; CvScalar value = cvScalarAll(0.0); CvRect boundbox; double area,area_arm,area_dsp, Total_area_arm, Total_area_dsp; //Important: For DSP implementation pass pointer and allocate memroy from CMEM CvRect *boundbox_ptr; void *temp_ptr; printf("cvThreshold Test (%s, %i iterations)\n", input_file_name1, n); // initialize timer t_overhead = get_overhead_time(); printf("(Overhead time is %f ms.)\n", t_overhead / 1000.0); // 1. Read input images from file inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); inputImg2 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); //allocate memory for CvRect from CMEM boundbox_ptr=Memory_alloc(sizeof(CvRect), &testfxnsMemParams); // 2. Allocate output images (must have same depth, channels as input) // cvThreshold supports only single chanel output(8bit) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8,1); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8,1); g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height),8,1); //3. 
Create storage for the contour g_storage = cvCreateMemStorage(0); temp_ptr = cvMemStorageAlloc(g_storage, 64); contours = cvCreateSeq(0,sizeof(CvSeq),sizeof(CvPoint), g_storage); cvCvtColor( inputImg1, g_gray, CV_BGR2GRAY ); gettimeofday(&startTime, NULL); // 3.a Apply ARM algorithm for (i = 0; i < n; i++) cvThreshold(g_gray, outputImg_arm , g_thresh, 255.0, CV_THRESH_BINARY ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvThreshold function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 3.b Apply DSP algorithm gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) status = C6accel_cvThreshold(hC6accel, g_gray, outputImg_dsp , g_thresh, 255.0, CV_THRESH_BINARY ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvThreshold function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. 
Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); cvSaveImage("./output_arm_thresh.png",outputImg_arm , 0); cvSaveImage("./output_dsp_thresh.png",outputImg_dsp , 0); printf("Find Contours Called\n"); //6 Test for drawing functions and contour features :Bounding Rect, DrawContours, ContourArea C6accel_cvFindContours(hC6accel,outputImg_arm, g_storage, &contours, sizeof(CvContour),CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE,cvPoint(0,0) ); printf("Find Contours Complete\n"); gettimeofday(&startTime, NULL); boundbox = cvBoundingRect(contours,1); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvBoundingRect function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); C6accel_cvBoundingRect(hC6accel,contours,boundbox_ptr,1); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvBoundingRect function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); cvDrawContours(inputImg1, contours,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvDrawContour function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); C6accel_cvDrawContours(hC6accel,inputImg2, contours,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvDrawContour function 
%i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); cvContourArea( (void *)contours,CV_WHOLE_SEQ,0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvContourArea function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); C6accel_cvContourArea(hC6accel, contours,CV_WHOLE_SEQ,&area ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvContourArea function %i times (average time: %f ms)\n", n, t_avg / 1000.0); i=0; Total_area_arm = 0.0; Total_area_dsp = 0.0; gettimeofday(&startTime, NULL); for(; contours; contours = contours->h_next) { if( contours ){ i++; //ARM code boundbox = cvBoundingRect(contours,1); cvRectangle(inputImg1, cvPoint(boundbox.x, boundbox.y), cvPoint(boundbox.x+boundbox.width, boundbox.y+boundbox.height), cvScalar(255,0,0,0), 1, 8, 0); //DSP code //Find minimal bounding box for each sequence C6accel_cvBoundingRect(hC6accel,contours,boundbox_ptr,1); cvRectangle(inputImg2, cvPoint(boundbox_ptr->x, boundbox_ptr->y), cvPoint(boundbox_ptr->x+boundbox_ptr->width, boundbox_ptr->y+boundbox_ptr->height), cvScalar(255,0,0,0), 1, 8, 0); cvDrawContours(inputImg1, contours,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) ); C6accel_cvDrawContours(hC6accel,inputImg2, contours,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) ); area_arm = fabs(cvContourArea( (void *)contours,CV_WHOLE_SEQ,0)); Total_area_arm += area_arm; C6accel_cvContourArea(hC6accel, contours,CV_WHOLE_SEQ,&area ); Total_area_dsp += fabs(area); } } gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP 
Plotting function %i times (average time: %f ms)\n", n, t_avg / 1000.0); printf("Difference between total area calculated using ContourArea =%f\n", fabs(Total_area_dsp-Total_area_arm) ); // 6. Save outputs to filesystem cvSaveImage("./output_arm_contour.png",inputImg1 , 0); cvSaveImage("./output_dsp_contour.png",inputImg2 , 0); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&g_gray); cvReleaseImage(&inputImg2); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); cvReleaseMemStorage(&g_storage); printf("Test for contours completed successfully; outputs saved to filesystem\n"); return 1; } Int C6Accel_test_Matchshapes(C6accel_Handle hC6accel, char *input_file_name1, char *input_file_name2,int n) { IplImage *inputImg1, *inputImg2, *outputImg_arm, *outputImg_dsp; IplImage* g_gray = NULL; double g_thresh = 100.0; CvSeq *contour1,*contour2, *tmp1, *tmp2; CvMemStorage* g_storage_1 = NULL; CvMemStorage* g_storage_2 = NULL; int status; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; void *temp_ptr_1,*temp_ptr_2; double measure,measure_arm; unsigned int var; // 1. Read input images from file inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); inputImg2 = cvLoadImage( input_file_name2, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate output images (must have same depth, channels as input) // cvThreshold supports only single chanel output(8bit) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8,1); outputImg_dsp = cvCreateImage(cvSize(inputImg2->width, inputImg1->height), 8,1); g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height),8,1); //3.Create storage for the contour g_storage_1 = cvCreateMemStorage(0); temp_ptr_1 = cvMemStorageAlloc(g_storage_1, 64); contour1 = cvCreateSeq(0,sizeof(CvSeq),sizeof(CvPoint), g_storage_1); cvCvtColor( inputImg1, g_gray, CV_BGR2GRAY ); cvThreshold(g_gray, outputImg_arm , g_thresh, 255.0, CV_THRESH_BINARY ); cvFindContours(outputImg_arm,g_storage_1, &contour1,sizeof(CvContour), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE,cvPoint(0,0)); g_storage_2 = cvCreateMemStorage(0); temp_ptr_2 = cvMemStorageAlloc(g_storage_2, 64); contour2 = cvCreateSeq(0,sizeof(CvSeq),sizeof(CvPoint), g_storage_1); cvCvtColor( inputImg2, g_gray, CV_BGR2GRAY ); cvThreshold(g_gray, outputImg_dsp , g_thresh, 255.0, CV_THRESH_BINARY ); cvFindContours(outputImg_dsp,g_storage_2, &contour2,sizeof(CvContour), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE,cvPoint(0,0)); tmp1= contour1; tmp2= contour2; cvCopy(inputImg2,inputImg1,0); //ARM: Benchmark loop gettimeofday(&startTime, NULL); for(i=0; contour1,contour2; contour1 = contour1->h_next,contour2 = contour2->h_next) { measure_arm = cvMatchShapes(contour2,contour1, 2,1.0); } gettimeofday(&endTime, NULL); //ARM: Draw loop contour1=tmp1; contour2=tmp2; for(i=0; contour1,contour2; contour1 = contour1->h_next,contour2 = contour2->h_next) { measure_arm = cvMatchShapes(contour2,contour1, 2,1.0); //Code to plot matched and unmatched contours printf("match_arm = %f\n",(double)measure_arm); if(measure_arm<0.7){ //arbitary threshold cvDrawContours(inputImg1, contour2,CV_RGB(0,255,0),CV_RGB(0,255,0),-1, 3,8, cvPoint(0,0) ); } if(measure_arm>=0.7){//arbitary threshold cvDrawContours(inputImg1, 
contour2,CV_RGB(0,0,255),CV_RGB(0,0,255),-1, 3,8, cvPoint(0,0) ); } } t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvMatchShapes function %i times (average time: %f ms)\n", n, t_avg / 1000.0); contour1=tmp1; contour2=tmp2; //DSP: Benchmark loop gettimeofday(&startTime, NULL); for(i=0; contour1,contour2; contour1 = contour1->h_next,contour2 = contour2->h_next) { C6accel_cvMatchShapes(hC6accel,contour1,contour2, 2,1.0,&measure); } gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvMatchShapes function %i times (average time: %f ms)\n", n, t_avg / 1000.0); contour1=tmp1; contour2=tmp2; //DSP: Draw loop for(i=0; contour1,contour2; contour1 = contour1->h_next,contour2 = contour2->h_next) { C6accel_cvMatchShapes(hC6accel,contour1,contour2, 2,1.0,&measure); printf("match_dsp = %f\n",(double)measure); if(measure<0.7){ cvDrawContours(inputImg2, contour2,CV_RGB(0,255,0),CV_RGB(0,255,0),-1, 3,8, cvPoint(0,0) ); } if(measure>=0.7){ cvDrawContours(inputImg2, contour2,CV_RGB(0,0,255),CV_RGB(0,0,255),-1, 3,8, cvPoint(0,0) ); } } gettimeofday(&endTime, NULL); // 6. Save outputs to filesystem cvSaveImage("./output_arm_matchcontour.png",inputImg1 , 0); cvSaveImage("./output_dsp_matchcontour.png",inputImg2 , 0); //7. 
Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&inputImg2); cvReleaseImage(&g_gray); cvReleaseMemStorage(&g_storage_1); cvReleaseMemStorage(&g_storage_2); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 0; } Int C6Accel_test_FindContours(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *inputImg2, *outputImg_arm, *outputImg_dsp; IplImage* g_gray = NULL; double g_thresh = 100.0; CvSeq *contour1 = NULL,*contour2=NULL , *tmp1, *tmp2; CvMemStorage* g_storage_1 = NULL; CvMemStorage* g_storage_2 = NULL; int status; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; void *temp_ptr_1,*temp_ptr_2; double measure,measure_arm; unsigned int var; int storage_size = 200*1024; // 200K /*** Storage size from one image to other *****/ // 1. Read input images from file inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); printf("inp_image->imagedata= %x\n",CMEM_getPhys(inputImg1->imageData)); inputImg2 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate output images (must have same depth, channels as input) // cvThreshold supports only single chanel output(8bit) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8,1); outputImg_dsp = cvCreateImage(cvSize(inputImg2->width, inputImg2->height), 8,1); g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height),8,1); //Create storage for the contour //ARM loop g_storage_1 = cvCreateMemStorage(storage_size); temp_ptr_1 = cvMemStorageAlloc(g_storage_1, 64); cvCvtColor( inputImg1, g_gray, CV_BGR2GRAY ); cvThreshold(g_gray, outputImg_arm , g_thresh, 255.0, CV_THRESH_BINARY ); gettimeofday(&startTime, NULL); cvFindContours(outputImg_arm,g_storage_1, &contour1,sizeof(CvContour), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE,cvPoint(0,0)); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvFindContours function %i times (average time: %f ms)\n", n, t_avg / 1000.0); for(i=0;contour1;contour1=contour1->h_next,i++) { // status = contour1->flags; // printf("Flag=%x\n",status); cvDrawContours(inputImg1, contour1,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) ); } // printf("Press Enter\n"); // getchar(); //DSP loop g_storage_2 = cvCreateMemStorage(storage_size); temp_ptr_2 = cvMemStorageAlloc(g_storage_2, 64); contour2 = cvCreateSeq(0,sizeof(CvSeq),sizeof(CvPoint), g_storage_1); cvCvtColor( inputImg2, g_gray, CV_BGR2GRAY ); cvThreshold(g_gray, outputImg_dsp , g_thresh, 255.0, CV_THRESH_BINARY ); gettimeofday(&startTime, NULL); C6accel_cvFindContours(hC6accel, outputImg_dsp,g_storage_2, &contour2,sizeof(CvContour), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE,cvPoint(0,0)); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvFindContours function %i times (average time: %f 
ms)\n", n, t_avg / 1000.0); printf("Test for cvFindContours function called successfully. Output saved to filesystem\n"); for(i=0;contour2;contour2=contour2->h_next,i++) { // status = contour2->flags; // printf("Flag=%x\n",status); C6accel_cvDrawContours(hC6accel,inputImg2, contour2,CV_RGB(255,0,0),CV_RGB(0,255,0),-1, 1,8, cvPoint(0,0) ); } // 6. Save outputs to filesystem cvSaveImage("./output_arm_contour.png",inputImg1 , 0); cvSaveImage("./output_dsp_contour.png",inputImg2 , 0); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&inputImg2); cvReleaseImage(&g_gray); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); cvReleaseMemStorage(&g_storage_2); cvReleaseMemStorage(&g_storage_1); return 0; } Int C6Accel_test_Dilate(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; // 1. Load Input inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); // 2. Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); //3. Benchmark the ARM call gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvDilate(inputImg1,outputImg_arm,NULL,pos); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvDilate function %i times (average time: %f ms)\n", n, t_avg / 1000.0); //4. 
Benchmark the DSP call gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvDilate(hC6accel,inputImg1,outputImg_dsp,NULL,pos); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvDilate function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); // 6. Save output cvSaveImage("./output_arm.png",outputImg_arm , 0); cvSaveImage("./output_dsp.png",outputImg_dsp , 0); printf("Test for Dilate operations done\n"); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 1; } Int C6Accel_test_Erode(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvErode(inputImg1,outputImg_arm,NULL,pos); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvErode function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvErode(hC6accel,inputImg1,outputImg_dsp,NULL,pos); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvErode function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); cvSaveImage("./output_arm.png",outputImg_arm , 0); cvSaveImage("./output_dsp.png",outputImg_dsp , 0); printf("Test for Erode operations done\n"); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 1; } Int C6Accel_test_Laplace(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *g_gray; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_16S, 1); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_16S, 1); g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height),IPL_DEPTH_8U, 1); cvCvtColor( inputImg1, g_gray, CV_BGR2GRAY ); gettimeofday(&startTime, NULL); //for (i = 0; i < n; i++) cvLaplace(g_gray,outputImg_arm ,3); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvLaplace function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); // for (i = 0; i < n; i++) C6accel_cvLaplace(hC6accel,g_gray,outputImg_dsp,3); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo /(float)n; printf("Called DSP cvLaplace function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); cvSaveImage("./output_arm.png",outputImg_arm , 0); cvSaveImage("./output_dsp.png",outputImg_dsp , 0); printf("Test for Laplace operations done\n"); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&g_gray); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 1; } Int C6Accel_test_PyrDown(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; inputImg1 = cvLoadImage( input_file_name1, 0); // 2. 
Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width/2, inputImg1->height/2), IPL_DEPTH_8U, 1); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width/2, inputImg1->height/2), IPL_DEPTH_8U, 1); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvPyrDown(inputImg1,outputImg_arm ,CV_GAUSSIAN_5x5); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvPyrDown function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvPyrDown(hC6accel,inputImg1,outputImg_dsp,CV_GAUSSIAN_5x5); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvPyrDown function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); cvSaveImage("./output_arm.png",outputImg_arm , 0); cvSaveImage("./output_dsp.png",outputImg_dsp , 0); printf("Test for PyrDown operations done\n"); //7. 
Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 1; } Int C6Accel_test_Filter2D(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; CvMat *filter; int nFiltCols=5, nFiltRows =5; float kernel [25] = { 0,-1, 0,1,0, -1,-2,0,2,1, -1,-2,1,2,1, -1,-1,0,2,1, 0,-1,0,1,0}; float* pkernel; /* Allocate CMEM memory for 3x3 short mask*/ pkernel = Memory_alloc(25*sizeof(float), &testfxnsMemParams); memcpy( pkernel,kernel,25*sizeof(float)); inputImg1 = cvLoadImage( input_file_name1, 0); // 2. Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U, 1); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U, 1); filter = cvCreateMat(nFiltRows, nFiltCols, CV_32FC1); cvSetData(filter,pkernel,nFiltCols*sizeof(float) ); printf("Mat =%x\n", CMEM_getPhys(filter)); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvFilter2D(inputImg1,outputImg_arm,filter,cvPoint(-1,-1)); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvFilter2D function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvFilter2D(hC6accel,inputImg1,outputImg_dsp,filter,cvPoint(-1,-1)); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvFilter2D function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. 
Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); cvSaveImage("./output_arm.png",outputImg_arm , 0); cvSaveImage("./output_dsp.png",outputImg_dsp , 0); printf("Test for Filter2D operations done\n"); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); //printf("Test for Filter2D operations done\n"); /* Release Gaussian CMEM */ Memory_free(pkernel,25*sizeof(float),&testfxnsMemParams); return 1; } Int C6Accel_test_Canny(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *g_gray; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); // 2. Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8,1); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), 8,1); g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height),8,1); //for (i=0;i<30;i++) cvCvtColor(inputImg1, g_gray, CV_BGR2GRAY ); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvCanny(g_gray,outputImg_arm ,10.0,100.0,3); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvCanny function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvCanny(hC6accel,g_gray,outputImg_dsp,(double)10.0,(double)100.00,3); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvCanny function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. 
Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); cvSaveImage("./output_arm.png",outputImg_arm , 0); cvSaveImage("./output_dsp.png",outputImg_dsp , 0); printf("Test for Canny edge detection done\n"); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&g_gray); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 1; } Int C6Accel_test_CornerHarris(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *corner8; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; double minVal=0.0, maxVal=0.0; double scale, shift; double min=0, max=255; inputImg1 = cvLoadImage( input_file_name1, 0); // 2. Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height),IPL_DEPTH_32F ,1); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_32F ,1); corner8 = cvCreateImage(cvSize(inputImg1->width, inputImg1->height),IPL_DEPTH_8U ,1); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvCornerHarris(inputImg1,outputImg_arm ,3,3, 0.04); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvCornerHarris function %i times (average time: %f ms)\n", n, t_avg / 1000.0); cvMinMaxLoc( outputImg_arm, &minVal, &maxVal, NULL, NULL, 0); scale = (max - min)/(maxVal-minVal); shift = -minVal * scale + min; cvConvertScale(outputImg_arm, corner8 ,scale,shift); cvSaveImage("./output_arm.png",corner8 , 0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvCornerHarris(hC6accel,inputImg1,outputImg_dsp,3,3, 0.04); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - 
startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvCornerHarris function %i times (average time: %f ms)\n", n, t_avg / 1000.0); cvMinMaxLoc( outputImg_dsp, &minVal, &maxVal, NULL, NULL, 0); scale = (max - min)/(maxVal-minVal); shift = -minVal * scale + min; cvConvertScale(outputImg_dsp, corner8 ,scale,shift); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); cvSaveImage("./output_dsp.png",corner8 , 0); printf("Test for cornerharris edge detection done\n"); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&corner8); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 1; } Int C6Accel_test_CornerEigenValsAndVecs(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *g_gray, *eig_val_arm, *eig_val_dsp; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; double minVal=0.0, maxVal=0.0; double scale, shift; double min=0, max=255; inputImg1 = cvLoadImage( input_file_name1, 1); // 2. Allocate output images (must have same depth, channels as input) //Output must have 6 times the width of the input image to store Eigen values and eigen vectors. 
outputImg_arm = cvCreateImage(cvSize(inputImg1->width*6, inputImg1->height),IPL_DEPTH_32F ,1); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width*6, inputImg1->height), IPL_DEPTH_32F ,1); g_gray = cvCreateImage(cvSize(inputImg1->width,inputImg1->height), inputImg1->depth, 1); eig_val_arm = cvCreateImage(cvSize(inputImg1->width,inputImg1->height), IPL_DEPTH_32F, 1); eig_val_dsp = cvCreateImage(cvSize(inputImg1->width,inputImg1->height), IPL_DEPTH_32F, 1); cvvConvertImage (inputImg1, g_gray, 0); gettimeofday(&startTime, NULL); //After that it finds eigenvectors and eigenvalues of //the resultant matrix and stores them into destination //image in form (¦Ë1, ¦Ë2, x1, y1, x2, y2), where //¦Ë1, ¦Ë2 - eigenvalues of M; not sorted //(x1, y1) - eigenvector corresponding to ¦Ë1 //(x2, y2) - eigenvector corresponding to ¦Ë2 for (i = 0; i < n; i++) cvCornerEigenValsAndVecs(g_gray,outputImg_arm ,5,5); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvCornerEigenValsAndVecs function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // cvSaveImage("./output_arm.png",outputImg_arm , 0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvCornerEigenValsAndVecs(hC6accel,g_gray,outputImg_dsp,5,5); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvCornerEigenValsAndVecs function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. 
Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvCornerMinEigenVal(g_gray,eig_val_arm ,5,5); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvCornerMinEigenVal function %i times (average time: %f ms)\n", n, t_avg / 1000.0); cvSaveImage("./output_arm.png",eig_val_arm , 0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvCornerMinEigenVal(hC6accel,g_gray,eig_val_dsp ,5,5); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvCornerMinEigenVal function %i times (average time: %f ms)\n", n, t_avg / 1000.0); cvSaveImage("./output_dsp.png",eig_val_dsp , 0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(eig_val_arm, eig_val_dsp, CV_L2, NULL)); printf("Test for CornerEigenValsAndVecs and CornerMinEigVal completed successfully\n"); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&g_gray); cvReleaseImage(&eig_val_arm); cvReleaseImage(&eig_val_dsp); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 1; } Int C6Accel_test_Smooth(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *g_gray; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), inputImg1->depth, inputImg1->nChannels); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvSmooth(inputImg1,outputImg_arm ,CV_GAUSSIAN, 11,11,0.0,0.0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvSmooth function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvSmooth(hC6accel,inputImg1,outputImg_dsp ,CV_GAUSSIAN, 11,11,(double)0.0,(double)0.0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP cvSmooth function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); cvSaveImage("./output_arm.png",outputImg_arm , 0); cvSaveImage("./output_dsp.png",outputImg_dsp , 0); printf("Test for Smooth/Bluring operation done\n"); //7. Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 1; } Int C6Accel_test_AdaptiveThreshold(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage *inputImg1, *outputImg_arm, *outputImg_dsp, *g_gray; int pos= 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; inputImg1 = cvLoadImage( input_file_name1, CV_LOAD_IMAGE_COLOR); // 2. 
Allocate output images (must have same depth, channels as input) outputImg_arm = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U,1); outputImg_dsp = cvCreateImage(cvSize(inputImg1->width, inputImg1->height), IPL_DEPTH_8U,1); g_gray = cvCreateImage(cvSize(inputImg1->width, inputImg1->height),IPL_DEPTH_8U,1); cvCvtColor( inputImg1, g_gray, CV_BGR2GRAY ); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) cvAdaptiveThreshold(g_gray,outputImg_arm ,(double)125.0, CV_ADAPTIVE_THRESH_MEAN_C,CV_THRESH_BINARY,7,(double)10.0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Adaptive threshold function %i times (average time: %f ms)\n", n, t_avg / 1000.0); cvCvtColor( inputImg1, g_gray, CV_BGR2GRAY ); gettimeofday(&startTime, NULL); for (i = 0; i < n; i++) C6accel_cvAdaptiveThreshold(hC6accel,g_gray,outputImg_dsp ,(double)125.0, CV_ADAPTIVE_THRESH_MEAN_C,CV_THRESH_BINARY,7,(double)10.0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP Adaptive threshold function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(outputImg_arm, outputImg_dsp, CV_L2, NULL)); cvSaveImage("./output_arm.png",outputImg_arm , 0); cvSaveImage("./output_dsp.png",outputImg_dsp , 0); printf("Test for Adaptive thresholding operation done\n"); //7. 
Free memory allocated to images cvReleaseImage(&inputImg1); cvReleaseImage(&g_gray); cvReleaseImage(&outputImg_arm); cvReleaseImage(&outputImg_dsp); return 1; } Int C6Accel_test_Houghlines2D(C6accel_Handle hC6accel, char *input_file_name1, int n) { IplImage* src, *dst; IplImage* color_dst_arm, *color_dst_dsp; CvMemStorage* storage_dsp, *storage_arm ; CvSeq* lines_dsp = NULL, *lines_arm =NULL; int t_overhead, t_algo, i; struct timeval startTime, endTime; float t_avg; CvPoint pt1,pt2; float* line, rho, theta; double a,b, x0,y0; void *temp_ptr; src= cvLoadImage(input_file_name1, 0); dst= cvCreateImage( cvGetSize(src), 8, 1 ); color_dst_arm = cvCreateImage( cvGetSize(src), 8, 3 ); color_dst_dsp = cvCreateImage( cvGetSize(src), 8, 3 ); storage_arm = cvCreateMemStorage(0); temp_ptr = cvMemStorageAlloc(storage_arm, 64); cvCanny( src, dst, 50, 200, 3 ); cvCvtColor( dst, color_dst_arm, CV_GRAY2BGR ); cvCvtColor( dst, color_dst_dsp, CV_GRAY2BGR ); gettimeofday(&startTime, NULL); /*C6accel_cvHoughLines2( hC6accel, dst, storage, CV_HOUGH_PROBABILISTIC, (double)1.0, (double)(CV_PI/180), 80, (double)30.0, (double)10.0,&lines );*/ lines_arm = cvHoughLines2( dst, storage_arm, CV_HOUGH_PROBABILISTIC, (double)1.0, (double)(CV_PI/180), 80, (double)30.0, (double)10.0); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM HoughLines2D function %i times (average time: %f ms)\n", n, t_avg / 1000.0); for( i = 0; i < lines_arm->total; i++ ) { CvPoint* line = (CvPoint*)cvGetSeqElem(lines_arm,i); cvLine( color_dst_arm, line[0], line[1], CV_RGB(255,0,0), 1, 8,0 ); } cvSaveImage("./output_arm.png",color_dst_arm , 0); //DSP Processing storage_dsp = cvCreateMemStorage(0); temp_ptr = cvMemStorageAlloc(storage_dsp, 64); gettimeofday(&startTime, NULL); C6accel_cvHoughLines2( hC6accel, dst, storage_dsp, CV_HOUGH_PROBABILISTIC, (double)1.0, (double)(CV_PI/180), 80, 
(double)30.0, (double)10.0,&lines_dsp ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP HoughLines2D function %i times (average time: %f ms)\n", n, t_avg / 1000.0); for( i = 0; i < lines_dsp->total; i++ ) { CvPoint* line = (CvPoint*)cvGetSeqElem(lines_dsp,i); cvLine( color_dst_dsp, line[0], line[1], CV_RGB(255,0,0), 1, 8,0 ); } // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(color_dst_arm, color_dst_dsp, CV_L2, NULL)); cvSaveImage("./output_dsp.png",color_dst_dsp , 0); printf("Releasing allocated buffers\n"); //6 Release Memory allocatted for the test cvReleaseImage( &color_dst_arm ); cvReleaseImage( &color_dst_dsp ); cvReleaseImage( &src); cvReleaseImage( &dst); cvReleaseMemStorage( &storage_arm); cvReleaseMemStorage( &storage_dsp); return 1; } /*Int C6Accel_test_opticalflowHS(C6accel_Handle hC6accel, char *input_file_name1, int n) { int step, x,y; float *px, *py; int t_overhead, t_algo, i; struct timeval startTime, endTime; float t_avg; // Initialize, load two images from the file system, and // allocate the images and other structures we will need for // results. 
// exit if no input images IplImage *imgA = 0, *imgB = 0; imgA = cvLoadImage("opencv_images/OpticalFlow0.jpg",0); imgB = cvLoadImage("opencv_images/OpticalFlow1.jpg",0); if(!(imgA)||!(imgB)){ printf("One of OpticalFlow0.jpg and/or OpticalFlow1.jpg didn't load\n"); return -1;} printf("1\n"); IplImage* velx = cvCreateImage(cvGetSize(imgA),IPL_DEPTH_32F,1); IplImage* vely = cvCreateImage(cvGetSize(imgA),IPL_DEPTH_32F,1); IplImage* imgC = cvCreateImage(cvGetSize(imgA),IPL_DEPTH_8U,3); imgC = cvLoadImage("opencv_images/OpticalFlow1.jpg",1); printf("2\n"); cvSaveImage( "./OpticalFlow0.png",imgA, 0 ); cvSaveImage( "./OpticalFlow1.png",imgB, 0 ); gettimeofday(&startTime, NULL); // Call the actual Horn and Schunck algorithm // cvCalcOpticalFlowHS( imgA, imgB, 0, velx, vely, .10, cvTermCriteria( CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, imgA->width, 1e-6 ) ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called Optical flow function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // Now make some image of what we are looking at: // gettimeofday(&startTime, NULL); step = 4; for( y=0; y<imgC->height; y += step ) { px = (float*) ( velx->imageData + y * velx->widthStep ); py = (float*) ( vely->imageData + y * vely->widthStep ); for( x=0; x<imgC->width; x += step ) { if( px[x]>1 && py[x]>1 ) { cvCircle( imgC, cvPoint( x, y ), 2, CVX_GRAY50, -1,8,0 ); cvLine( imgC, cvPoint( x, y ), cvPoint( x+px[x]/2, y+py[x]/2 ), CV_RGB(255,0,0), 1,8, 0 ); } } } gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM cvSmooth function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // show tracking cvSaveImage( "./Flow Results.png",imgC ,0); // release memory cvReleaseImage( &imgA ); cvReleaseImage( &imgB ); cvReleaseImage( 
&imgC ); cvReleaseImage( &velx); cvReleaseImage( &vely); return 0; }*/ Int C6Accel_test_rotation(C6accel_Handle hC6accel, char *input_file_name1, int n) { int angle_switch_value = 0; int angleInt = 0; int scale_switch_value = 0; int scaleInt = 0; struct timeval startTime, endTime; int t_overhead, t_algo, i; float t_avg; // Set up variables CvPoint2D32f srcTri[3], dstTri[3]; CvMat* rot_mat = cvCreateMat(2,3,CV_32FC1); CvMat* rot_mat_dsp = cvCreateMat(2,3,CV_32FC1); CvMat* warp_mat = cvCreateMat(2,3,CV_32FC1); CvMat* warp_mat_dsp = cvCreateMat(2,3,CV_32FC1); IplImage *src, *dst_arm, *dst_dsp; const char* name = "Affine_Transform"; // Load image src=cvLoadImage(input_file_name1,1); dst_arm = cvLoadImage(input_file_name1,1); dst_dsp = cvLoadImage(input_file_name1,1); dst_arm->origin = src->origin; dst_dsp->origin = src->origin; cvZero( dst_arm ); cvZero( dst_dsp ); // Create angle and scale double angle = 45.0; double scale = 1.0; // Compute warp matrix srcTri[0].x = 0; srcTri[0].y = 0; srcTri[1].x = src->width - 1; srcTri[1].y = 0; srcTri[2].x = 0; srcTri[2].y = src->height - 1; dstTri[0].x = src->width*0.0; dstTri[0].y = src->height*0.25; dstTri[1].x = src->width*0.90; dstTri[1].y = src->height*0.15; dstTri[2].x = src->width*0.10; dstTri[2].y = src->height*0.75; gettimeofday(&startTime, NULL); cvGetAffineTransform( srcTri, dstTri, warp_mat ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM get Affine transform matrix %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); C6accel_cvGetAffineTransform( hC6accel, srcTri, dstTri, warp_mat_dsp ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP get Affine transform matrix %i times (average time: %f ms)\n", n, 
t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(warp_mat, warp_mat_dsp, CV_L2, NULL)); gettimeofday(&startTime, NULL); cvWarpAffine( src, dst_arm, warp_mat,CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS,cvScalarAll(0) ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Affine transform function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); C6accel_cvWarpAffine( hC6accel,src, dst_dsp, warp_mat_dsp,CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS,cvScalarAll(0) ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP Affine transform function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(dst_arm, dst_dsp, CV_L2, NULL)); cvSaveImage( "./output_arm_affine.png",dst_arm ,0); cvSaveImage( "./output_dsp_affine.png",dst_dsp ,0); cvCopy ( dst_arm, src, NULL ); cvCopy ( dst_dsp, src, NULL ); // Compute rotation matrix CvPoint2D32f center = cvPoint2D32f( src->width/2, src->height/2 ); gettimeofday(&startTime, NULL); cv2DRotationMatrix(center, angle, scale, rot_mat ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM 2D Rotation Matrix function %i times (average time: %f ms)\n", n, t_avg / 1000.0); gettimeofday(&startTime, NULL); C6accel_cv2DRotationMatrix(hC6accel, center, angle, scale, rot_mat_dsp ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called 
DSP 2D Rotation Matrix function %i times (average time: %f ms)\n", n, t_avg / 1000.0); printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(rot_mat, rot_mat_dsp, CV_L2, NULL)); // Do the transformation gettimeofday(&startTime, NULL); cvWarpAffine(src, dst_arm, rot_mat,CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS,cvScalarAll(0) ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called ARM Rotation using Affine function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // Do the transformation gettimeofday(&startTime, NULL); C6accel_cvWarpAffine( hC6accel,src, dst_dsp, rot_mat,CV_INTER_LINEAR+CV_WARP_FILL_OUTLIERS,cvScalarAll(0) ); gettimeofday(&endTime, NULL); t_algo = (endTime.tv_sec - startTime.tv_sec) * 1000000 + endTime.tv_usec - startTime.tv_usec - t_overhead; t_avg = (float)t_algo / (float)n; printf("Called DSP Rotation using Affine function %i times (average time: %f ms)\n", n, t_avg / 1000.0); // 5. Compare outputs printf("Difference (L2 norm) between ARM and DSP outputs: %f\n", cvNorm(dst_arm, dst_dsp, CV_L2, NULL)); cvSaveImage( "./output_arm_rotated.png",dst_arm ,0); cvSaveImage( "./output_dsp_rotated.png",dst_dsp ,0); cvReleaseImage( &dst_arm ); cvReleaseImage( &dst_dsp ); cvReleaseImage( &src); cvReleaseMat( &rot_mat ); cvReleaseMat( &warp_mat ); cvReleaseMat( &rot_mat_dsp ); cvReleaseMat( &warp_mat_dsp ); return 0; }