cocos2dx 高性能高斯模糊(包含lua接口)

参考官方论坛的帖子,实现了对当前屏幕内容进行高斯模糊的功能(原文此处附有该帖子的链接)。


1.截屏缩小压缩,减小像素采样的优化算法。默认截屏后缩小到原来的1/4。

2.C++代码进行一次性高斯模糊。避免使用shader造成的渲染掉帧


以下是C++部分代码:

	/*
	* 高斯模糊接口  缩放因子:iScale,截图会把全屏压缩为1/iScale大
	*/
	static void gaussianBlur(const std::function& afterCaptured, int iScale = 4);

// The Stack Blur Algorithm was invented by Mario Klingemann, 
// mario@quasimondo.com and described here:
// http://incubator.quasimondo.com/processing/fast_blur_deluxe.php

// This is C++ RGBA (32 bit color) multi-threaded version 
// by Victor Laskin (victor.laskin@gmail.com)
// More details: http://vitiy.info/stackblur-algorithm-multi-threaded-blur-for-cpp

// This code is using MVThread class from my cross-platform framework 
// You can exchange it with any thread implementation you like
// -------------------------------------- stackblur ----------------------------------------->

/// Fixed-point multipliers, indexed by blur radius (255 entries, radius 0..254).
/// stackblurJob computes each output channel as
///   (weighted_sum * stackblur_mul[radius]) >> stackblur_shr[radius]
/// so the two tables together act as a reciprocal that normalizes the
/// weighted sum without a division (e.g. radius 1: 512 >> 11 == 1/4).
static unsigned short const stackblur_mul[255] =
{
	512, 512, 456, 512, 328, 456, 335, 512, 405, 328, 271, 456, 388, 335, 292, 512,
	454, 405, 364, 328, 298, 271, 496, 456, 420, 388, 360, 335, 312, 292, 273, 512,
	482, 454, 428, 405, 383, 364, 345, 328, 312, 298, 284, 271, 259, 496, 475, 456,
	437, 420, 404, 388, 374, 360, 347, 335, 323, 312, 302, 292, 282, 273, 265, 512,
	497, 482, 468, 454, 441, 428, 417, 405, 394, 383, 373, 364, 354, 345, 337, 328,
	320, 312, 305, 298, 291, 284, 278, 271, 265, 259, 507, 496, 485, 475, 465, 456,
	446, 437, 428, 420, 412, 404, 396, 388, 381, 374, 367, 360, 354, 347, 341, 335,
	329, 323, 318, 312, 307, 302, 297, 292, 287, 282, 278, 273, 269, 265, 261, 512,
	505, 497, 489, 482, 475, 468, 461, 454, 447, 441, 435, 428, 422, 417, 411, 405,
	399, 394, 389, 383, 378, 373, 368, 364, 359, 354, 350, 345, 341, 337, 332, 328,
	324, 320, 316, 312, 309, 305, 301, 298, 294, 291, 287, 284, 281, 278, 274, 271,
	268, 265, 262, 259, 257, 507, 501, 496, 491, 485, 480, 475, 470, 465, 460, 456,
	451, 446, 442, 437, 433, 428, 424, 420, 416, 412, 408, 404, 400, 396, 392, 388,
	385, 381, 377, 374, 370, 367, 363, 360, 357, 354, 350, 347, 344, 341, 338, 335,
	332, 329, 326, 323, 320, 318, 315, 312, 310, 307, 304, 302, 299, 297, 294, 292,
	289, 287, 285, 282, 280, 278, 275, 273, 271, 269, 267, 265, 263, 261, 259
};

/// Right-shift amounts, indexed by blur radius (255 entries, radius 0..254).
/// Paired entry-for-entry with stackblur_mul: stackblurJob multiplies a
/// weighted channel sum by stackblur_mul[radius] and then shifts it right by
/// stackblur_shr[radius] to obtain the output byte.
static unsigned char const stackblur_shr[255] =
{
	9, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17,
	17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19,
	19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
	21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
	21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22,
	22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
	22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23,
	23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
	23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
	23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
	23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
	24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24
};

/// Stackblur algorithm body
void stackblurJob(unsigned char* src,				///< input image data
	unsigned int w,					///< image width
	unsigned int h,					///< image height
	unsigned int radius,				///< blur intensity (should be in 2..254 range)
	int cores,						///< total number of working threads
	int core,							///< current thread number
	int step,							///< step of processing (1,2)
	unsigned char* stack				///< stack buffer
	)
{
	unsigned int x, y, xp, yp, i;
	unsigned int sp;
	unsigned int stack_start;
	unsigned char* stack_ptr;

	unsigned char* src_ptr;
	unsigned char* dst_ptr;

	unsigned long sum_r;
	unsigned long sum_g;
	unsigned long sum_b;
	unsigned long sum_a;
	unsigned long sum_in_r;
	unsigned long sum_in_g;
	unsigned long sum_in_b;
	unsigned long sum_in_a;
	unsigned long sum_out_r;
	unsigned long sum_out_g;
	unsigned long sum_out_b;
	unsigned long sum_out_a;

	unsigned int wm = w - 1;
	unsigned int hm = h - 1;
	unsigned int w4 = w * 4;
	unsigned int div = (radius * 2) + 1;
	unsigned int mul_sum = stackblur_mul[radius];
	unsigned char shr_sum = stackblur_shr[radius];


	if (step == 1)
	{
		int minY = core * h / cores;
		int maxY = (core + 1) * h / cores;

		for (y = minY; y < maxY; y++)
		{
			sum_r = sum_g = sum_b = sum_a =
				sum_in_r = sum_in_g = sum_in_b = sum_in_a =
				sum_out_r = sum_out_g = sum_out_b = sum_out_a = 0;

			src_ptr = src + w4 * y; // start of line (0,y)

			for (i = 0; i <= radius; i++)
			{
				stack_ptr = &stack[4 * i];
				stack_ptr[0] = src_ptr[0];
				stack_ptr[1] = src_ptr[1];
				stack_ptr[2] = src_ptr[2];
				stack_ptr[3] = src_ptr[3];
				sum_r += src_ptr[0] * (i + 1);
				sum_g += src_ptr[1] * (i + 1);
				sum_b += src_ptr[2] * (i + 1);
				sum_a += src_ptr[3] * (i + 1);
				sum_out_r += src_ptr[0];
				sum_out_g += src_ptr[1];
				sum_out_b += src_ptr[2];
				sum_out_a += src_ptr[3];
			}


			for (i = 1; i <= radius; i++)
			{
				if (i <= wm) src_ptr += 4;
				stack_ptr = &stack[4 * (i + radius)];
				stack_ptr[0] = src_ptr[0];
				stack_ptr[1] = src_ptr[1];
				stack_ptr[2] = src_ptr[2];
				stack_ptr[3] = src_ptr[3];
				sum_r += src_ptr[0] * (radius + 1 - i);
				sum_g += src_ptr[1] * (radius + 1 - i);
				sum_b += src_ptr[2] * (radius + 1 - i);
				sum_a += src_ptr[3] * (radius + 1 - i);
				sum_in_r += src_ptr[0];
				sum_in_g += src_ptr[1];
				sum_in_b += src_ptr[2];
				sum_in_a += src_ptr[3];
			}


			sp = radius;
			xp = radius;
			if (xp > wm) xp = wm;
			src_ptr = src + 4 * (xp + y * w); //   img.pix_ptr(xp, y);
			dst_ptr = src + y * w4; // img.pix_ptr(0, y);
			for (x = 0; x < w; x++)
			{
				dst_ptr[0] = (sum_r * mul_sum) >> shr_sum;
				dst_ptr[1] = (sum_g * mul_sum) >> shr_sum;
				dst_ptr[2] = (sum_b * mul_sum) >> shr_sum;
				dst_ptr[3] = (sum_a * mul_sum) >> shr_sum;
				dst_ptr += 4;

				sum_r -= sum_out_r;
				sum_g -= sum_out_g;
				sum_b -= sum_out_b;
				sum_a -= sum_out_a;

				stack_start = sp + div - radius;
				if (stack_start >= div) stack_start -= div;
				stack_ptr = &stack[4 * stack_start];

				sum_out_r -= stack_ptr[0];
				sum_out_g -= stack_ptr[1];
				sum_out_b -= stack_ptr[2];
				sum_out_a -= stack_ptr[3];

				if (xp < wm)
				{
					src_ptr += 4;
					++xp;
				}

				stack_ptr[0] = src_ptr[0];
				stack_ptr[1] = src_ptr[1];
				stack_ptr[2] = src_ptr[2];
				stack_ptr[3] = src_ptr[3];

				sum_in_r += src_ptr[0];
				sum_in_g += src_ptr[1];
				sum_in_b += src_ptr[2];
				sum_in_a += src_ptr[3];
				sum_r += sum_in_r;
				sum_g += sum_in_g;
				sum_b += sum_in_b;
				sum_a += sum_in_a;

				++sp;
				if (sp >= div) sp = 0;
				stack_ptr = &stack[sp * 4];

				sum_out_r += stack_ptr[0];
				sum_out_g += stack_ptr[1];
				sum_out_b += stack_ptr[2];
				sum_out_a += stack_ptr[3];
				sum_in_r -= stack_ptr[0];
				sum_in_g -= stack_ptr[1];
				sum_in_b -= stack_ptr[2];
				sum_in_a -= stack_ptr[3];


			}

		}
	}

	// step 2
	if (step == 2)
	{
		int minX = core * w / cores;
		int maxX = (core + 1) * w / cores;

		for (x = minX; x < maxX; x++)
		{
			sum_r = sum_g = sum_b = sum_a =
				sum_in_r = sum_in_g = sum_in_b = sum_in_a =
				sum_out_r = sum_out_g = sum_out_b = sum_out_a = 0;

			src_ptr = src + 4 * x; // x,0
			for (i = 0; i <= radius; i++)
			{
				stack_ptr = &stack[i * 4];
				stack_ptr[0] = src_ptr[0];
				stack_ptr[1] = src_ptr[1];
				stack_ptr[2] = src_ptr[2];
				stack_ptr[3] = src_ptr[3];
				sum_r += src_ptr[0] * (i + 1);
				sum_g += src_ptr[1] * (i + 1);
				sum_b += src_ptr[2] * (i + 1);
				sum_a += src_ptr[3] * (i + 1);
				sum_out_r += src_ptr[0];
				sum_out_g += src_ptr[1];
				sum_out_b += src_ptr[2];
				sum_out_a += src_ptr[3];
			}
			for (i = 1; i <= radius; i++)
			{
				if (i <= hm) src_ptr += w4; // +stride

				stack_ptr = &stack[4 * (i + radius)];
				stack_ptr[0] = src_ptr[0];
				stack_ptr[1] = src_ptr[1];
				stack_ptr[2] = src_ptr[2];
				stack_ptr[3] = src_ptr[3];
				sum_r += src_ptr[0] * (radius + 1 - i);
				sum_g += src_ptr[1] * (radius + 1 - i);
				sum_b += src_ptr[2] * (radius + 1 - i);
				sum_a += src_ptr[3] * (radius + 1 - i);
				sum_in_r += src_ptr[0];
				sum_in_g += src_ptr[1];
				sum_in_b += src_ptr[2];
				sum_in_a += src_ptr[3];
			}

			sp = radius;
			yp = radius;
			if (yp > hm) yp = hm;
			src_ptr = src + 4 * (x + yp * w); // img.pix_ptr(x, yp);
			dst_ptr = src + 4 * x; 			  // img.pix_ptr(x, 0);
			for (y = 0; y < h; y++)
			{
				dst_ptr[0] = (sum_r * mul_sum) >> shr_sum;
				dst_ptr[1] = (sum_g * mul_sum) >> shr_sum;
				dst_ptr[2] = (sum_b * mul_sum) >> shr_sum;
				dst_ptr[3] = (sum_a * mul_sum) >> shr_sum;
				dst_ptr += w4;

				sum_r -= sum_out_r;
				sum_g -= sum_out_g;
				sum_b -= sum_out_b;
				sum_a -= sum_out_a;

				stack_start = sp + div - radius;
				if (stack_start >= div) stack_start -= div;
				stack_ptr = &stack[4 * stack_start];

				sum_out_r -= stack_ptr[0];
				sum_out_g -= stack_ptr[1];
				sum_out_b -= stack_ptr[2];
				sum_out_a -= stack_ptr[3];

				if (yp < hm)
				{
					src_ptr += w4; // stride
					++yp;
				}

				stack_ptr[0] = src_ptr[0];
				stack_ptr[1] = src_ptr[1];
				stack_ptr[2] = src_ptr[2];
				stack_ptr[3] = src_ptr[3];

				sum_in_r += src_ptr[0];
				sum_in_g += src_ptr[1];
				sum_in_b += src_ptr[2];
				sum_in_a += src_ptr[3];
				sum_r += sum_in_r;
				sum_g += sum_in_g;
				sum_b += sum_in_b;
				sum_a += sum_in_a;

				++sp;
				if (sp >= div) sp = 0;
				stack_ptr = &stack[sp * 4];

				sum_out_r += stack_ptr[0];
				sum_out_g += stack_ptr[1];
				sum_out_b += stack_ptr[2];
				sum_out_a += stack_ptr[3];
				sum_in_r -= stack_ptr[0];
				sum_in_g -= stack_ptr[1];
				sum_in_b -= stack_ptr[2];
				sum_in_a -= stack_ptr[3];
			}
		}
	}

}


/// Bundles the arguments of a single stackblurJob() call so the call can be
/// stored and executed later through run().
class MVImageUtilsStackBlurTask
{
public:
	unsigned char* src;		///< image data handed to stackblurJob
	unsigned int w;			///< image width
	unsigned int h;			///< image height
	unsigned int radius;	///< blur radius
	int cores;				///< total number of workers
	int core;				///< index of this worker
	int step;				///< pass to run (1 or 2)
	unsigned char* stack;	///< scratch stack buffer handed to stackblurJob

	/// Stores every argument unchanged; no ownership is taken here.
	MVImageUtilsStackBlurTask(unsigned char* src, unsigned int w, unsigned int h, unsigned int radius, int cores, int core, int step, unsigned char* stack)
		: src(src)
		, w(w)
		, h(h)
		, radius(radius)
		, cores(cores)
		, core(core)
		, step(step)
		, stack(stack)
	{
	}

	/// Forwards the stored arguments to stackblurJob().
	void run()
	{
		stackblurJob(this->src, this->w, this->h, this->radius,
			this->cores, this->core, this->step, this->stack);
	}

};


/// Stackblur algorithm by Mario Klingemann
/// Details here:
/// http://www.quasimondo.com/StackBlurForCanvas/StackBlurDemo.html
/// C++ implemenation base from:
/// https://gist.github.com/benjamin9999/3809142
/// http://www.antigrain.com/__code/include/agg_blur.h.html
/// This version works only with RGBA color
///
/// Blurs the image in place: a horizontal pass over all rows followed by a
/// vertical pass over all columns. The call is a no-op when radius is
/// outside 2..254, when src is null, or when the image is empty.
///
/// No threads are started here. For cores > 1 the image is still split into
/// `cores` bands, but the bands are processed one after another on the
/// calling thread, so every value of `cores` produces the same blurred image
/// (previously any value other than 1 silently left the image untouched).
/// Values below 1 are treated as 1.
void 			   stackblur(unsigned char* src,				///< input image data
	unsigned int w,					///< image width
	unsigned int h,					///< image height
	unsigned int radius,				///< blur intensity (should be in 2..254 range)
	int cores = 1						///< number of threads (1 - normal single thread)
	)
{
	if (radius > 254) return;
	if (radius < 2) return;
	if (src == nullptr || w == 0 || h == 0) return;
	if (cores < 1) cores = 1;	// a non-positive count would divide by zero in the job

	// One kernel-sized RGBA scratch buffer is enough because the bands run
	// sequentially and each line re-initializes the buffer before using it.
	const unsigned int div = (radius * 2) + 1;
	unsigned char* stack = new unsigned char[div * 4];

	// Every band must finish step 1 before any band starts step 2, because
	// the vertical pass reads the horizontally blurred rows of all bands.
	for (int step = 1; step <= 2; ++step)
	{
		for (int core = 0; core < cores; ++core)
		{
			stackblurJob(src, w, h, radius, cores, core, step, stack);
		}
	}

	delete[] stack;
}


/**
* Capture screen implementation, don't use it directly.
*/
void onCaptureScreen(const std::function& afterCaptured, int iScale)
{
	static bool startedCapture = false;

	if (startedCapture)
	{
		CCLOG("Screen capture is already working");
		if (afterCaptured)
		{
			afterCaptured(false, nullptr);
		}
		return;
	}
	else
	{
		startedCapture = true;
	}


	auto glView = Director::getInstance()->getOpenGLView();
	auto frameSize = glView->getFrameSize();
#if (CC_TARGET_PLATFORM == CC_PLATFORM_MAC) || (CC_TARGET_PLATFORM == CC_PLATFORM_WIN32) || (CC_TARGET_PLATFORM == CC_PLATFORM_LINUX)
	frameSize = frameSize * glView->getFrameZoomFactor() * glView->getRetinaFactor();
#endif

	int width = static_cast(frameSize.width);
	int height = static_cast(frameSize.height);

	do
	{
		std::shared_ptr buffer(new GLubyte[width * height * 4], [](GLubyte* p){ CC_SAFE_DELETE_ARRAY(p); });
		if (!buffer)
		{
			break;
		}

		glPixelStorei(GL_PACK_ALIGNMENT, 1);
		glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buffer.get());

		std::shared_ptr flippedBuffer(new GLubyte[width * height * 4], [](GLubyte* p) { CC_SAFE_DELETE_ARRAY(p); });
		if (!flippedBuffer)
		{
			break;
		}

		for (int row = 0; row < height; ++row)
		{
			memcpy(flippedBuffer.get() + (height - row - 1) * width * 4, buffer.get() + row * width * 4, width * 4);
		}

		/*-------------压缩start------------*/
		unsigned long dst_width = width / iScale;
		unsigned long dst_height = height / iScale;

		std::shared_ptr zipFlippedBuffer(new GLubyte[dst_width * dst_height * 4], [](GLubyte* p) { CC_SAFE_DELETE_ARRAY(p); });
		if (!zipFlippedBuffer)
		{
			break;
		}
		unsigned long xrIntFloat_16 = (width << 16) / dst_width + 1;
		unsigned long yrIntFloat_16 = (height << 16) / dst_height + 1;
		unsigned long srcy_16 = 0;

		unsigned long byte_width = 4;//单个数据的物理宽度 4字节
		unsigned long byte_shift = 2;//单个数据的物理移位

		auto beginPos = zipFlippedBuffer.get();

		for (unsigned long y = 0; y < dst_height; ++y)
		{
			//auto pSrcLine = flippedBuffer.get() + width * byte_width * (srcy_16 >> 16);
			auto pSrcLine = flippedBuffer.get() + (width<<2)*(srcy_16>>16);

			unsigned long srcx_16 = 0;
			for (unsigned long x = 0; x < dst_width; ++x)
			{
				//memcpy(beginPos + x * byte_width, pSrcLine + (srcx_16 >> 16)*byte_width, byte_width);
				memcpy(beginPos + (x<<2), pSrcLine + ((srcx_16 >> 16)<<2), byte_width);
				srcx_16 += xrIntFloat_16;
			}
			srcy_16 += yrIntFloat_16;
			beginPos += (dst_width << byte_shift);
		}
		/*-------------压缩end------------*/

		//使用算法一次性对图片进行高斯模糊
		stackblur(zipFlippedBuffer.get(), dst_width, dst_height, 5);

		Image* image = new (std::nothrow) Image;
		if (image)
		{
			image->initWithRawData(zipFlippedBuffer.get(), dst_width * dst_height * 4 , dst_width, dst_height, 8);
			image->autorelease();
			if (afterCaptured)
			{
				afterCaptured(true, image);
			}
		}
		else
		{
			CCLOG("Malloc Image memory failed!");
			if (afterCaptured)
			{
				afterCaptured(false, nullptr);
			}
			delete image;
			image = nullptr;
		}
		startedCapture = false;
	} while (0);
}


/*
* 高斯模糊接口  缩放因子:iScale,截图会把全屏压缩为1/iScale大
*/
static EventListenerCustom* s_captureScreenListener;
static CustomCommand s_captureScreenCommand;
void Util::gaussianBlur(const std::function& afterCaptured, int iScale /*= 4*/)
{
	if (s_captureScreenListener)
	{
		CCLOG("Warning: CaptureScreen has been called already, don't call more than once in one frame.");
		return;
	}
	s_captureScreenCommand.init(std::numeric_limits::max());
	s_captureScreenCommand.func = std::bind(onCaptureScreen, afterCaptured, iScale);
	s_captureScreenListener = Director::getInstance()->getEventDispatcher()->addCustomEventListener(Director::EVENT_AFTER_DRAW, [](EventCustom *event) {
		auto director = Director::getInstance();
		director->getEventDispatcher()->removeEventListener((EventListener*)(s_captureScreenListener));
		s_captureScreenListener = nullptr;
		director->getRenderer()->addCommand(&s_captureScreenCommand);
		director->getRenderer()->render();
	});
}


以下是导出的lua接口:

// Header of the hand-written Lua binding: declares the function that
// registers the custom game module with a Lua state.
#include "base/ccConfig.h"
#ifndef __game_custom_h__
#define __game_custom_h__

// tolua++ is included with C linkage when compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif
#include "tolua++.h"
#ifdef __cplusplus
}
#endif

// Registers the custom bindings with the given Lua state.
int register_all_game_custom(lua_State* tolua_S);

#endif // __game_custom_h__

/// Lua binding for Util::gaussianBlur.
///
/// Lua stack layout: index 2 is the callback function, index 3 (optional)
/// is the integer scale factor. When the scale is omitted, the C++ default
/// of Util::gaussianBlur is used. The callback is called from Lua as
/// callback(succeeded, image), with image == nil on failure.
///
/// Always returns 0 (no values are returned to Lua).
static int tolua_pf_common_gaussianBlur(lua_State* tolua_S)
{
	// Validate the optional scale first, so that a bad argument cannot leave
	// a dangling function reference behind.
	const int argc = lua_gettop(tolua_S) - 1;
	const bool hasScale = (argc == 2);
	int q = 4;
	if (hasScale && !luaval_to_int32(tolua_S, 3, &q))
	{
		CCLOG("tolua_pf_common_gaussianBlur : luaval_to_int32 , error");
		return 0;
	}

	LUA_FUNCTION callbackHander = toluafix_ref_function(tolua_S, 2, 0);
	if (callbackHander == 0)
	{
		CCLOG("tolua_pf_common_gaussianBlur : toluafix_ref_function , error");
		return 0;
	}

	auto capture_callback = [callbackHander](bool succeed, Image* img){
		auto luastack = LuaEngine::getInstance()->getLuaStack();

		luastack->pushBoolean(succeed);
		if (succeed){
			luastack->pushObject(img, "cc.Image");
		}
		else{
			luastack->pushNil();
		}
		luastack->executeFunctionByHandler(callbackHander, 2);

		// The callback is one-shot: release the Lua function reference so it
		// does not leak on every call.
		luastack->removeScriptHandler(callbackHander);
	};

	if (hasScale)
	{
		Util::gaussianBlur(capture_callback, q);
	}
	else
	{
		Util::gaussianBlur(capture_callback);
	}
	return 0;
}

/// Registers the custom bindings with the given Lua state: module "pf"
/// containing module "Common", which exposes the function "GaussianBlur".
/// Always returns 1.
TOLUA_API int register_all_game_custom(lua_State* tolua_S)
{
	const char* const rootModule = "pf";
	const char* const commonModule = "Common";

	tolua_open(tolua_S);

	// pf
	tolua_module(tolua_S, rootModule, 0);
	tolua_beginmodule(tolua_S, rootModule);

	// pf.Common
	tolua_module(tolua_S, commonModule, 0);
	tolua_beginmodule(tolua_S, commonModule);
	tolua_function(tolua_S, "GaussianBlur", tolua_pf_common_gaussianBlur);
	tolua_endmodule(tolua_S);	// closes "Common"

	tolua_endmodule(tolua_S);	// closes "pf"
	return 1;
}


使用方法:

            -- Key under which the blurred screenshot is stored in the texture cache.
            local BLUR_TEXTURE_KEY = "capriteadu"

            -- Callback of pf.Common:GaussianBlur.
            -- ret: true when the capture succeeded; img: the blurred cc.Image (nil on failure).
            local function onFinishCapture(ret,img)
                if not ret then
                    return
                end

                local director = cc.Director:getInstance()
                local textureCache = director:getTextureCache()

                -- addImage(image, key) returns the texture already cached under
                -- the key, if there is one. Drop the previous entry first so that
                -- every capture shows the current screen, not the first one.
                textureCache:removeTextureForKey(BLUR_TEXTURE_KEY)
                local texture = textureCache:addImage(img, BLUR_TEXTURE_KEY)
                if not texture then
                    return
                end

                local spriteBlur = cc.Sprite:createWithTexture(texture)
                local wSize = director:getWinSize()
                spriteBlur:setPosition(cc.p(wSize.width/2, wSize.height/2))
                self:addChild(spriteBlur)
                -- The image is 1/scale of the screen; stretch the sprite back to full size.
                PF.UIEx.nodeToScaleForFixedSize(spriteBlur, wSize)
            end
            pf.Common:GaussianBlur(onFinishCapture, 4)



你可能感兴趣的:(C++,cocos2d-x)