OSG OIT 顺序无关透明绘制(PPLL_OIT, WB_OIT) 实现及注意事项




WB OIT


WB OIT

WB OIT

OSG OIT 顺序无关透明绘制(PPLL_OIT, WB_OIT) 实现及注意事项_第4张图片





下面简单说说算法公式。其实不懂原理、直接套公式一样能实现,只是不明白原理的话,出了问题排查起来会多费些精气神。好在公式简单明了,如果想了解得更详细,可以搜一下大神的论文《Weighted Blended Order-Independent Transparency》,作者就是 G3D Innovation Engine(G3D 革新引擎)的作者 Morgan McGuire,这个家伙是个牛人,Nvidia 的人。

  •  Weighted Blended OIT


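第一版:Meshkin 于 2007 年首先提出的 Sort-independent alpha blending,公式如下:

$$C_f = \sum_{i=1}^{n} C_i + C_0\left(1 - \sum_{i=1}^{n}\alpha_i\right)$$

其中 $C_i$、$\alpha_i$ 是第 $i$ 个透明片元的(预乘 α 的)颜色和 α 值,$C_f$ 是最终颜色。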

简单明了,其中 C0 是背景颜色。从该公式可以看出,它只是简单地将源颜色求和,再加上 目标颜色 × (1 − 源颜色 α 值之和) 作为混合后的最终颜色。该公式也不是人家瞎整的,虽然不具备通用性,但在颜色相近、α 值较小的情况下效果最好。公式推导可以看看论文,了解它是如何把顺序相关的因子排除掉的;它为后续的加权平均 OIT 方法奠定了坚实的基础。

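第二版:Bavoil and Myers 的加权平均法,也算是对上一方法的改进版,公式如下:

$$C_f = \frac{\sum_{i=1}^{n} C_i}{\sum_{i=1}^{n}\alpha_i}\left(1 - \left(1 - \frac{1}{n}\sum_{i=1}^{n}\alpha_i\right)^{n}\right) + C_0\left(1 - \frac{1}{n}\sum_{i=1}^{n}\alpha_i\right)^{n}$$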

该公式基于加权求和的方式对 Meshkin 的方法做了改进,正确性提高了很多,而且更具有通用性。但当 α 为 0 的时候,本来应该不贡献颜色的片元也参与了加权求和,使颜色变淡,总是透明的。该公式还有个缺点(即下文所说的“红字部分”),后续版本也一样:如果不透明的实体使用该公式绘制,C0 项的系数为 0,但公式前半部分变为 Ci 求和项除以 αi 求和项,把颜色平均了,所以明明不透明的实体也变得透明了。

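第三版由 Morgan McGuire 于 2013 年提出,对上述公式加以改进,解决了 α 为 0 时的问题,但红字部分(不透明实体被画成透明)的问题没有解决。公式如下:

$$C_f = \frac{\sum_{i=1}^{n} C_i\, w(z_i,\alpha_i)}{\sum_{i=1}^{n}\alpha_i\, w(z_i,\alpha_i)}\left(1 - \prod_{i=1}^{n}(1-\alpha_i)\right) + C_0\prod_{i=1}^{n}(1-\alpha_i)$$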

公式中对 α 的求和改成了连乘,全透明(α 为 0)实体的绘制问题解决了。该公式还考虑了片元深度和 α 的影响,加入了权重函数 w(z, α)。一般我们认为离得近的半透明物体罩着后面的物体,看上去颜色也最贴近最前面物体的颜色。论文里给出了几个效果比较好的权重公式:

OSG OIT 顺序无关透明绘制(PPLL_OIT, WB_OIT) 实现及注意事项_第5张图片


下面给出我的实现代码,使用 OSG 后处理来实现。demo 程序就不给了,代码复制粘贴一下很容易用起来。

void WB_OITRenderPass::initialize()
	_pass = new osg::Group();

	_accumTexture = createTexture2D(getFrameBufferWidth(), getFrameBufferHeight(), GL_RGBA16F_ARB, GL_RGBA, GL_FLOAT);
	_accumTexture->setWrap(osg::Texture2D::WRAP_S, osg::Texture2D::CLAMP_TO_EDGE);
	_accumTexture->setWrap(osg::Texture2D::WRAP_T, osg::Texture2D::CLAMP_TO_EDGE);
	_accumTexture->setFilter(osg::Texture::MIN_FILTER, osg::Texture::NEAREST);
	_accumTexture->setFilter(osg::Texture::MAG_FILTER, osg::Texture::NEAREST);

	_accumAlphaTexture = createTexture2D(getFrameBufferWidth(), getFrameBufferHeight(), GL_R16F, GL_RED, GL_FLOAT);
	_accumAlphaTexture->setWrap(osg::Texture2D::WRAP_S, osg::Texture2D::CLAMP_TO_EDGE);
	_accumAlphaTexture->setWrap(osg::Texture2D::WRAP_T, osg::Texture2D::CLAMP_TO_EDGE);
	_accumAlphaTexture->setFilter(osg::Texture::MIN_FILTER, osg::Texture::NEAREST);
	_accumAlphaTexture->setFilter(osg::Texture::MAG_FILTER, osg::Texture::NEAREST);

	// Accum pass.
	_accumPass = createRTTCamera(getFrameBufferWidth(), getFrameBufferHeight(), false, GL_COLOR_BUFFER_BIT);
	_accumPass->attach(osg::Camera::COLOR_BUFFER0, _accumTexture);
	_accumPass->attach(osg::Camera::COLOR_BUFFER1, _accumAlphaTexture);
	_accumPass->attach(osg::Camera::DEPTH_BUFFER, getContext()->_depthBuffer);
	_accumPass->setCullCallback(new PassCallback(getContext()->getPipeline()));
	_accumPass->setClearColor(osg::Vec4(0.0, 0.0, 0.0, 1.0));

	osg::StateSet* ss = setShaderProgram(_accumPass, "trans_accum", osg::StateAttribute::ON | osg::StateAttribute::OVERRIDE);
	ss->setMode(GL_CULL_FACE,  osg::StateAttribute::OFF|osg::StateAttribute::OVERRIDE);

	osg::Depth* depth = new osg::Depth;
	ss->setAttributeAndModes(depth, osg::StateAttribute::ON | osg::StateAttribute::OVERRIDE);
	osg::BlendFunc* bf = new osg::BlendFunc(osg::BlendFunc::ONE, osg::BlendFunc::ONE, osg::BlendFunc::ZERO, osg::BlendFunc::ONE_MINUS_SRC_ALPHA);
	ss->setAttributeAndModes(bf, osg::StateAttribute::ON | osg::StateAttribute::OVERRIDE);

	// Draw pass
	_drawPass = createRTTCamera(getFrameBufferWidth(), getFrameBufferHeight(), true, GL_DEPTH_BUFFER_BIT|GL_COLOR_BUFFER_BIT);
	_drawPass->attach(osg::Camera::COLOR_BUFFER, getContext()->_outputTextureWithLum);
	_drawPass->attach(osg::Camera::DEPTH_BUFFER, getContext()->_tempDepthBuffer);

	ss = setShaderProgram(_drawPass, "trans_draw");

	ss->setTextureAttributeAndModes(0, _accumTexture);
	ss->addUniform(new osg::Uniform("Accumulate", 0));

	ss->setTextureAttributeAndModes(1, _accumAlphaTexture);
	ss->addUniform(new osg::Uniform("AccumulateAlpha", 1));

	ss->setTextureAttributeAndModes(2, getContext()->_outputTexture);
	ss->addUniform(new osg::Uniform("Opacity", 2));


shader 部分代码:

#version 420 core
// Vertex stage: forwards the first texture coordinate set and computes the
// world-space position and normal for the fragment stage.
in vec4 osg_Vertex;
in vec3 osg_Normal;
in vec4 osg_MultiTexCoord0;

uniform mat4 osg_ViewMatrix;
uniform mat4 osg_ViewMatrixInverse;
uniform mat4 osg_ModelViewMatrix;
uniform mat4 osg_ModelViewProjectionMatrix;

out vec2 TexCoords;
out vec3 WorldPos;
out vec3 WorldNormal;

void main()
{
    TexCoords   = osg_MultiTexCoord0.xy;

    // model -> world = inverse(view) * (view * model)
    mat4 worldMatrix = osg_ViewMatrixInverse * osg_ModelViewMatrix;
    WorldPos = (worldMatrix * osg_Vertex).xyz;

    // The upper 3x3 of the world matrix is used directly for normals; this is
    // only exact when the model transform has no non-uniform scale.
    mat3 normalMatrix = mat3(worldMatrix);
    WorldNormal = normalize(normalMatrix * osg_Normal);

    gl_Position = osg_ModelViewProjectionMatrix * osg_Vertex;
}

#version 420 core
#extension GL_ARB_shader_image_load_store : enable
layout (early_fragment_tests) in;
#include "chunk_math.glsl"
#include "forward_pbr_shading_parameters.frag"
#include "chunk_shadowmap.frag"
#include "chunk_light.frag"
#include "tone_mapping.frag"

 // Blend weight for one transparent fragment.
 //   z: value passed by the caller (main passes gl_FragCoord.z)
 //   a: fragment alpha
 // The weight grows with alpha (saturating once a * 10 reaches 1) and falls
 // off with z; the result is clamped to [1e-2, 3e3].
 float weight(float z, float a)
 {
	return clamp(pow(min(1.0, a * 10.0) + 0.01, 3.0) * 1e8 * pow(1.0 - z * 0.9, 3.0), 1e-2, 3e3);
 }
// Shades the current fragment and returns vec4(tone-mapped colour, albedo alpha).
//
// Reads material inputs through getAlbedo()/getRoughness()/getMetallic()/getAo(),
// sums CalcPointOrDirectionalLight() over NUMBER_LIGHTS lights, scales the sum by
// (1 - _shadow), adds ambient * _ssao and tone-maps the result.
//
// NOTE(review): the block delimiters of this function are missing in this
// listing, so the exact extent of the `if`, the `for` loop and the ambient
// section cannot be determined from here.
// NOTE(review): `transparency`, `reflectance`, `metallic`, `roughness`, `N`, `V`
// and `irradianceMap` are not declared in this block; presumably some come from
// the included chunks - confirm.
vec4 shading()

	vec4  _albedo    = getAlbedo();
	float _roughness = getRoughness();
	float _metallic  = getMetallic();
	float _ao        = getAo();
	vec3 camPos      = getCameraPosition();
	vec3 worldNormal = normalize(getWorldNormal());
	vec4 worldPos    = getWorldPosition();
	float _ssao      = 1.0;
	float _shadow    = 0.0;
		// SSAO and shadow are only fetched when `transparency` is (almost) zero.
		// NOTE(review): indentation suggests both assignments belong to the `if`.
		if(transparency < 0.0001)
			_ssao   = getSSAO();
			_shadow = getShadow(vec4(WorldPos,1.0), WorldNormal);
	// F0 and the diffuse colour are built from `metallic`/`reflectance`, not from the
	// local `_metallic` read above.
	vec3 F0 = 0.16 * reflectance * reflectance * (1.0 - metallic) + _albedo.rgb * metallic;
	vec3 diffuse = _albedo.rgb * (1.0 - metallic);
	vec3 Lo = vec3(0.0);
    vec3 ambient = vec3(0.0);
	// Accumulate the contribution of every light; `ambient` is also passed to the helper.
	for (int i = 0; i < NUMBER_LIGHTS; ++i)
		Light light = Lights[i];
		Lo += CalcPointOrDirectionalLight( light, camPos, worldPos.xyz, worldNormal, F0, diffuse, metallic, roughness, ambient );

	//Ambient lighting
	// NOTE(review): this section redeclares `diffuse`, multiplies a vec3 by the vec4
	// `_albedo` and assigns `ambient` twice; it looks like two alternative code paths
	// whose separators were lost - confirm against the real shader.
		vec3 kS = fresnelSchlick(max(dot(N, V), 0.0), F0);
		vec3 kD = 1.0 - kS;
		kD *= 1.0 - _metallic;	  
		vec3 irradiance = texture(irradianceMap, N).rgb;
		vec3 diffuse      = irradiance * _albedo;
		ambient = (kD * diffuse) * _ao;
		ambient = ambient * _albedo.rgb * _ao;
	// Shadowing attenuates the direct lighting only; SSAO attenuates the ambient term only.
	Lo = Lo*( 1.0 - _shadow);
	vec3 color = ambient* _ssao + Lo;
          //vec3 I = normalize(WorldPos - camPos);
          //vec3 R = reflect(I, normalize(WorldNormal));
          //color = texture(irradianceMap, R).rgb;
		  //color = texture(irradianceMap,TexCoords2).rgb;
	color = tonemap(color);
	// Alpha is taken from the albedo; no gamma correction is applied here.
	vec4 outputColor = vec4(color, _albedo.a);
	return outputColor;

// Target 0: rgb = weighted premultiplied colour, a = fragment alpha.
// Target 1: weighted alpha.
layout (location = 0) out vec4 out_accumColor;
layout (location = 1) out float out_accumAlpha;

// Writes one transparent fragment into the two accumulation targets.
void main()
{
     vec4 color = shading();

     // Premultiply by alpha before weighting.
     color.rgb *= color.a;

     float w = weight(gl_FragCoord.z, color.a);

     // Alpha of target 0 is left unweighted.
     out_accumColor = vec4(color.rgb * w, color.a);
     out_accumAlpha = color.a * w;
}
#version 420 core
// Pass-through vertex stage: forwards the texture coordinate and projects
// the vertex.

in vec4 osg_MultiTexCoord0;
in vec4 osg_Vertex;

uniform mat4 osg_ModelViewProjectionMatrix;

out vec2 TexCoord;

void main()
{
	TexCoord    = osg_MultiTexCoord0.xy;
	gl_Position = osg_ModelViewProjectionMatrix * osg_Vertex;
}
#version 420 core
// Composite stage of Weighted Blended OIT: normalises the accumulated
// transparent colour and blends it with the colour sampled from `Opacity`.

#include "chunk_math.glsl"

in vec2 TexCoord;

uniform sampler2D Accumulate;      // rgb: weighted colour sum, a: blend factor towards the Opacity colour
uniform sampler2D AccumulateAlpha; // r: weighted alpha sum
uniform sampler2D Opacity;         // colour blended in by the factor above

layout (location = 0) out vec4 fragColor;

void main()
{
	 ivec2 fragCoord = ivec2(gl_FragCoord.xy);

     vec4 accum = texelFetch(Accumulate, fragCoord, 0);
	 // Keep the alpha of the first target before it is overwritten below.
	 float r = accum.a;
     accum.a = texelFetch(AccumulateAlpha, fragCoord, 0).r;

     // Weighted average colour; the clamp avoids a division by zero on
     // pixels that received no transparent fragment.
     vec4 color = vec4(accum.rgb / clamp(accum.a, 0.0001, 50000.0), r);
	 // Gamma-encode the averaged transparent colour.
	 color.rgb = pow(color.rgb, vec3(1.0/2.2));

	 vec4 opaqueColor = texelFetch(Opacity, fragCoord, 0).rgba;
	 // color.a == 1 returns the Opacity colour, color.a == 0 the transparent colour.
	 vec3 outputColor = mix(color.rgb, opaqueColor.rgb, color.a);

     // Luminance is stored in alpha for the FXAA anti-aliasing pass; for a
     // quick test a constant alpha of 1 is sufficient.
	 fragColor = vec4(outputColor, luminance(outputColor));
}


结论:Weighted Blended OIT 效果还过得去,比非 OIT 绘制的正确性合理得多,但它毕竟是去掉了公式中的顺序相关项之后推出来的透明融合,存在一些缺陷:

1) 不透明实体会被绘制成透明实体(见上文红字部分,即 Bavoil and Myers 公式说明中提到的缺点);如果纹理带 α 通道,也不能利用透明通道实现镂空效果。

2) 绘制的不是十分正确,只是整体上过的去;


  • GPU 端链表法 实现OIT  

直接上源码,至于实现原理过程,OpenGL 编程指南介绍的很清楚了,最后说下该方法存在的问题:


程序初始化部分,这部分代码最有价值的是 OSG 原子计数器, TBO 存储的实现。很庆幸OSG 虽然没落了,但API更新还算及时,支持 计算,几何着色器, 原子操作, TBO 等等。

// Sets up the resources and the three camera passes of the per-pixel linked
// list OIT: the head-pointer image, an atomic counter, the buffer-backed node
// storage, a pass that clears the head pointers ("clear_head_pointer"), a pass
// that builds the lists ("build_lists") and a pass that resolves them into
// _FinalOIT ("resolve_lists").
//
// NOTE(review): this listing is damaged - the function's braces, the template
// arguments of every osg::ref_ptr and the tails of both BindImageTexture
// constructor calls are missing, and no statement that attaches the images,
// the counter binding or the buffer to a state set is visible.
void PPLL_OITPass::initialize()
	_OITRoot = new osg::Group();

	// Colour target of the resolve pass.
	// NOTE(review): MIN_FILTER is LINEAR_MIPMAP_LINEAR; confirm that mipmaps are
	// actually generated for this render target.
	_FinalOIT = createTexture2D(getViewportWidth(), getViewportHeight(), GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE);
	_FinalOIT->setWrap(osg::Texture2D::WRAP_S, osg::Texture2D::CLAMP_TO_EDGE);
	_FinalOIT->setWrap(osg::Texture2D::WRAP_T, osg::Texture2D::CLAMP_TO_EDGE);
	_FinalOIT->setFilter(osg::Texture::MIN_FILTER, osg::Texture::LINEAR_MIPMAP_LINEAR);
	_FinalOIT->setFilter(osg::Texture::MAG_FILTER, osg::Texture::LINEAR);

	//Head pointer texture.
	// One 32-bit unsigned integer per pixel, bound as image unit 0.
	_head_pointer_texture = createTexture2D(getViewportWidth(), getViewportHeight(), GL_R32UI, GL_RED_INTEGER_EXT, GL_UNSIGNED_INT);
	_head_pointer_texture->setWrap(osg::Texture2D::WRAP_S, osg::Texture2D::CLAMP_TO_EDGE);
	_head_pointer_texture->setWrap(osg::Texture2D::WRAP_T, osg::Texture2D::CLAMP_TO_EDGE);
	_head_pointer_texture->setFilter(osg::Texture::MIN_FILTER, osg::Texture::NEAREST);
	_head_pointer_texture->setFilter(osg::Texture::MAG_FILTER, osg::Texture::NEAREST);
	_head_pointer_image = new osg::BindImageTexture(0,
		osg::BindImageTexture::READ_WRITE, GL_R32UI,
	// NOTE(review): the remaining constructor arguments and the closing `);` of
	// the call above are missing from this listing.

    // Atomic counter setup.
	// The binding uses index 0 and covers a single GLuint; ResetAtomicCounter is
	// installed as its update callback.
	osg::ref_ptr atomicCounterArray = new osg::UIntArray;
	osg::ref_ptr acbo = new osg::AtomicCounterBufferObject;
	osg::ref_ptr acbb = new osg::AtomicCounterBufferBinding(0, atomicCounterArray.get(), 0, sizeof(GLuint));
	acbb->setUpdateCallback(new ResetAtomicCounter);

	// Node storage: room for OIT_LAYERS nodes per pixel of a 2048 x 2048 area,
	// four GLuints (16 bytes) per node.
#define OIT_LAYERS  3
	int linked_list_buffer_item_size = 2048 * 2048 * OIT_LAYERS;
	osg::ref_ptr linked_list_buffer = new osg::UIntArray;
	osg::ref_ptr pdbo = new osg::PixelDataBufferObject();
	pdbo->setDataSize(linked_list_buffer_item_size * sizeof(GLuint) * 4);

	osg::ref_ptr tbo = new osg::TextureBuffer;
	osg::BindImageTexture* linked_list_image = new osg::BindImageTexture(1,
		osg::BindImageTexture::WRITE_ONLY, GL_RGBA32UI_EXT,
	// NOTE(review): the remaining constructor arguments and the closing `);` of
	// the call above are missing from this listing.

    // Before each frame an extra pass re-initialises the linked-list head
    // pointers. The OpenGL Red Book does this by binding a PBO; here an
    // off-screen render pass is used instead.
	_OITClearHeadPointerPass = createRTTCamera(getViewportWidth(), getViewportHeight(), true, GL_DEPTH_BUFFER_BIT);
	_OITClearHeadPointerPass->attach(osg::Camera::DEPTH_BUFFER, getContext()->_tempDepthBuffer);
	osg::StateSet* ss = setShaderProgram(_OITClearHeadPointerPass, "clear_head_pointer");

	// List-building pass: no colour attachment and a clear mask of 0; it is
	// attached to the context's _depthBuffer.
	_OITPass = createRTTCamera(getViewportWidth(), getViewportHeight(), false, 0);
	_OITPass->attach(osg::Camera::DEPTH_BUFFER, getContext()->_depthBuffer);
	_OITPass->setCullCallback(new PassCallback(getContext()->getPipeline()));

	ss = setShaderProgram(_OITPass, "build_lists", osg::StateAttribute::ON | osg::StateAttribute::OVERRIDE);
	ss->setMode(GL_BLEND, osg::StateAttribute::ON | osg::StateAttribute::OVERRIDE);
	ss->setMode(GL_CULL_FACE, osg::StateAttribute::OFF | osg::StateAttribute::OVERRIDE);
	ss->setMode(GL_DEPTH_TEST, osg::StateAttribute::ON | osg::StateAttribute::OVERRIDE);
	// Capacity of the node buffer, used by the shader for its overflow check.
	ss->addUniform(new osg::Uniform("itemCount", linked_list_buffer_item_size));

	// Resolve pass: samples the context's _outputTexture through the "Final"
	// sampler on unit 0 and writes into _FinalOIT.
	_OITDrawPass = createRTTCamera(getViewportWidth(), getViewportHeight(), true);
	_OITDrawPass->attach(osg::Camera::DEPTH_BUFFER, getContext()->_tempDepthBuffer);
	_OITDrawPass->attach(osg::Camera::COLOR_BUFFER0, _FinalOIT);
	ss = setShaderProgram(_OITDrawPass, "resolve_lists");
	ss->setTextureAttributeAndModes(0, getContext()->_outputTexture);
	ss->addUniform(new osg::Uniform("Final", 0));



// NOTE(review): the body of uninitialize() is not present in this listing.
void PPLL_OITPass::uninitialize()


#version 420 core
// Vertex stage: forwards the first texture coordinate set and computes the
// world-space position and normal for the fragment stage.
in vec4 osg_MultiTexCoord0;
in vec3 osg_Normal;
in vec4 osg_Vertex;
out vec2 TexCoords;
out vec3 WorldPos;
out vec3 WorldNormal;

uniform mat4 osg_ViewMatrixInverse;
uniform mat4 osg_ModelViewMatrix;
uniform mat4 osg_ViewMatrix;
uniform mat4 osg_ModelViewProjectionMatrix;

void main()
{
    TexCoords = osg_MultiTexCoord0.xy;

    // model -> world = inverse(view) * (view * model)
    mat4 worldMatrix = osg_ViewMatrixInverse * osg_ModelViewMatrix;
    WorldPos = (worldMatrix * osg_Vertex).xyz;

    // The upper 3x3 of the world matrix is used directly for normals; this is
    // only exact when the model transform has no non-uniform scale.
    mat3 normalMatrix = mat3(worldMatrix);
    WorldNormal = normalize(normalMatrix * osg_Normal);

    gl_Position = osg_ModelViewProjectionMatrix * osg_Vertex;
}
#version 420 core

layout (early_fragment_tests) in;
layout (binding = 0, r32ui) uniform uimage2D head_pointer_image;
layout (binding = 1, rgba32ui) uniform writeonly uimageBuffer list_buffer;
layout (binding = 0, offset = 0) uniform atomic_uint list_counter;

#include "chunk_math.glsl"
#include "forward_pbr_shading_parameters.frag"
#include "chunk_light.frag"
#include "tone_mapping.frag"

// Shades the current fragment and returns vec4(gamma-encoded colour, albedo alpha).
//
// Reads material inputs through getAlbedo()/getRoughness()/getMetallic()/getAo(),
// sums CalcPointOrDirectionalLight() over NUMBER_LIGHTS lights, adds the ambient
// term, tone-maps and applies a 1/2.2 gamma.
//
// NOTE(review): the block delimiters of this function are missing in this
// listing, so the exact extent of the `for` loop and of the ambient section
// cannot be determined from here.
// NOTE(review): `reflectance`, `metallic`, `roughness`, `N`, `V` and
// `irradianceMap` are not declared in this block; presumably some come from the
// included chunks - confirm.
vec4 shading()
	vec4  _albedo    = getAlbedo();
	float _roughness = getRoughness();
	float _metallic  = getMetallic();
	float _ao        = getAo();
	vec3 camPos      = getCameraPosition();
	vec4 worldPos    = getWorldPosition();
	// F0 and the diffuse colour are built from `metallic`/`reflectance`, not from the
	// local `_metallic` read above.
	vec3 F0 = 0.16 * reflectance * reflectance * (1.0 - metallic) + _albedo.rgb * metallic;
	vec3 diffuse = _albedo.rgb * (1.0 - metallic);
	// The interpolated normal is used as-is (it is not re-normalised here).
	vec3 worldNormal;
	worldNormal = WorldNormal;

	vec3 Lo = vec3(0.0);
    vec3 ambient = vec3(0.0);
	// Accumulate the contribution of every light; `ambient` is also passed to the helper.
	for (int i = 0; i < NUMBER_LIGHTS; ++i)
		Light light = Lights[i];
		Lo += CalcPointOrDirectionalLight( light, camPos, worldPos.xyz, worldNormal, F0, diffuse, metallic, roughness, ambient );

	//Ambient lighting
	// NOTE(review): this section redeclares `diffuse`, multiplies a vec3 by the vec4
	// `_albedo` and assigns `ambient` twice; it looks like two alternative code paths
	// whose separators were lost - confirm against the real shader.
		vec3 kS = fresnelSchlick(max(dot(N, V), 0.0), F0);
		vec3 kD = 1.0 - kS;
		kD *= 1.0 - _metallic;	  
		vec3 irradiance = texture(irradianceMap, N).rgb;
		vec3 diffuse    = irradiance * _albedo;
		ambient = (kD * diffuse) * _ao;
		ambient = ambient * _albedo.rgb * _ao;
	vec3 color = ambient + Lo;
	color = tonemap(color);
	//Gamma correction
	color = pow(color, vec3(1.0/2.2)); 

          //vec3 I = normalize(WorldPos - camPos);
          //vec3 R = reflect(I, normalize(WorldNormal));
          //color = texture(irradianceMap, R).rgb;
		  //color = texture(irradianceMap,TexCoords2).rgb;
	// Alpha is taken from the albedo.
	return vec4(color, _albedo.a);

// Capacity of list_buffer in nodes.
uniform int itemCount;

// Appends the current fragment to the linked list of its pixel.
//
// Head pointers and `next` links store "slot + 1", so 0 marks the end of a
// list. Slot 0 is reserved as a shared overflow node: once the counter runs
// past the buffer capacity, the pixel's list is replaced by that single node,
// which is stored as opaque red.
void main(void)
{
    uint index;
    uint old_head;
    // Zero-initialised so the unused .w component is never written undefined.
    uvec4 item = uvec4(0u);

    // +2: slot 0 is reserved and stored values are offset by one.
    index = atomicCounterIncrement(list_counter) + 2;

    if (index > itemCount - 1)
    {
        // Out of nodes: point this pixel at the overflow node in slot 0.
        index = 1;
        old_head = 0;
        imageAtomicExchange(head_pointer_image, ivec2(gl_FragCoord.xy), uint(index));
        item.x = old_head;
        item.y = packUnorm4x8(vec4(1.0, 0.0, 0.0, 1.0));
        item.z = floatBitsToUint(gl_FragCoord.z);
        imageStore(list_buffer, int(0), item);
    }
    else
    {
        // Make the new node the head of the list and link it to the old head.
        old_head = imageAtomicExchange(head_pointer_image, ivec2(gl_FragCoord.xy), uint(index));
        vec4 surface_color = shading();
        item.x = old_head;
        item.y = packUnorm4x8(surface_color);
        item.z = floatBitsToUint(gl_FragCoord.z);
        imageStore(list_buffer, int(index - 1), item);
    }
}

#version 420 core
// Pass-through vertex stage: forwards the texture coordinate and projects
// the vertex.

in vec4 osg_Vertex;
in vec4 osg_MultiTexCoord0;
uniform mat4 osg_ModelViewProjectionMatrix;

out vec2 TexCoord;

void main(void)
{
    TexCoord = osg_MultiTexCoord0.xy;
    gl_Position = osg_ModelViewProjectionMatrix * osg_Vertex;
}

#version 420 core

#pragma import_defines ( USE_FXAA )

#include "chunk_math.glsl"

// The per-pixel image containing the head pointers
layout (binding = 0, r32ui) uniform uimage2D head_pointer_image;
// Buffer containing linked lists of fragments
layout (binding = 1, rgba32ui) uniform uimageBuffer list_buffer;

// This is the output color
layout (location = 0) out vec4 finalColor;

// This is the maximum number of overlapping fragments allowed
#define MAX_FRAGMENTS 40

// Temporary array used for sorting fragments
uvec4 fragment_list[MAX_FRAGMENTS];

in vec2 TexCoord;
uniform sampler2D Final;

// Resolves the linked list of the current pixel: gathers its nodes, sorts
// them far-to-near and blends them over the colour sampled from `Final`.
void main(void)
{
    int current_index;
    uint fragment_count = 0u;

    // Stored links are "slot + 1"; after the -1 a negative index means end of list.
    current_index = int(imageLoad(head_pointer_image, ivec2(gl_FragCoord).xy).x) - 1;
    while (current_index >= 0 && fragment_count < MAX_FRAGMENTS)
    {
        uvec4 fragment = imageLoad(list_buffer, int(current_index));
        fragment_list[fragment_count] = fragment;
        // Without this increment every node overwrites entry 0 and the count
        // stays at 0, so nothing would ever be blended.
        fragment_count++;
        current_index = int(fragment.x) - 1;
    }

    uint i, j;
    // The guard also keeps `fragment_count - 1` from wrapping around for an
    // empty list (unsigned arithmetic).
    if (fragment_count > 1)
    {
        // Exchange sort by descending depth: farthest fragment first.
        for (i = 0; i < fragment_count - 1; i++)
        {
            for (j = i + 1; j < fragment_count; j++)
            {
                uvec4 fragment1 = fragment_list[i];
                uvec4 fragment2 = fragment_list[j];

                float depth1 = uintBitsToFloat(fragment1.z);
                float depth2 = uintBitsToFloat(fragment2.z);

                if (depth1 < depth2)
                {
                    fragment_list[i] = fragment2;
                    fragment_list[j] = fragment1;
                }
            }
        }
    }

    // Back-to-front "over" blending on top of the colour sampled from `Final`.
    vec3 backgroundColor = texture(Final, TexCoord.xy).rgb;
    for (i = 0; i < fragment_count; i++)
    {
        vec4 modulator  = unpackUnorm4x8(fragment_list[i].y);
        backgroundColor = mix(backgroundColor.rgb, modulator.rgb, modulator.a);
    }

    // The luminance of the result is written to the alpha channel.
    finalColor = vec4(backgroundColor, luminance(backgroundColor));
}

链表法OIT 无疑绘制半透明实体是最正确的,但它有不可克服的缺陷:

1)资源占用不可预知,只能预先分配。对于复杂的透明实体,透明层次比较深,很容易把链表预分配的内存资源吃尽,导致绘制不正确。按上面的实现,链表的一个结点是一个 uvec4(rgba32ui),占用 16 字节;比如满屏 2K 屏,一个透明层就要占用 2048 × 2048 × 16 bytes = 64 MB,上面代码预分配的 3 层(OIT_LAYERS = 3)共 192 MB;


如果对正确性要求不是很高,基于权重混合的OIT 方法足够了,而且效率也比较高。

Demo 模型浏览器下载链接: https://pan.baidu.com/s/1H4lS-iKoTqroq6V-xiwpkw 提取码: f3ig 
