减少drawcall是游戏开发过程中的重要优化手段,在移动设备上更低的drawcall意味着更低的发热和更高的系统流畅度。
cocos显示对象把渲染数据放在RenderCommand中,然后在Renderer类render函数中统一进行渲染。这么设计是为了对渲染进行统一的管理,包括可以进行合并drawcall,还可以用独立的线程来进行渲染,但是cocos目前并没有这么做。
cocos渲染数据的组织结构是:显示元素 -> RenderCommand -> RenderQueue -> _renderGroups。cocos默认使用RenderGroup[0](即_renderGroups[0])进行渲染,一些需要独立渲染的对象会开启新的RenderGroup,例如ClippingNode。
// Renders everything queued for the current frame, then discards the queued data.
// _isRendering is set for the duration of the call.
void Renderer::render()
{
    _isRendering = true;

    if (_glViewAssigned)
    {
        // Sort every render queue before anything is drawn.
        for (auto& group : _renderGroups)
        {
            group.sort();
        }

        // Rendering always starts from render group 0.
        visitRenderQueue(_renderGroups[0]);
    }

    // Drop all queued render data, whether or not anything was drawn.
    clean();

    _isRendering = false;
}
RenderQueue的排序过程非常简单:只对透明的3D对象以及GlobalZOrder不为0(小于0和大于0)的命令进行排序,其余子队列不排序。
// Sorts the sub-queues that need ordering: transparent 3D commands use
// compare3DCommand; the negative and positive global-Z sub-queues use
// compareRenderCommand. The remaining sub-queues are left untouched.
void RenderQueue::sort()
{
    // Don't sort _queue0, it already comes sorted
    auto& transparent3D = _commands[QUEUE_GROUP::TRANSPARENT_3D];
    std::sort(std::begin(transparent3D), std::end(transparent3D), compare3DCommand);

    auto& negativeZ = _commands[QUEUE_GROUP::GLOBALZ_NEG];
    std::sort(std::begin(negativeZ), std::end(negativeZ), compareRenderCommand);

    auto& positiveZ = _commands[QUEUE_GROUP::GLOBALZ_POS];
    std::sort(std::begin(positiveZ), std::end(positiveZ), compareRenderCommand);
}
visitRenderQueue函数按照指定的顺序对各种RenderQueue中的各个RenderCommand统一进行渲染,渲染单个RenderCommand的函数是processRenderCommand。
// Renders the sub-queues of `queue` in a fixed order:
// GLOBALZ_NEG, OPAQUE_3D, TRANSPARENT_3D, GLOBALZ_ZERO, GLOBALZ_POS.
// Each non-empty sub-queue has every command passed to processRenderCommand()
// and is followed by a flush(). The "......" lines mark code that this excerpt omits.
void Renderer::visitRenderQueue(RenderQueue& queue)
{
// Save the OpenGL state in effect before rendering
queue.saveRenderState();
// Render the sub-queue of commands whose global Z is less than 0
const auto& zNegQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::GLOBALZ_NEG);
if (zNegQueue.size() > 0)
{
// Set up the OpenGL state (omitted in this excerpt)
......
for (auto it = zNegQueue.cbegin(); it != zNegQueue.cend(); ++it)
{
processRenderCommand(*it);
}
flush();
}
// Render the sub-queue of opaque 3D objects
const auto& opaqueQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::OPAQUE_3D);
if (opaqueQueue.size() > 0)
{
// Set up the OpenGL state (omitted in this excerpt)
......
for (auto it = opaqueQueue.cbegin(); it != opaqueQueue.cend(); ++it)
{
processRenderCommand(*it);
}
flush();
}
// Render the sub-queue of transparent 3D objects
const auto& transQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::TRANSPARENT_3D);
if (transQueue.size() > 0)
{
// Set up the OpenGL state (omitted in this excerpt)
......
for (auto it = transQueue.cbegin(); it != transQueue.cend(); ++it)
{
processRenderCommand(*it);
}
flush();
}
// Render the sub-queue of commands whose global Z equals 0
const auto& zZeroQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::GLOBALZ_ZERO);
if (zZeroQueue.size() > 0)
{
// Set up the OpenGL state (omitted in this excerpt)
......
for (auto it = zZeroQueue.cbegin(); it != zZeroQueue.cend(); ++it)
{
processRenderCommand(*it);
}
flush();
}
// Render the sub-queue of commands whose global Z is greater than 0
const auto& zPosQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::GLOBALZ_POS);
if (zPosQueue.size() > 0)
{
// Set up the OpenGL state (omitted in this excerpt)
......
for (auto it = zPosQueue.cbegin(); it != zPosQueue.cend(); ++it)
{
processRenderCommand(*it);
}
flush();
}
// Restore the OpenGL state that was saved before rendering
queue.restoreRenderState();
}
渲染三角形和四边形的时候会自动合并渲染顶点数据,合并DrawCall。合并三角形顶点数据的方法是fillVerticesAndIndices
// Dispatches a single render command according to its type.
//
// Triangle and quad commands are accumulated into batch buffers so that
// several commands can be submitted together; the pending batch is drawn
// when the buffers would overflow or when a command asks to skip batching.
// Mesh commands are batched by material ID. Group commands recurse into the
// render queue they reference. Custom, batch and primitive commands flush
// all pending work and then execute immediately.
//
// @param command  The command to process; its dynamic type must match the
//                 value returned by getType(), because it is downcast with
//                 static_cast.
void Renderer::processRenderCommand(RenderCommand* command)
{
    auto commandType = command->getType();
    if (RenderCommand::Type::TRIANGLES_COMMAND == commandType)
    {
        // Draw any pending data from other command kinds before handling triangles.
        flush3D();
        flushQuads();

        auto cmd = static_cast<TrianglesCommand*>(command);

        // Draw what has been accumulated so far if this command must not be
        // batched or would not fit into the remaining vertex/index buffer space.
        if (cmd->isSkipBatching() || _filledVertex + cmd->getVertexCount() > VBO_SIZE || _filledIndex + cmd->getIndexCount() > INDEX_VBO_SIZE)
        {
            CCASSERT(cmd->getVertexCount()>= 0 && cmd->getVertexCount() < VBO_SIZE, "VBO for vertex is not big enough, please break the data down or use customized render command");
            CCASSERT(cmd->getIndexCount()>= 0 && cmd->getIndexCount() < INDEX_VBO_SIZE, "VBO for index is not big enough, please break the data down or use customized render command");
            drawBatchedTriangles();
        }

        // Record the command and append its vertices/indices to the batch.
        _batchedCommands.push_back(cmd);
        fillVerticesAndIndices(cmd);

        // A command that opts out of batching is drawn right away.
        if (cmd->isSkipBatching())
        {
            drawBatchedTriangles();
        }
    }
    else if (RenderCommand::Type::QUAD_COMMAND == commandType)
    {
        // Draw any pending data from other command kinds before handling quads.
        flush3D();
        flushTriangles();

        auto cmd = static_cast<QuadCommand*>(command);

        // Draw the accumulated quads first if this command must not be batched
        // or its quads (4 vertices each) would not fit into the buffer.
        if (cmd->isSkipBatching() || (_numberQuads + cmd->getQuadCount()) * 4 > VBO_SIZE)
        {
            CCASSERT(cmd->getQuadCount()>= 0 && cmd->getQuadCount() * 4 < VBO_SIZE, "VBO for vertex is not big enough, please break the data down or use customized render command");
            drawBatchedQuads();
        }

        // Record the command and append its quads to the batch.
        _batchQuadCommands.push_back(cmd);
        fillQuads(cmd);

        if (cmd->isSkipBatching())
        {
            drawBatchedQuads();
        }
    }
    else if (RenderCommand::Type::MESH_COMMAND == commandType)
    {
        flush2D();

        auto cmd = static_cast<MeshCommand*>(command);

        // A new mesh batch starts when the command skips batching, when no mesh
        // batch is open, or when the material ID differs from the open batch.
        if (cmd->isSkipBatching() || _lastBatchedMeshCommand == nullptr || _lastBatchedMeshCommand->getMaterialID() != cmd->getMaterialID())
        {
            flush3D();

            if (cmd->isSkipBatching())
            {
                // XXX: execute() will call bind() and unbind()
                // but unbind() shouldn't be call if the next command is a MESH_COMMAND with Material.
                // Once most of cocos2d-x moves to Pass/StateBlock, only bind() should be used.
                cmd->execute();
            }
            else
            {
                cmd->preBatchDraw();
                cmd->batchDraw();
                _lastBatchedMeshCommand = cmd;
            }
        }
        else
        {
            // Same material as the open mesh batch: draw without re-preparing.
            cmd->batchDraw();
        }
    }
    else if (RenderCommand::Type::GROUP_COMMAND == commandType)
    {
        flush();

        // Recurse into the render queue referenced by the group command.
        int renderQueueID = static_cast<GroupCommand*>(command)->getRenderQueueID();
        visitRenderQueue(_renderGroups[renderQueueID]);
    }
    else if (RenderCommand::Type::CUSTOM_COMMAND == commandType)
    {
        flush();
        auto cmd = static_cast<CustomCommand*>(command);
        cmd->execute();
    }
    else if (RenderCommand::Type::BATCH_COMMAND == commandType)
    {
        flush();
        auto cmd = static_cast<BatchCommand*>(command);
        cmd->execute();
    }
    else if (RenderCommand::Type::PRIMITIVE_COMMAND == commandType)
    {
        flush();
        auto cmd = static_cast<PrimitiveCommand*>(command);
        cmd->execute();
    }
    else
    {
        CCLOGERROR("Unknown commands in renderQueue");
    }
}
这个函数通过内存拷贝把command的顶点数据合并到批处理缓冲区,用command的modelView矩阵对拷贝后的顶点做变换,再把索引加上当前顶点偏移后写入索引缓冲区,最后更新已填充的顶点总数和索引总数。渲染的方法是drawBatchedTriangles。
void Renderer::fillVerticesAndIndices(const TrianglesCommand* cmd)
{
memcpy(_verts + _filledVertex, cmd->getVertices(), sizeof(V3F_C4B_T2F) * cmd->getVertexCount());
const Mat4& modelView = cmd->getModelView();
for(ssize_t i=0; i< cmd->getVertexCount(); ++i)
{
V3F_C4B_T2F *q = &_verts[i + _filledVertex];
Vec3 *vec1 = (Vec3*)&q->vertices;
modelView.transformPoint(vec1);
}
const unsigned short* indices = cmd->getIndices();
//fill index
for(ssize_t i=0; i< cmd->getIndexCount(); ++i)
{
_indices[_filledIndex + i] = _filledVertex + indices[i];
}
_filledVertex += cmd->getVertexCount();
_filledIndex += cmd->getIndexCount();
}
drawBatchedTriangles方法在遍历每一个command时,会把它的材质ID(MaterialID)和上一次记录的材质ID进行比较:如果相同,只累加索引数量,暂不提交渲染;如果不同,先把此前累积的索引通过一次glDrawElements提交渲染,再切换到新的材质。这个函数是合并DrawCall的决定性步骤。
// Draws all triangle commands accumulated in _batchedCommands.
// Consecutive commands with the same material ID are submitted in a single
// glDrawElements call; a change of material ID (or MATERIAL_ID_DO_NOT_BATCH)
// ends the current run and starts a new one. The batch state is reset at the end.
// The "......" lines mark code that this excerpt omits.
void Renderer::drawBatchedTriangles() {
// Number of indices accumulated for the next draw call
int indexToDraw = 0;
// Offset (in indices) of the first index of the next draw call
int startIndex = 0;
//Upload buffer to VBO
if(_filledVertex <= 0 || _filledIndex <= 0 || _batchedCommands.empty())
{
// Nothing has been batched, so there is nothing to draw
return;
}
// Set up the OpenGL state and upload vertex and index data to the GPU (omitted in this excerpt)
......
// Walk through the batched commands
for(const auto& cmd : _batchedCommands)
{
auto newMaterialID = cmd->getMaterialID();
// A different material than the last one used (or a do-not-batch material) ends the current run
if(_lastMaterialID != newMaterialID || newMaterialID == MATERIAL_ID_DO_NOT_BATCH)
{
// Draw the triangles accumulated so far
if(indexToDraw > 0)
{
glDrawElements(GL_TRIANGLES, (GLsizei) indexToDraw, GL_UNSIGNED_SHORT, (GLvoid*) (startIndex*sizeof(_indices[0])) );
_drawnBatches++;// count one more draw call
_drawnVertices += indexToDraw;// add to the drawn-vertex counter (the value added is the index count)
startIndex += indexToDraw;
indexToDraw = 0;
}
// Activate the new material and remember its ID
cmd->useMaterial();
_lastMaterialID = newMaterialID;
}
indexToDraw += cmd->getIndexCount();
}
// Draw whatever is still pending after the last command
if(indexToDraw > 0)
{
glDrawElements(GL_TRIANGLES, (GLsizei) indexToDraw, GL_UNSIGNED_SHORT, (GLvoid*) (startIndex*sizeof(_indices[0])) );
_drawnBatches++;// count one more draw call
_drawnVertices += indexToDraw;// add to the drawn-vertex counter (the value added is the index count)
}
// Restore the OpenGL state (omitted in this excerpt)
......
// Reset the batch state
_batchedCommands.clear();
_filledVertex = 0;
_filledIndex = 0;
}
参考资料:
https://learnopengl-cn.readthedocs.io/zh/latest/