在渲染物体之前,物体模型顶点数据保存在内存中,CPU通过向GPU发送渲染指令后,数据会复制到显存中,然后进行渲染。在这个过程中,CPU向GPU发送渲染指令的过程,名为Draw Call。OpenGL中的渲染指令是指: glDrawArrays(GL_TRIANGLES, 0, amount_of_vertices);函数
当我们在渲染一个场景时,该场景中包含非常多的简单模型,比如星星,草等,这些模型的顶点数据极少,但是每次单独渲染一个简单模型时都会调用一次Draw Call,渲染的速度很快,但是发送指令的过程是很慢的(CPU告诉GPU该从哪个缓冲读取数据,从哪寻找顶点属性,而且这些都是在相对缓慢的CPU到GPU总线(CPU to GPU Bus)上进行的),这就会使渲染整个场景的速度变得非常慢,传统的方案就是:
for(unsigned int i = 0; i < amount_of_models_to_draw; i++)
{
DoSomePreparations(); // 绑定VAO,绑定纹理,设置uniform等
glDrawArrays(GL_TRIANGLES, 0, amount_of_vertices);
}
但如果我们能将这些星星或者草一次性全部发给GPU去渲染,速度就会非常快,这就是批处理
批处理能节省Draw Call的数量,极大提升渲染速度,因此在Unity中,批处理的作用非常大
使用批处理的前提是多个物体使用同一个shader做渲染,在Unity中,就是指使用相同的材质球。
在OpenGL中使用批处理,只需要将glDrawArrays和glDrawElements的渲染调用分别改为glDrawArraysInstanced和glDrawElementsInstanced就可以了。这些渲染函数的实例化版本需要一个额外的参数,叫做实例数量(Instance Count)
这个函数本身并没有什么用。渲染同一个物体一千次对我们并没有什么用处,每个物体都是完全相同的,而且还在同一个位置。我们只能看见一个物体!处于这个原因,GLSL在顶点着色器中嵌入了另一个内建变量,gl_InstanceID。
在使用实例化渲染调用时,gl_InstanceID会从0开始,在每个实例被渲染时递增1。比如说,我们正在渲染第43个实例,那么顶点着色器中它的gl_InstanceID将会是42。因为每个实例都有唯一的ID,我们可以建立一个数组,将ID与位置值对应起来,将每个实例放置在世界的不同位置。
物体顶点数据
float quadVertices[] = {
// 位置 // 颜色
-0.05f, 0.05f, 1.0f, 0.0f, 0.0f,
0.05f, -0.05f, 0.0f, 1.0f, 0.0f,
-0.05f, -0.05f, 0.0f, 0.0f, 1.0f,
-0.05f, 0.05f, 1.0f, 0.0f, 0.0f,
0.05f, -0.05f, 0.0f, 1.0f, 0.0f,
0.05f, 0.05f, 0.0f, 1.0f, 1.0f
};
顶点着色器:
#version 330 core
layout (location = 0) in vec2 aPos;
layout (location = 1) in vec3 aColor;
out vec3 fColor;
uniform vec2 offsets[100];
void main()
{
vec2 offset = offsets[gl_InstanceID];
gl_Position = vec4(aPos + offset, 0.0, 1.0);
fColor = aColor;
}
片元着色器:
#version 330 core
out vec4 FragColor;
in vec3 fColor;
void main()
{
FragColor = vec4(fColor, 1.0);
}
绘制:
glBindVertexArray(quadVAO);
//绘制100个相同的图形
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 100);
顶点着色器:
#version 330 core
layout (location = 0) in vec2 aPos;
layout (location = 1) in vec3 aColor;
layout (location = 2) in vec2 aOffset;
out vec3 fColor;
void main()
{
fColor = aColor;
gl_Position = vec4(aPos + aOffset, 0.0, 1.0);
}
片元着色器:
#version 330 core
out vec4 FragColor;
in vec3 fColor;
void main()
{
FragColor = vec4(fColor, 1.0);
}
#include
#include
#include "shader.h"
#include
void framebuffer_size_callback(GLFWwindow* window, int width, int height);
// settings
const unsigned int SCR_WIDTH = 1280;
const unsigned int SCR_HEIGHT = 720;
int main()
{
// glfw: initialize and configure
// ------------------------------
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
// glfw window creation
// --------------------
GLFWwindow* window = glfwCreateWindow(SCR_WIDTH, SCR_HEIGHT, "LearnOpenGL", NULL, NULL);
if (window == NULL)
{
std::cout << "Failed to create GLFW window" << std::endl;
glfwTerminate();
return -1;
}
glfwMakeContextCurrent(window);
// glad: load all OpenGL function pointers
// ---------------------------------------
if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress))
{
std::cout << "Failed to initialize GLAD" << std::endl;
return -1;
}
// configure global opengl state
// -----------------------------
glEnable(GL_DEPTH_TEST);
// build and compile shaders
// -------------------------
Shader shader("batch.vs", "batch.fs");
// generate a list of 100 quad locations/translation-vectors
// ---------------------------------------------------------
glm::vec2 translations[100];
int index = 0;
float offset = 0.1f;
for (int y = -10; y < 10; y += 2)
{
for (int x = -10; x < 10; x += 2)
{
glm::vec2 translation;
translation.x = (float)x / 10.0f + offset;
translation.y = (float)y / 10.0f + offset;
translations[index++] = translation;
}
}
// store instance data in an array buffer
// --------------------------------------
unsigned int instanceVBO;
glGenBuffers(1, &instanceVBO);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(glm::vec2) * 100, &translations[0], GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
// set up vertex data (and buffer(s)) and configure vertex attributes
// ------------------------------------------------------------------
float quadVertices[] = {
// positions // colors
-0.05f, 0.05f, 1.0f, 1.0f, 0.0f,
0.05f, -0.05f, 1.0f, 1.0f, 0.0f,
-0.05f, -0.05f, 1.0f, 1.0f, 0.0f,
-0.05f, 0.05f, 1.0f, 1.0f, 0.0f,
0.05f, -0.05f, 1.0f, 1.0f, 0.0f,
0.05f, 0.05f, 1.0f, 1.0f, 0.0f
};
unsigned int quadVAO, quadVBO;
glGenVertexArrays(1, &quadVAO);
glGenBuffers(1, &quadVBO);
glBindVertexArray(quadVAO);
glBindBuffer(GL_ARRAY_BUFFER, quadVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(quadVertices), quadVertices, GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(float), (void*)0);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(float), (void*)(2 * sizeof(float)));
// also set instance data
glEnableVertexAttribArray(2);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO); // this attribute comes from a different vertex buffer
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
//我们调用了glVertexAttribDivisor。这个函数告诉了OpenGL该什么时候更新顶点属性的内容至新一组数据。它的第一个参数是需要的顶点属性,第二个参数是属性除数(Attribute Divisor)。默认情况下,属性除数是0,告诉OpenGL我们需要在顶点着色器的每次迭代时更新顶点属性。将它设置为1时,我们告诉OpenGL我们希望在渲染一个新实例的时候更新顶点属性。而设置为2时,我们希望每2个实例更新一次属性,以此类推。我们将属性除数设置为1,是在告诉OpenGL,处于位置值2的顶点属性是一个实例化数组。
glVertexAttribDivisor(2, 1); // tell OpenGL this is an instanced vertex attribute.
// render loop
// -----------
while (!glfwWindowShouldClose(window))
{
// render
// ------
glClearColor(0.1f, 0.1f, 0.1f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// draw 100 instanced quads
shader.use();
glBindVertexArray(quadVAO);
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 100); // 100 triangles of 6 vertices each
glBindVertexArray(0);
// glfw: swap buffers and poll IO events (keys pressed/released, mouse moved etc.)
// -------------------------------------------------------------------------------
glfwSwapBuffers(window);
glfwPollEvents();
}
// optional: de-allocate all resources once they've outlived their purpose:
// ------------------------------------------------------------------------
glDeleteVertexArrays(1, &quadVAO);
glDeleteBuffers(1, &quadVBO);
glfwTerminate();
return 0;
}
// glfw: whenever the window size changed (by OS or user resize) this callback function executes
// ---------------------------------------------------------------------------------------------
void framebuffer_size_callback(GLFWwindow* window, int width, int height)
{
// make sure the viewport matches the new window dimensions; note that width and
// height will be significantly larger than specified on retina displays.
glViewport(0, 0, width, height);
}
其他头文件请参考之前的内容补充:
之前的文章