Skip to content

Commit e09c985

Browse files
author
drops
committed
Combine OpenGL uniforms into batches
1 parent 5e23682 commit e09c985

17 files changed

+381
-286
lines changed

res/gamedata/shaders/gl/deffer_detail_s_flat.vs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,7 @@
11
#include "common.h"
22
#include "iostructs\v_detail.h"
33

4-
layout(std140) uniform EnvironmentDetailUBO {
5-
mat4 xform;
6-
mat4 formView;
7-
vec4 consts;
8-
vec4 scale;
9-
vec4 bias;
10-
vec4 wind;
11-
vec4 wave;
12-
vec3 sun;
13-
};
4+
#include "shared\ubo_render_dump.h"
145

156
uniform float4 array[61*4];
167

res/gamedata/shaders/gl/deffer_detail_w_flat.vs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,7 @@
11
#include "common.h"
22
#include "iostructs\v_detail.h"
33

4-
layout(std140) uniform EnvironmentDetailUBO {
5-
mat4 xform;
6-
mat4 formView;
7-
vec4 consts;
8-
vec4 scale;
9-
vec4 bias;
10-
vec4 wind;
11-
vec4 wave;
12-
vec3 sun;
13-
};
4+
#include "shared\ubo_render_dump.h"
145

156
uniform float4 array[61*4];
167

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#ifndef OPENXRAY_UBO_RENDER_DUMP_H
2+
#define OPENXRAY_UBO_RENDER_DUMP_H
3+
4+
layout(std140) uniform RenderDumpUBO {
5+
float4x4 xform;
6+
float4x4 formView;
7+
float4 consts;
8+
float4 scale;
9+
float4 bias;
10+
float4 wind;
11+
float4 wave;
12+
float4 sun;
13+
};
14+
15+
#endif

src/Layers/xrRender/DetailManager.h

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -50,38 +50,9 @@ extern float dm_current_fade;// = float(2*dm_current_size)-.5f;
5050
extern float ps_current_detail_density;
5151
extern float ps_current_detail_height;
5252

53-
#ifdef USE_OGL
54-
#pragma pack(push, 1)
55-
struct EnvironmentDetailData
56-
{
57-
glm::mat4x4 xform;
58-
glm::mat4x4 xformView;
59-
glm::vec4 consts;
60-
glm::vec4 scale;
61-
glm::vec4 bias;
62-
glm::vec4 wind;
63-
glm::vec4 wave;
64-
glm::vec3 sun;
65-
66-
private:
67-
[[maybe_unused]]
68-
glm::vec1 _glsl_140_padding {};
69-
// 224 bytes
70-
};
71-
#pragma pack(pop)
72-
#endif
73-
7453
class ECORE_API CDetailManager
7554
{
7655
public:
77-
#ifdef USE_OGL
78-
EnvironmentDetailData environmentDetailData[3];
79-
80-
UniformBufferObject uniformBufferObject = {
81-
GL_NONE, GL_DYNAMIC_DRAW, sizeof(EnvironmentDetailData)*3
82-
};
83-
#endif
84-
8556
struct SlotItem
8657
{ // one small bush
8758
float scale;
@@ -214,12 +185,14 @@ class ECORE_API CDetailManager
214185
VertexStagingBuffer hw_VB;
215186
IndexStagingBuffer hw_IB;
216187

188+
#ifndef USE_OGL
217189
ref_constant hwc_consts;
218190
ref_constant hwc_wave;
219191
ref_constant hwc_wind;
220-
ref_constant hwc_array;
221192
ref_constant hwc_s_consts;
222193
ref_constant hwc_s_xform;
194+
#endif
195+
ref_constant hwc_array;
223196
ref_constant hwc_s_array;
224197
void hw_Load();
225198
void hw_Load_Geom();
@@ -258,5 +231,16 @@ class ECORE_API CDetailManager
258231

259232
CDetailManager();
260233
virtual ~CDetailManager();
234+
235+
#ifdef USE_OGL
236+
private:
237+
// TODO: move to a global registry? (the block binding index below is hard-coded)
238+
GLuint renderDumpBlockBinding = 3;
239+
ShaderInstanceData renderDumpData[3];
240+
241+
UniformBufferObject renderDumpUBO = {
242+
BUFFER_DEBUG_NAME("DetailManagerUBO") GL_NONE, GL_DYNAMIC_DRAW, sizeof(ShaderInstanceData)*3
243+
};
244+
#endif
261245
};
262246
} // namespace xray::render::RENDER_NAMESPACE

src/Layers/xrRender/R_Backend.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#endif
2424
#ifdef USE_OGL
2525
#include <glm/glm.hpp>
26-
#include "gl_backend_uniforms.h"
26+
#include "gl_backend_extension.h"
2727
#endif
2828

2929
#include "FVF.h"
@@ -65,7 +65,7 @@ struct R_statistics
6565
#pragma warning(disable:4324)
6666
class ECORE_API CBackend
6767
#ifdef USE_OGL
68-
: public CBackendUniforms
68+
: public CBackendExtension
6969
#endif
7070
{
7171
public:
@@ -410,6 +410,7 @@ class ECORE_API CBackend
410410
public:
411411
#if defined(USE_OGL)
412412
ICF bool is_TessEnabled() { return false; }
413+
void RenderInstanced(D3DPRIMITIVETYPE T, u32 baseV, u32 startV, u32 countV, u32 startI, u32 PC, GLsizei instanceCount);
413414
#elif defined(USE_DX11)
414415
ICF bool is_TessEnabled();
415416
#else

src/Layers/xrRender/SkeletonX.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ void CSkeletonX::_Render(CBackend& cmd_list, ref_geom& hGeom, u32 vCount, u32 iO
9595

9696
// render
9797
#ifdef USE_OGL
98-
cmd_list.set_uniforms(array->vs.program, array->vs.location, uniformBuffer);
98+
cmd_list.setUniforms(array->vs.program, array->vs.location, uniformBuffer);
9999
#endif
100100
cmd_list.set_Geometry(hGeom);
101101
cmd_list.Render(D3DPT_TRIANGLELIST, 0, 0, vCount, iOffset, pCount);

src/Layers/xrRenderGL/glDetailManager_VS.cpp

Lines changed: 53 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,29 @@ void CDetailManager::hw_Load_Shaders()
1919
S.create("details" DELIMITER "set");
2020
R_constant_table& T0 = *S->E[0]->passes[0]->constants;
2121
R_constant_table& T1 = *S->E[1]->passes[0]->constants;
22-
hwc_consts = T0.get("consts");
23-
hwc_wave = T0.get("wave");
24-
hwc_wind = T0.get("dir2D");
2522
hwc_array = T0.get("array");
26-
hwc_s_consts = T1.get("consts");
27-
hwc_s_xform = T1.get("xform");
2823
hwc_s_array = T1.get("array");
24+
25+
// TODO: find a better way to register RenderDumpUBO with each vertex-shader program than walking every shader element and pass here
26+
for (auto shader_element : S->E)
27+
{
28+
if (shader_element == nullptr)
29+
continue;
30+
31+
for (auto pass : shader_element->passes)
32+
{
33+
if (pass->vs && pass->vs->sh)
34+
{
35+
RCache.uboRegisterWithProgram(pass->vs, "RenderDumpUBO", renderDumpBlockBinding, renderDumpUBO);
36+
}
37+
}
38+
}
2939
}
3040

3141
void CDetailManager::hw_Render(CBackend& cmd_list)
3242
{
3343
using namespace detail_manager;
3444

35-
if (uniformBufferObject.id == GL_NONE)
36-
RCache.uniformBufferObjectGenerate(uniformBufferObject);
37-
3845
// Render-prepare
3946
// Update timer
4047
// Can't use Device.fTimeDelta since it is smoothed! Don't know why, but smoothed value looks more choppy!
@@ -58,51 +65,47 @@ void CDetailManager::hw_Render(CBackend& cmd_list)
5865
const auto& desc = g_pGamePersistent->Environment().CurrentEnv;
5966

6067
// Wave0
61-
float scale = 1.f / float(quant);
68+
float scale = 1.f / static_cast<float>(quant);
6269

6370
//environmentDetailUbo[0].xform = Device.mFullTransform;
64-
memcpy(&environmentDetailData[0].xform, &Device.mFullTransform, sizeof(glm::mat4));
71+
memcpy(&renderDumpData[0].xform, &Device.mFullTransform, sizeof(glm::mat4));
6572
//environmentDetailUbo[0].xformView;
66-
environmentDetailData[0].consts = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
67-
environmentDetailData[0].scale = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
73+
renderDumpData[0].consts = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
74+
renderDumpData[0].scale = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
6875
//environmentDetailUbo[0].bias;
69-
environmentDetailData[0].wind = wind1;
70-
environmentDetailData[0].wave = glm::vec4(1.f / 5.f, 1.f / 7.f, 1.f / 3.f, m_time_pos) / PI_MUL_2;
71-
environmentDetailData[0].sun = glm::vec3(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z) * 0.5f;
72-
73-
//
76+
renderDumpData[0].wind = wind1;
77+
renderDumpData[0].wave = glm::vec4(1.f / 5.f, 1.f / 7.f, 1.f / 3.f, m_time_pos) / PI_MUL_2;
78+
renderDumpData[0].sun = glm::vec3(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z) * 0.5f;
7479

7580
//environmentDetailUbo[1].xform = Device.mFullTransform;
76-
memcpy(&environmentDetailData[1].xform, &Device.mFullTransform, sizeof(glm::mat4));
81+
memcpy(&renderDumpData[1].xform, &Device.mFullTransform, sizeof(glm::mat4));
7782
//environmentDetailUbo[1].xformView;
78-
environmentDetailData[1].consts = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
79-
environmentDetailData[1].scale = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
83+
renderDumpData[1].consts = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
84+
renderDumpData[1].scale = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
8085
//environmentDetailUbo[1].bias;
81-
environmentDetailData[1].wind = wind2;
82-
environmentDetailData[1].wave = glm::vec4(1.f / 3.f, 1.f / 7.f, 1.f / 5.f, m_time_pos) / PI_MUL_2;
83-
environmentDetailData[1].sun = glm::vec3(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z) * 0.5f;
84-
85-
//
86+
renderDumpData[1].wind = wind2;
87+
renderDumpData[1].wave = glm::vec4(1.f / 3.f, 1.f / 7.f, 1.f / 5.f, m_time_pos) / PI_MUL_2;
88+
renderDumpData[1].sun = glm::vec3(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z) * 0.5f;
8689

8790
//environmentDetailUbo[2].xform = Device.mFullTransform;
88-
memcpy(&environmentDetailData[2].xform, &Device.mFullTransform, sizeof(glm::mat4));
91+
memcpy(&renderDumpData[2].xform, &Device.mFullTransform, sizeof(glm::mat4));
8992
//environmentDetailUbo[2].xformView;
90-
environmentDetailData[2].consts = glm::vec4(scale, scale, scale, 1.f);
91-
environmentDetailData[2].scale = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
93+
renderDumpData[2].consts = glm::vec4(scale, scale, scale, 1.f);
94+
renderDumpData[2].scale = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
9295
//environmentDetailUbo[2].bias;
93-
environmentDetailData[2].wind = wind2;
94-
environmentDetailData[2].wave = glm::vec4(1.f / 3.f, 1.f / 7.f, 1.f / 5.f, m_time_pos) / PI_MUL_2;
95-
environmentDetailData[2].sun = glm::vec3(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z) * 0.5f;
96+
renderDumpData[2].wind = wind2;
97+
renderDumpData[2].wave = glm::vec4(1.f / 3.f, 1.f / 7.f, 1.f / 5.f, m_time_pos) / PI_MUL_2;
98+
renderDumpData[2].sun = glm::vec3(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z) * 0.5f;
9699

97-
RCache.uniformBufferObjectPushToDevice(uniformBufferObject, uniformBufferObject.size, &environmentDetailData);
100+
cmd_list.uboPushToDevice(renderDumpUBO, renderDumpUBO.size, &renderDumpData);
98101

99-
CHK_GL(glBindBufferRange(GL_UNIFORM_BUFFER, 3, uniformBufferObject.id, 0, sizeof(EnvironmentDetailData)));
102+
cmd_list.uboBindRange(renderDumpUBO, renderDumpBlockBinding, 0, sizeof(ShaderInstanceData));
100103
hw_Render_dump(cmd_list, 1, 0);
101104

102-
CHK_GL(glBindBufferRange(GL_UNIFORM_BUFFER, 3, uniformBufferObject.id, sizeof(EnvironmentDetailData), sizeof(EnvironmentDetailData)));
105+
cmd_list.uboBindRange(renderDumpUBO, renderDumpBlockBinding, sizeof(ShaderInstanceData), sizeof(ShaderInstanceData));
103106
hw_Render_dump(cmd_list, 2, 0);
104107

105-
CHK_GL(glBindBufferRange(GL_UNIFORM_BUFFER, 3, uniformBufferObject.id, sizeof(EnvironmentDetailData)*2, sizeof(EnvironmentDetailData)));
108+
cmd_list.uboBindRange(renderDumpUBO, renderDumpBlockBinding, sizeof(ShaderInstanceData)*2, sizeof(ShaderInstanceData));
106109
hw_Render_dump(cmd_list, 0, 1);
107110
}
108111

@@ -118,41 +121,30 @@ void CDetailManager::hw_Render_dump(CBackend& cmd_list, u32 var_id, u32 lod_id)
118121

119122
vis_list& list = m_visibles [var_id];
120123

121-
const auto& desc = g_pGamePersistent->Environment().CurrentEnv;
122-
Fvector c_sun, c_ambient, c_hemi;
123-
c_sun.set(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z);
124-
c_sun.mul(.5f);
125-
c_ambient.set(desc.ambient.x, desc.ambient.y, desc.ambient.z);
126-
c_hemi.set(desc.hemi_color.x, desc.hemi_color.y, desc.hemi_color.z);
127-
128124
// Iterate
129125
for (u32 O = 0; O < objects.size(); O++)
130126
{
131127
CDetail& Object = *objects [O];
132128
xr_vector<SlotItemVec*>& vis = list [O];
129+
133130
if (!vis.empty())
134131
{
135132
for (u32 iPass = 0; iPass < Object.shader->E[lod_id]->passes.size(); ++iPass)
136133
{
137-
// TODO register only once
138-
RCache.uniformBufferObjectRegisterWithProgram(Object.shader->E[lod_id]->passes[iPass]->vs, "EnvironmentDetailUBO", 3, uniformBufferObject);
139-
140134
cmd_list.set_Element(Object.shader->E[lod_id], iPass);
141135
cmd_list.apply_lmaterial();
142136

143137
ref_constant constArray = cmd_list.get_c(strArray);
144138
VERIFY(constArray);
145139

146-
u32 dwBatch = 0;
140+
u32 instanceCount = 0;
147141
xr_vector<glm::vec4> uniformBuffer;
148142
uniformBuffer.reserve(hw_BatchSize*4);
149143

150144
for (auto items : vis)
151145
{
152146
for (auto& instance : *items)
153147
{
154-
u32 base = dwBatch * 4;
155-
156148
// Build matrix ( 3x4 matrix, last row - color )
157149
float scale = instance->scale_calculated;
158150
Fmatrix& M = instance->mRotY;
@@ -165,35 +157,28 @@ void CDetailManager::hw_Render_dump(CBackend& cmd_list, u32 var_id, u32 lod_id)
165157
// R2 only needs hemisphere
166158
uniformBuffer.emplace_back(instance->c_sun, instance->c_sun, instance->c_sun, instance->c_hemi);
167159

168-
dwBatch ++;
169-
if (dwBatch == hw_BatchSize)
160+
instanceCount++;
161+
// flush once a full batch of hw_BatchSize instances has been accumulated
162+
if (instanceCount == hw_BatchSize) // flush
170163
{
171-
// flush
172-
RImplementation.BasicStats.DetailCount += dwBatch;
173-
u32 dwCNT_verts = dwBatch * Object.number_vertices;
174-
u32 dwCNT_prims = dwBatch * Object.number_indices / 3;
175-
176-
cmd_list.set_uniforms(constArray->vs.program, constArray->vs.location, uniformBuffer);
177-
glDrawElementsInstancedBaseVertex(GL_TRIANGLES, Object.number_indices, GL_UNSIGNED_SHORT, (void*)(iOffset * sizeof(GLushort)), dwBatch, vOffset);
164+
RImplementation.BasicStats.DetailCount += instanceCount;
178165

179-
cmd_list.stat.r.s_details.add(dwCNT_verts);
180-
uniformBuffer.clear();
166+
cmd_list.setUniforms(constArray->vs.program, constArray->vs.location, uniformBuffer);
167+
cmd_list.RenderInstanced(D3DPT_TRIANGLELIST, vOffset, 0, Object.number_vertices, iOffset, Object.number_indices / 3, instanceCount);
181168

182169
// restart
183-
dwBatch = 0;
170+
instanceCount = 0;
171+
uniformBuffer.clear();
184172
}
185173
}
186174
}
187-
// flush if nessecary
188-
if (dwBatch)
175+
// flush if necessary
176+
if (instanceCount)
189177
{
190-
RImplementation.BasicStats.DetailCount += dwBatch;
191-
u32 dwCNT_verts = dwBatch * Object.number_vertices;
192-
u32 dwCNT_prims = dwBatch * Object.number_indices / 3;
178+
RImplementation.BasicStats.DetailCount += instanceCount;
193179

194-
cmd_list.set_uniforms(constArray->vs.program, constArray->vs.location, uniformBuffer);
195-
glDrawElementsInstancedBaseVertex(GL_TRIANGLES, Object.number_indices, GL_UNSIGNED_SHORT, (void*)(iOffset * sizeof(GLushort)), dwBatch, vOffset);
196-
cmd_list.stat.r.s_details.add(dwCNT_verts);
180+
cmd_list.setUniforms(constArray->vs.program, constArray->vs.location, uniformBuffer);
181+
cmd_list.RenderInstanced(D3DPT_TRIANGLELIST, vOffset, 0, Object.number_vertices, iOffset, Object.number_indices / 3, instanceCount);
197182
}
198183
}
199184
}

0 commit comments

Comments
 (0)