Skip to content

Commit 5e23682

Browse files
author
drops
committed
Combine OpenGL uniforms into batches
1 parent a6553e1 commit 5e23682

File tree

9 files changed

+275
-94
lines changed

9 files changed

+275
-94
lines changed

res/gamedata/shaders/gl/deffer_detail_s_flat.vs

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,24 @@
11
#include "common.h"
22
#include "iostructs\v_detail.h"
33

4-
uniform float4 consts; // {1/quant,1/quant,diffusescale,ambient}
5-
//uniform float4 array [200] : register(c12);
6-
//tbuffer DetailsData
7-
//{
8-
uniform float4 array[61*4];
9-
//}
4+
layout(std140) uniform EnvironmentDetailUBO {
5+
mat4 xform;
6+
mat4 formView;
7+
vec4 consts;
8+
vec4 scale;
9+
vec4 bias;
10+
vec4 wind;
11+
vec4 wave;
12+
vec3 sun;
13+
};
14+
15+
uniform float4 array[61*4];
1016

1117
v2p_flat _main (v_detail v)
1218
{
1319
v2p_flat O;
1420
// index
15-
int i = int(v.misc.w);
21+
int i = gl_InstanceID*4;
1622
float4 m0 = array[i+0];
1723
float4 m1 = array[i+1];
1824
float4 m2 = array[i+2];

res/gamedata/shaders/gl/deffer_detail_w_flat.vs

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,24 @@
11
#include "common.h"
22
#include "iostructs\v_detail.h"
33

4-
uniform float4 consts; // {1/quant,1/quant,diffusescale,ambient}
5-
uniform float4 wave; // cx,cy,cz,tm
6-
uniform float4 dir2D;
7-
//uniform float4 array [200] : register(c12);
8-
//tbuffer DetailsData
9-
//{
10-
uniform float4 array[61*4];
11-
//}
4+
layout(std140) uniform EnvironmentDetailUBO {
5+
mat4 xform;
6+
mat4 formView;
7+
vec4 consts;
8+
vec4 scale;
9+
vec4 bias;
10+
vec4 wind;
11+
vec4 wave;
12+
vec3 sun;
13+
};
14+
15+
uniform float4 array[61*4];
1216

1317
v2p_flat _main (v_detail v)
1418
{
1519
v2p_flat O;
1620
// index
17-
int i = int(v.misc.w);
21+
int i = gl_InstanceID*4;
1822
float4 m0 = array[i+0];
1923
float4 m1 = array[i+1];
2024
float4 m2 = array[i+2];
@@ -33,7 +37,7 @@ v2p_flat _main (v_detail v)
3337
float H = pos.y - base; // height of vertex (scaled)
3438
float frac = v.misc.z*consts.x; // fractional
3539
float inten = H * dp;
36-
float2 result = calc_xz_wave (dir2D.xz*inten,frac);
40+
float2 result = calc_xz_wave (wind.xz*inten,frac);
3741
pos = float4(pos.x+result.x, pos.y, pos.z+result.y, 1);
3842

3943
// Normal in world coords

src/Layers/xrRender/DetailManager.h

Lines changed: 36 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50,9 +50,38 @@ extern float dm_current_fade;// = float(2*dm_current_size)-.5f;
5050
extern float ps_current_detail_density;
5151
extern float ps_current_detail_height;
5252

53+
#ifdef USE_OGL
54+
#pragma pack(push, 1)
55+
struct EnvironmentDetailData
56+
{
57+
glm::mat4x4 xform;
58+
glm::mat4x4 xformView;
59+
glm::vec4 consts;
60+
glm::vec4 scale;
61+
glm::vec4 bias;
62+
glm::vec4 wind;
63+
glm::vec4 wave;
64+
glm::vec3 sun;
65+
66+
private:
67+
[[maybe_unused]]
68+
glm::vec1 _glsl_140_padding {};
69+
// 224 bytes
70+
};
71+
#pragma pack(pop)
72+
#endif
73+
5374
class ECORE_API CDetailManager
5475
{
5576
public:
77+
#ifdef USE_OGL
78+
EnvironmentDetailData environmentDetailData[3];
79+
80+
UniformBufferObject uniformBufferObject = {
81+
GL_NONE, GL_DYNAMIC_DRAW, sizeof(EnvironmentDetailData)*3
82+
};
83+
#endif
84+
5685
struct SlotItem
5786
{ // a single bush
5887
float scale;
@@ -70,9 +99,9 @@ class ECORE_API CDetailManager
7099

71100
struct SlotPart
72101
{ //
73-
u32 id; // ID модельки
74-
SlotItemVec items; // список кустиков
75-
SlotItemVec r_items[3]; // список кустиков for render
102+
u32 id; // model ID
103+
SlotItemVec items; // list of bushes
104+
SlotItemVec r_items[3]; // list of bushes for render
76105
};
77106

78107
enum SlotType : u32
@@ -197,7 +226,11 @@ class ECORE_API CDetailManager
197226
void hw_Load_Shaders();
198227
void hw_Unload();
199228
void hw_Render(CBackend& cmd_list);
229+
#ifdef USE_OGL
230+
void hw_Render_dump(CBackend& cmd_list, u32 var_id, u32 lod_id);
231+
#else
200232
void hw_Render_dump(CBackend& cmd_list, const Fvector4& consts, const Fvector4& wave, const Fvector4& wind, u32 var_id, u32 lod_id);
233+
#endif
201234

202235
// get unpacked slot
203236
DetailSlot& QueryDB(int sx, int sz);

src/Layers/xrRender/SH_Atomic.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ struct ECORE_API SVS : public xr_resource_named
3131
ID3DVertexShader* sh;
3232
#elif defined(USE_OGL)
3333
GLuint sh;
34+
uint32_t bindingSlots = 0;
3435
#else
3536
# error No graphics API selected or enabled!
3637
#endif

src/Layers/xrRenderGL/glDetailManager_VS.cpp

Lines changed: 65 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -32,67 +32,83 @@ void CDetailManager::hw_Render(CBackend& cmd_list)
3232
{
3333
using namespace detail_manager;
3434

35+
if (uniformBufferObject.id == GL_NONE)
36+
RCache.uniformBufferObjectGenerate(uniformBufferObject);
37+
3538
// Render-prepare
3639
// Update timer
3740
// Can't use Device.fTimeDelta since it is smoothed! Don't know why, but smoothed value looks more choppy!
3841
float fDelta = Device.fTimeGlobal - m_global_time_old;
3942
if (fDelta < 0 || fDelta > 1)
43+
{
4044
fDelta = 0.03f;
45+
}
4146
m_global_time_old = Device.fTimeGlobal;
4247

4348
m_time_rot_1 += PI_MUL_2 * fDelta / swing_current.rot1;
4449
m_time_rot_2 += PI_MUL_2 * fDelta / swing_current.rot2;
4550
m_time_pos += fDelta * swing_current.speed;
4651

47-
//float tm_rot1 = (PI_MUL_2*Device.fTimeGlobal/swing_current.rot1);
48-
//float tm_rot2 = (PI_MUL_2*Device.fTimeGlobal/swing_current.rot2);
49-
float tm_rot1 = m_time_rot_1;
50-
float tm_rot2 = m_time_rot_2;
51-
52-
Fvector4 dir1, dir2;
53-
dir1.set(_sin(tm_rot1), 0, _cos(tm_rot1), 0).normalize().mul(swing_current.amp1);
54-
dir2.set(_sin(tm_rot2), 0, _cos(tm_rot2), 0).normalize().mul(swing_current.amp2);
52+
glm::vec4 wind1 = normalize(glm::vec4(_sin(m_time_rot_1), 0, _cos(m_time_rot_1), 0)) * swing_current.amp1;
53+
glm::vec4 wind2 = normalize(glm::vec4(_sin(m_time_rot_2), 0, _cos(m_time_rot_2), 0)) * swing_current.amp2;
5554

5655
// Setup geometry and DMA
5756
cmd_list.set_Geometry(hw_Geom);
5857

58+
const auto& desc = g_pGamePersistent->Environment().CurrentEnv;
59+
5960
// Wave0
6061
float scale = 1.f / float(quant);
61-
Fvector4 wave;
62-
Fvector4 consts;
63-
consts.set(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
64-
//wave.set (1.f/5.f, 1.f/7.f, 1.f/3.f, Device.fTimeGlobal*swing_current.speed);
65-
wave.set(1.f / 5.f, 1.f / 7.f, 1.f / 3.f, m_time_pos);
66-
//RCache.set_c (&*hwc_consts, scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient); // consts
67-
//RCache.set_c (&*hwc_wave, wave.div(PI_MUL_2)); // wave
68-
//RCache.set_c (&*hwc_wind, dir1); // wind-dir
69-
//hw_Render_dump (&*hwc_array, 1, 0, c_hdr );
70-
hw_Render_dump(cmd_list, consts, wave.div(PI_MUL_2), dir1, 1, 0);
71-
72-
// Wave1
73-
//wave.set (1.f/3.f, 1.f/7.f, 1.f/5.f, Device.fTimeGlobal*swing_current.speed);
74-
wave.set(1.f / 3.f, 1.f / 7.f, 1.f / 5.f, m_time_pos);
75-
//RCache.set_c (&*hwc_wave, wave.div(PI_MUL_2)); // wave
76-
//RCache.set_c (&*hwc_wind, dir2); // wind-dir
77-
//hw_Render_dump (&*hwc_array, 2, 0, c_hdr );
78-
hw_Render_dump(cmd_list, consts, wave.div(PI_MUL_2), dir2, 2, 0);
79-
80-
// Still
81-
consts.set(scale, scale, scale, 1.f);
82-
//RCache.set_c (&*hwc_s_consts,scale, scale, scale, 1.f);
83-
//RCache.set_c (&*hwc_s_xform, Device.mFullTransform);
84-
//hw_Render_dump (&*hwc_s_array, 0, 1, c_hdr );
85-
hw_Render_dump(cmd_list, consts, wave.div(PI_MUL_2), dir2, 0, 1);
62+
63+
//environmentDetailUbo[0].xform = Device.mFullTransform;
64+
memcpy(&environmentDetailData[0].xform, &Device.mFullTransform, sizeof(glm::mat4));
65+
//environmentDetailUbo[0].xformView;
66+
environmentDetailData[0].consts = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
67+
environmentDetailData[0].scale = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
68+
//environmentDetailUbo[0].bias;
69+
environmentDetailData[0].wind = wind1;
70+
environmentDetailData[0].wave = glm::vec4(1.f / 5.f, 1.f / 7.f, 1.f / 3.f, m_time_pos) / PI_MUL_2;
71+
environmentDetailData[0].sun = glm::vec3(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z) * 0.5f;
72+
73+
//
74+
75+
//environmentDetailUbo[1].xform = Device.mFullTransform;
76+
memcpy(&environmentDetailData[1].xform, &Device.mFullTransform, sizeof(glm::mat4));
77+
//environmentDetailUbo[1].xformView;
78+
environmentDetailData[1].consts = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
79+
environmentDetailData[1].scale = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
80+
//environmentDetailUbo[1].bias;
81+
environmentDetailData[1].wind = wind2;
82+
environmentDetailData[1].wave = glm::vec4(1.f / 3.f, 1.f / 7.f, 1.f / 5.f, m_time_pos) / PI_MUL_2;
83+
environmentDetailData[1].sun = glm::vec3(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z) * 0.5f;
84+
85+
//
86+
87+
//environmentDetailUbo[2].xform = Device.mFullTransform;
88+
memcpy(&environmentDetailData[2].xform, &Device.mFullTransform, sizeof(glm::mat4));
89+
//environmentDetailUbo[2].xformView;
90+
environmentDetailData[2].consts = glm::vec4(scale, scale, scale, 1.f);
91+
environmentDetailData[2].scale = glm::vec4(scale, scale, ps_r__Detail_l_aniso, ps_r__Detail_l_ambient);
92+
//environmentDetailUbo[2].bias;
93+
environmentDetailData[2].wind = wind2;
94+
environmentDetailData[2].wave = glm::vec4(1.f / 3.f, 1.f / 7.f, 1.f / 5.f, m_time_pos) / PI_MUL_2;
95+
environmentDetailData[2].sun = glm::vec3(desc.sun_color.x, desc.sun_color.y, desc.sun_color.z) * 0.5f;
96+
97+
RCache.uniformBufferObjectPushToDevice(uniformBufferObject, uniformBufferObject.size, &environmentDetailData);
98+
99+
CHK_GL(glBindBufferRange(GL_UNIFORM_BUFFER, 3, uniformBufferObject.id, 0, sizeof(EnvironmentDetailData)));
100+
hw_Render_dump(cmd_list, 1, 0);
101+
102+
CHK_GL(glBindBufferRange(GL_UNIFORM_BUFFER, 3, uniformBufferObject.id, sizeof(EnvironmentDetailData), sizeof(EnvironmentDetailData)));
103+
hw_Render_dump(cmd_list, 2, 0);
104+
105+
CHK_GL(glBindBufferRange(GL_UNIFORM_BUFFER, 3, uniformBufferObject.id, sizeof(EnvironmentDetailData)*2, sizeof(EnvironmentDetailData)));
106+
hw_Render_dump(cmd_list, 0, 1);
86107
}
87108

88-
void CDetailManager::hw_Render_dump(CBackend& cmd_list, const Fvector4& consts, const Fvector4& wave, const Fvector4& wind, u32 var_id,
89-
u32 lod_id)
109+
void CDetailManager::hw_Render_dump(CBackend& cmd_list, u32 var_id, u32 lod_id)
90110
{
91-
static shared_str strConsts("consts");
92-
static shared_str strWave("wave");
93-
static shared_str strDir2D("dir2D");
94111
static shared_str strArray("array");
95-
static shared_str strXForm("xform");
96112

97113
RImplementation.BasicStats.DetailCount = 0;
98114

@@ -118,35 +134,15 @@ void CDetailManager::hw_Render_dump(CBackend& cmd_list, const Fvector4& consts,
118134
{
119135
for (u32 iPass = 0; iPass < Object.shader->E[lod_id]->passes.size(); ++iPass)
120136
{
121-
// Setup matrices + colors (and flush it as necessary)
122-
//RCache.set_Element (Object.shader->E[lod_id]);
137+
// TODO register only once
138+
RCache.uniformBufferObjectRegisterWithProgram(Object.shader->E[lod_id]->passes[iPass]->vs, "EnvironmentDetailUBO", 3, uniformBufferObject);
139+
123140
cmd_list.set_Element(Object.shader->E[lod_id], iPass);
124141
cmd_list.apply_lmaterial();
125142

126-
// This could be cached in the corresponding consatant buffer
127-
// as it is done for DX9
128-
cmd_list.set_c(strConsts, consts);
129-
cmd_list.set_c(strWave, wave);
130-
cmd_list.set_c(strDir2D, wind);
131-
cmd_list.set_c(strXForm, Device.mFullTransform);
132-
133143
ref_constant constArray = cmd_list.get_c(strArray);
134144
VERIFY(constArray);
135145

136-
//u32 c_base = x_array->vs.index;
137-
//Fvector4* c_storage = RCache.get_ConstantCache_Vertex().get_array_f().access(c_base);
138-
// TODO: OGL: Implement a constants buffer.
139-
/*Fvector4* c_storage=0;
140-
// Map constants to memory directly
141-
{
142-
void* pVData;
143-
RCache.get_ConstantDirect( strArray,
144-
hw_BatchSize*sizeof(Fvector4)*4,
145-
&pVData, 0, 0);
146-
c_storage = (Fvector4*) pVData;
147-
}
148-
VERIFY(c_storage);*/
149-
150146
u32 dwBatch = 0;
151147
xr_vector<glm::vec4> uniformBuffer;
152148
uniformBuffer.reserve(hw_BatchSize*4);
@@ -167,22 +163,19 @@ void CDetailManager::hw_Render_dump(CBackend& cmd_list, const Fvector4& consts,
167163

168164
// Build color
169165
// R2 only needs hemisphere
170-
float h = instance->c_hemi;
171-
float s = instance->c_sun;
166+
uniformBuffer.emplace_back(instance->c_sun, instance->c_sun, instance->c_sun, instance->c_hemi);
172167

173-
uniformBuffer.emplace_back(s, s, s, h);
174168
dwBatch ++;
175169
if (dwBatch == hw_BatchSize)
176170
{
177171
// flush
178172
RImplementation.BasicStats.DetailCount += dwBatch;
179173
u32 dwCNT_verts = dwBatch * Object.number_vertices;
180174
u32 dwCNT_prims = dwBatch * Object.number_indices / 3;
181-
//RCache.get_ConstantCache_Vertex().b_dirty = TRUE;
182-
//RCache.get_ConstantCache_Vertex().get_array_f().dirty (c_base,c_base+dwBatch*4);
183175

184176
cmd_list.set_uniforms(constArray->vs.program, constArray->vs.location, uniformBuffer);
185-
cmd_list.Render(D3DPT_TRIANGLELIST, vOffset, 0, dwCNT_verts, iOffset, dwCNT_prims);
177+
glDrawElementsInstancedBaseVertex(GL_TRIANGLES, Object.number_indices, GL_UNSIGNED_SHORT, (void*)(iOffset * sizeof(GLushort)), dwBatch, vOffset);
178+
186179
cmd_list.stat.r.s_details.add(dwCNT_verts);
187180
uniformBuffer.clear();
188181

@@ -197,16 +190,15 @@ void CDetailManager::hw_Render_dump(CBackend& cmd_list, const Fvector4& consts,
197190
RImplementation.BasicStats.DetailCount += dwBatch;
198191
u32 dwCNT_verts = dwBatch * Object.number_vertices;
199192
u32 dwCNT_prims = dwBatch * Object.number_indices / 3;
200-
//RCache.get_ConstantCache_Vertex().b_dirty = TRUE;
201-
//RCache.get_ConstantCache_Vertex().get_array_f().dirty (c_base,c_base+dwBatch*4);
193+
202194
cmd_list.set_uniforms(constArray->vs.program, constArray->vs.location, uniformBuffer);
203-
cmd_list.Render(D3DPT_TRIANGLELIST, vOffset, 0, dwCNT_verts, iOffset, dwCNT_prims);
195+
glDrawElementsInstancedBaseVertex(GL_TRIANGLES, Object.number_indices, GL_UNSIGNED_SHORT, (void*)(iOffset * sizeof(GLushort)), dwBatch, vOffset);
204196
cmd_list.stat.r.s_details.add(dwCNT_verts);
205197
}
206198
}
207199
}
208-
vOffset += hw_BatchSize * Object.number_vertices;
209-
iOffset += hw_BatchSize * Object.number_indices;
200+
vOffset += Object.number_vertices;
201+
iOffset += Object.number_indices;
210202
}
211203
}
212204
} // namespace xray::render::RENDER_NAMESPACE

0 commit comments

Comments
 (0)