@@ -1183,99 +1183,140 @@ r_window_submit(OS_Handle window, R_Handle window_equip, R_PassList *passes)
1183
1183
case R_PassKind_Blur:
1184
1184
{
1185
1185
R_PassParams_Blur *params = pass->params_blur ;
1186
- ID3D11SamplerState *sampler = r_d3d11_state->samplers [R_Tex2DSampleKind_Nearest ];
1186
+ ID3D11SamplerState *sampler = r_d3d11_state->samplers [R_Tex2DSampleKind_Linear ];
1187
1187
ID3D11VertexShader *vshad = r_d3d11_state->vshads [R_D3D11_VShadKind_Blur];
1188
1188
ID3D11PixelShader *pshad = r_d3d11_state->pshads [R_D3D11_PShadKind_Blur];
1189
1189
ID3D11Buffer *uniforms_buffer = r_d3d11_state->uniform_type_kind_buffers [R_D3D11_VShadKind_Blur];
1190
-
1191
- // - rjf: perform blur on each axis
1192
- ID3D11RenderTargetView *rtvs[Axis2_COUNT] =
1193
- {
1194
- wnd->stage_scratch_color_rtv ,
1195
- wnd->stage_color_rtv ,
1196
- };
1197
- ID3D11ShaderResourceView *srvs[Axis2_COUNT] =
1190
+
1191
+ // rjf: setup output merger
1192
+ d_ctx->OMSetDepthStencilState (r_d3d11_state->noop_depth_stencil , 0 );
1193
+ d_ctx->OMSetBlendState (r_d3d11_state->main_blend_state , 0 , 0xffffffff );
1194
+
1195
+ // rjf: set up viewport
1196
+ Vec2S32 resolution = wnd->last_resolution ;
1197
+ D3D11_VIEWPORT viewport = { 0 .0f , 0 .0f , (F32)resolution.x , (F32)resolution.y , 0 .0f , 1 .0f };
1198
+ d_ctx->RSSetViewports (1 , &viewport);
1199
+ d_ctx->RSSetState (r_d3d11_state->main_rasterizer );
1200
+
1201
+ // rjf: setup input assembly
1202
+ d_ctx->IASetPrimitiveTopology (D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
1203
+ d_ctx->IASetInputLayout (0 );
1204
+
1205
+ // rjf: setup shaders
1206
+ d_ctx->VSSetShader (vshad, 0 , 0 );
1207
+ d_ctx->VSSetConstantBuffers (0 , 1 , &uniforms_buffer);
1208
+ d_ctx->PSSetShader (pshad, 0 , 0 );
1209
+ d_ctx->PSSetSamplers (0 , 1 , &sampler);
1210
+
1211
+ // rjf: setup scissor rect
1198
1212
{
1199
- wnd->stage_color_srv ,
1200
- wnd->stage_scratch_color_srv ,
1201
- };
1202
- for (Axis2 axis = (Axis2)0 ; axis < Axis2_COUNT; axis = (Axis2)(axis+1 ))
1213
+ D3D11_RECT rect = { 0 };
1214
+ rect.left = 0 ;
1215
+ rect.right = (LONG)wnd->last_resolution .x ;
1216
+ rect.top = 0 ;
1217
+ rect.bottom = (LONG)wnd->last_resolution .y ;
1218
+ d_ctx->RSSetScissorRects (1 , &rect);
1219
+ }
1220
+
1221
+ // rjf: set up uniforms
1222
+ R_D3D11_Uniforms_Blur uniforms = { 0 };
1203
1223
{
1204
- // rjf: setup output merger
1205
- d_ctx->OMSetRenderTargets (1 , &rtvs[axis], 0 );
1206
- d_ctx->OMSetDepthStencilState (r_d3d11_state->noop_depth_stencil , 0 );
1207
- d_ctx->OMSetBlendState (r_d3d11_state->main_blend_state , 0 , 0xffffffff );
1208
-
1209
- // rjf: set up viewport
1210
- Vec2S32 resolution = wnd->last_resolution ;
1211
- D3D11_VIEWPORT viewport = { 0 .0f , 0 .0f , (F32)resolution.x , (F32)resolution.y , 0 .0f , 1 .0f };
1212
- d_ctx->RSSetViewports (1 , &viewport);
1213
- d_ctx->RSSetState (r_d3d11_state->main_rasterizer );
1214
-
1215
- // rjf: setup input assembly
1216
- d_ctx->IASetPrimitiveTopology (D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
1217
- d_ctx->IASetInputLayout (0 );
1218
-
1219
- // rjf: set up uniforms
1224
+ F32 weights[ArrayCount (uniforms.kernel )*2 ] = {0 };
1225
+
1226
+ F32 blur_size = Min (params->blur_size , ArrayCount (weights));
1227
+ U64 blur_count = (U64)round_f32 (blur_size);
1228
+
1229
+ F32 stdev = (blur_size-1 .f )/2 .f ;
1230
+ F32 one_over_root_2pi_stdev2 = 1 /sqrt_f32 (2 *pi32*stdev*stdev);
1231
+ F32 euler32 = 2 .718281828459045f ;
1232
+
1233
+ weights[0 ] = 1 .f ;
1234
+ if (stdev > 0 .f )
1220
1235
{
1221
- F32 stdev = (params->blur_size -1 .f )/2 .f ;
1222
- F32 one_over_root_2pi_stdev2 = 1 /sqrt_f32 (2 *pi32*stdev*stdev);
1223
- F32 euler32 = 2 .718281828459045f ;
1224
- R_D3D11_Uniforms_Blur uniforms = {0 };
1225
- uniforms.viewport_size = v2f32 (resolution.x , resolution.y );
1226
- uniforms.rect = params->rect ;
1227
- uniforms.blur_size = params->blur_size ;
1228
- uniforms.is_vertical = (F32)!!axis;
1229
- MemoryCopyArray (uniforms.corner_radii .v , params->corner_radii );
1230
- F32 kernel_x = 0 ;
1231
- uniforms.kernel [0 ].v [0 ] = 1 .f ;
1232
- if (stdev > 0 .f )
1233
- {
1234
- for (U64 idx = 0 ; idx < ArrayCount (uniforms.kernel ); idx += 1 )
1235
- {
1236
- for (U64 v_idx = 0 ; v_idx < ArrayCount (uniforms.kernel [idx].v ); v_idx += 1 )
1237
- {
1238
- uniforms.kernel [idx].v [v_idx] = one_over_root_2pi_stdev2*pow_f32 (euler32, -kernel_x*kernel_x/(2 .f *stdev*stdev));
1239
- kernel_x += 1 ;
1240
- }
1241
- }
1242
- }
1243
- if (uniforms.kernel [0 ].v [0 ] > 1 .f )
1236
+ for (U64 idx = 0 ; idx < blur_count; idx += 1 )
1244
1237
{
1245
- MemoryZeroArray (uniforms. kernel ) ;
1246
- uniforms. kernel [ 0 ]. v [ 0 ] = 1 . f ;
1238
+ F32 kernel_x = (F32)idx ;
1239
+ weights[idx] = one_over_root_2pi_stdev2* pow_f32 (euler32, -kernel_x*kernel_x/( 2 . f *stdev*stdev));
1247
1240
}
1248
- D3D11_MAPPED_SUBRESOURCE sub_rsrc = {0 };
1249
- r_d3d11_state->device_ctx ->Map (uniforms_buffer, 0 , D3D11_MAP_WRITE_DISCARD, 0 , &sub_rsrc);
1250
- MemoryCopy ((U8 *)sub_rsrc.pData , &uniforms, sizeof (uniforms));
1251
- r_d3d11_state->device_ctx ->Unmap (uniforms_buffer, 0 );
1252
1241
}
1253
-
1254
- // rjf: setup shaders
1255
- d_ctx->VSSetShader (vshad, 0 , 0 );
1256
- d_ctx->VSSetConstantBuffers (0 , 1 , &uniforms_buffer);
1257
- d_ctx->PSSetShader (pshad, 0 , 0 );
1258
- d_ctx->PSSetConstantBuffers (0 , 1 , &uniforms_buffer);
1259
- d_ctx->PSSetShaderResources (0 , 1 , &srvs[axis]);
1260
- d_ctx->PSSetSamplers (0 , 1 , &sampler);
1261
-
1262
- // rjf: setup scissor rect
1242
+ if (weights[0 ] > 1 .f )
1263
1243
{
1264
- D3D11_RECT rect = {0 };
1265
- rect.left = 0 ;
1266
- rect.right = (LONG)wnd->last_resolution .x ;
1267
- rect.top = 0 ;
1268
- rect.bottom = (LONG)wnd->last_resolution .y ;
1269
- d_ctx->RSSetScissorRects (1 , &rect);
1244
+ MemoryZeroArray (weights);
1245
+ weights[0 ] = 1 .f ;
1270
1246
}
1271
-
1272
- // rjf: draw
1273
- d_ctx->Draw (4 , 0 );
1274
-
1275
- // rjf: unset srv
1276
- ID3D11ShaderResourceView *srv = 0 ;
1277
- d_ctx->PSSetShaderResources (0 , 1 , &srv);
1247
+ else
1248
+ {
1249
+ // prepare weights & offsets for bilinear lookup
1250
+ // blur filter wants to calculate w0*pixel[pos] + w1*pixel[pos+1] + ...
1251
+ // with bilinear filter we can do this calculation by doing only w*sample(pos+t) = w*((1-t)*pixel[pos] + t*pixel[pos+1])
1252
+ // we can see w0=w*(1-t) and w1=w*t
1253
+ // thus w=w0+w1 and t=w1/w
1254
+ for (U64 idx = 1 ; idx < blur_count; idx += 2 )
1255
+ {
1256
+ F32 w0 = weights[idx + 0 ];
1257
+ F32 w1 = weights[idx + 1 ];
1258
+ F32 w = w0 + w1;
1259
+ F32 t = w1 / w;
1260
+
1261
+ // each kernel element is float2(weight, offset)
1262
+ // weights & offsets are adjusted for bilinear sampling
1263
+ // zw elements are not used, a bit of waste but it allows for simpler shader code
1264
+ uniforms.kernel [(idx+1 )/2 ] = v4f32 (w, (F32)idx + t, 0 , 0 );
1265
+ }
1266
+ uniforms.kernel [0 ].x = weights[0 ];
1267
+ }
1268
+
1269
+ // technically we need just direction be different
1270
+ // but there are 256 bytes of usable space anyway for each constant buffer chunk
1271
+
1272
+ uniforms.passes [Axis2_X].viewport_size = v2f32 (resolution.x , resolution.y );
1273
+ uniforms.passes [Axis2_X].rect = params->rect ;
1274
+ uniforms.passes [Axis2_X].direction = v2f32 (1 .f / resolution.x , 0 );
1275
+ uniforms.passes [Axis2_X].blur_count = 1 + blur_count / 2 ; // 2x smaller because of bilinear sampling
1276
+ MemoryCopyArray (uniforms.passes [Axis2_X].corner_radii .v , params->corner_radii );
1277
+
1278
+ uniforms.passes [Axis2_Y].viewport_size = v2f32 (resolution.x , resolution.y );
1279
+ uniforms.passes [Axis2_Y].rect = params->rect ;
1280
+ uniforms.passes [Axis2_Y].direction = v2f32 (0 , 1 .f / resolution.y );
1281
+ uniforms.passes [Axis2_Y].blur_count = 1 + blur_count / 2 ; // 2x smaller because of bilinear sampling
1282
+ MemoryCopyArray (uniforms.passes [Axis2_Y].corner_radii .v , params->corner_radii );
1283
+
1284
+ D3D11_MAPPED_SUBRESOURCE sub_rsrc = {0 };
1285
+ r_d3d11_state->device_ctx ->Map (uniforms_buffer, 0 , D3D11_MAP_WRITE_DISCARD, 0 , &sub_rsrc);
1286
+ MemoryCopy ((U8 *)sub_rsrc.pData , &uniforms, sizeof (uniforms));
1287
+ r_d3d11_state->device_ctx ->Unmap (uniforms_buffer, 0 );
1278
1288
}
1289
+
1290
+ ID3D11Buffer* uniforms_buffers[] = { uniforms_buffer, uniforms_buffer };
1291
+
1292
+ U32 uniform_offset[Axis2_COUNT][2 ] =
1293
+ {
1294
+ { 0 * sizeof (R_D3D11_Uniforms_BlurPass) / 16 , OffsetOf (R_D3D11_Uniforms_Blur, kernel) / 16 },
1295
+ { 1 * sizeof (R_D3D11_Uniforms_BlurPass) / 16 , OffsetOf (R_D3D11_Uniforms_Blur, kernel) / 16 },
1296
+ };
1297
+
1298
+ U32 uniform_count[Axis2_COUNT][2 ] =
1299
+ {
1300
+ { sizeof (R_D3D11_Uniforms_BlurPass) / 16 , sizeof (uniforms.kernel ) / 16 },
1301
+ { sizeof (R_D3D11_Uniforms_BlurPass) / 16 , sizeof (uniforms.kernel ) / 16 },
1302
+ };
1303
+
1304
+ // rjf: for unsetting srv
1305
+ ID3D11ShaderResourceView* srv = 0 ;
1306
+
1307
+ // horizontal pass
1308
+ d_ctx->OMSetRenderTargets (1 , &wnd->stage_scratch_color_rtv , 0 );
1309
+ d_ctx->PSSetConstantBuffers1 (0 , ArrayCount (uniforms_buffers), uniforms_buffers, uniform_offset[Axis2_X], uniform_count[Axis2_X]);
1310
+ d_ctx->PSSetShaderResources (0 , 1 , &wnd->stage_color_srv );
1311
+ d_ctx->Draw (4 , 0 );
1312
+ d_ctx->PSSetShaderResources (0 , 1 , &srv);
1313
+
1314
+ // vertical pass
1315
+ d_ctx->OMSetRenderTargets (1 , &wnd->stage_color_rtv , 0 );
1316
+ d_ctx->PSSetConstantBuffers1 (0 , ArrayCount (uniforms_buffers), uniforms_buffers, uniform_offset[Axis2_Y], uniform_count[Axis2_Y]);
1317
+ d_ctx->PSSetShaderResources (0 , 1 , &wnd->stage_scratch_color_srv );
1318
+ d_ctx->Draw (4 , 0 );
1319
+ d_ctx->PSSetShaderResources (0 , 1 , &srv);
1279
1320
}break ;
1280
1321
1281
1322
0 commit comments