|
1 | | -import { Fn, If, instancedArray, invocationLocalIndex, countTrailingZeros, Loop, workgroupArray, subgroupSize, workgroupBarrier, workgroupId, uint, select, invocationSubgroupIndex, dot, uvec4, vec4, float, subgroupAdd, array, subgroupShuffle, subgroupInclusiveAdd, subgroupBroadcast, invocationSubgroupMetaIndex, arrayBuffer } from 'three/tsl'; |
| 1 | +import { |
| 2 | + StorageInstancedBufferAttribute |
| 3 | +} from 'three'; |
| 4 | +import { Fn, If, instancedArray, invocationLocalIndex, countTrailingZeros, Loop, workgroupArray, subgroupSize, workgroupBarrier, workgroupId, uint, select, invocationSubgroupIndex, dot, uvec4, vec4, float, subgroupAdd, array, subgroupShuffle, subgroupInclusiveAdd, subgroupBroadcast, invocationSubgroupMetaIndex, arrayBuffer, storage } from 'three/tsl'; |
2 | 5 |
|
3 | 6 | const divRoundUp = ( size, part_size ) => { |
4 | 7 |
|
@@ -68,6 +71,12 @@ export class PrefixSum { |
68 | 71 | */ |
69 | 72 | this.renderer = renderer; |
70 | 73 |
|
| 74 | + if ( this.renderer.backend.device === null ) { |
| 75 | + |
| 76 | + renderer.backend.init(); |
| 77 | + |
| 78 | + } |
| 79 | + |
71 | 80 | /** |
72 | 81 | * @type {PrefixSumStorageObjects} |
73 | 82 | */ |
@@ -132,7 +141,14 @@ export class PrefixSum { |
132 | 141 | * |
133 | 142 | * @type {number} |
134 | 143 | */ |
135 | | - this.workgroupSize = options.workgroupSize ? options.workgroupSize : Math.min( this.vecCount, 64 ); |
| 144 | + this.workgroupSize = options.workgroupSize ? options.workgroupSize : Math.min( this.vecCount, this.renderer.backend.device.limits.maxComputeWorkgroupSizeX ); |
| 145 | + |
| 146 | + /** |
| 147 | + * The minimum subgroup size specified by the renderer's graphics device. |
| 148 | + * |
| 149 | + * @type {number} |
| 150 | + */ |
| 151 | + this.minSubgroupSize = ( this.renderer.backend.device.adapterInfo && this.renderer.backend.device.adapterInfo.subgroupMinSize ) ? this.renderer.backend.device.adapterInfo.subgroupMinSize : 4; |
136 | 152 |
|
137 | 153 | /** |
138 | 154 | * The maximum number of elements that will be read by an individual workgroup in the reduction step. |
@@ -179,10 +195,17 @@ export class PrefixSum { |
179 | 195 | _createStorageBuffers( inputArray ) { |
180 | 196 |
|
181 | 197 | this.arrayBuffer = this.type === 'uint' ? Uint32Array.from( inputArray ) : Float32Array.from( inputArray ); |
| 198 | + this.outputArrayBuffer = this.type === 'uint' ? Uint32Array.from( inputArray ) : Float32Array.from( inputArray ); |
| 199 | + |
| 200 | + const inputAttribute = new StorageInstancedBufferAttribute( this.arrayBuffer, 1 ); |
| 201 | + const outputAttribute = new StorageInstancedBufferAttribute( this.outputArrayBuffer, 1 ); |
| 202 | + |
| 203 | + this.storageBuffers.dataBuffer = storage( inputAttribute, this.vecType, inputAttribute.count / 4 ).setName( `Prefix_Sum_Input_Vec_${id}` ); |
| 204 | + this.storageBuffers.unvectorizedDataBuffer = storage( inputAttribute, this.type, inputAttribute.count ).setName( `Prefix_Sum_Input_Unvec_${id}` ); |
| 205 | + |
| 206 | + this.storageBuffers.outputBuffer = storage( outputAttribute, this.vecType, outputAttribute.count / 4 ).setName( `Prefix_Sum_Output_Vec_${id}` ); |
| 207 | + this.storageBuffers.unvectorizedOutputBuffer = storage( outputAttribute, this.type, outputAttribute.count ).setName( `Prefix_Sum_Output_Unvec_${id}` ); |
182 | 208 |
|
183 | | - this.storageBuffers.unvectorizedDataBuffer = instancedArray( this.arrayBuffer, this.type ).setPBO( true ).setName( `Prefix_Sum_Input_Unvec_${id}` ); |
184 | | - this.storageBuffers.dataBuffer = instancedArray( this.arrayBuffer, this.vecType ).setPBO( true ).setName( `Prefix_Sum_Input_Vec_${id}` ); |
185 | | - this.storageBuffers.outputBuffer = instancedArray( this.arrayBuffer, this.vecType ).setName( `Prefix_Sum_Output_${id}` ); |
186 | 209 | this.storageBuffers.reductionBuffer = instancedArray( this.numWorkgroups, this.type ).setPBO( true ).setName( `Prefix_Sum_Reduction_${id}` ); |
187 | 210 |
|
188 | 211 | } |
@@ -472,6 +495,19 @@ export class PrefixSum { |
472 | 495 | _getSpineScanFn() { |
473 | 496 |
|
474 | 497 | const { reductionBuffer } = this.storageBuffers; |
| 498 | + |
| 499 | + if ( this.numWorkgroups <= this.minSubgroupSize ) { |
| 500 | + |
| 501 | + const fnDef = Fn( () => { |
| 502 | + |
| 503 | + reductionBuffer.element( invocationSubgroupIndex ).assign( subgroupInclusiveAdd( reductionBuffer.element( invocationSubgroupIndex ) ) ); |
| 504 | + |
| 505 | + } )().compute( this.numWorkgroups, [ this.workgroupSize ] ); |
| 506 | + |
| 507 | + return fnDef; |
| 508 | + |
| 509 | + } |
| 510 | + |
475 | 511 | const { subgroupReductionArray, unvectorizedSubgroupOffset, spineSize, subgroupSizeLog } = this.utilityNodes; |
476 | 512 | const { unvectorizedWorkPerInvocation } = this; |
477 | 513 |
|
@@ -630,16 +666,13 @@ export class PrefixSum { |
630 | 666 |
|
631 | 667 | } )().compute( this.numWorkgroups, [ this.workgroupSize ] ); |
632 | 668 |
|
633 | | - console.log( fnDef ); |
634 | | - |
635 | 669 | return fnDef; |
636 | 670 |
|
637 | 671 | } |
638 | 672 |
|
639 | 673 | _getDownsweepFn() { |
640 | 674 |
|
641 | 675 | const { dataBuffer, reductionBuffer, outputBuffer } = this.storageBuffers; |
642 | | - const { vecType } = this; |
643 | 676 | const { subgroupOffset, workgroupOffset, subgroupReductionArray, subgroupSizeLog, spineSize } = this.utilityNodes; |
644 | 677 |
|
645 | 678 | const { workPerInvocation, vecCount } = this; |
@@ -958,9 +991,9 @@ export class PrefixSum { |
958 | 991 | */ |
959 | 992 | async compute() { |
960 | 993 |
|
961 | | - await this.computeStep( this.currentStep ); |
962 | | - await this.computeStep( this.currentStep ); |
963 | | - await this.computeStep( this.currentStep ); |
| 994 | + await this.computeReduce(); |
| 995 | + await this.computeSpineScan(); |
| 996 | + await this.computeDownsweep(); |
964 | 997 |
|
965 | 998 | } |
966 | 999 |
|
|
0 commit comments