@@ -8,17 +8,21 @@ use crate::SolverInterface;
88/// State for the direct solver over a `TVStencil<1, 3>`: a borrowed stencil plus a chunk size.
///
/// NOTE(review): this summary previously read "Implements a constant zero boundary
/// condition", which does not match the type's name or fields — confirm the intended wording.
99pub struct DirectSolver3Pt1DOpt < ' a , StencilType : TVStencil < 1 , 3 > > {
// Stencil borrowed for lifetime `'a`; `new` asserts its offsets before storing it.
1010 stencil : & ' a StencilType ,
// Lower bound on the per-task chunk length: `apply_step` uses
// `((n_r - 2) / threads).max(self.chunk_size)`.
// NOTE(review): if both operands are 0 the chunk length is 0 — confirm callers pass >= 1.
11+ chunk_size : usize ,
1112}
1213
1314impl < ' a , StencilType : TVStencil < 1 , 3 > > DirectSolver3Pt1DOpt < ' a , StencilType > {
/// Builds a solver that borrows `stencil` and stores `chunk_size`.
///
/// # Panics
///
/// Panics if `stencil.offsets()` is not exactly `[1]`, `[-1]`, `[0]`, in that order.
14- pub fn new ( stencil : & ' a StencilType ) -> Self {
15+ pub fn new ( stencil : & ' a StencilType , chunk_size : usize ) -> Self {
// The trailing comments give each offset's index within the array.
1516 let expected_offsets = [
1617 vector ! [ 1 ] , // index 0: offset +1
1718 vector ! [ -1 ] , // index 1: offset -1
1819 vector ! [ 0 ] , // index 2: offset 0
1920 ] ;
// Fail fast when the stencil's offsets differ from the sequence above.
2021 assert_eq ! ( & expected_offsets, stencil. offsets( ) ) ;
21- DirectSolver3Pt1DOpt { stencil }
22+ DirectSolver3Pt1DOpt {
23+ stencil,
24+ chunk_size,
25+ }
2226 }
2327
2428 fn apply_step < DomainType : DomainView < 1 > + Send > (
@@ -51,12 +55,12 @@ impl<'a, StencilType: TVStencil<1, 3>> DirectSolver3Pt1DOpt<'a, StencilType> {
5155
5256 let const_output: & DomainType = output;
5357 rayon:: scope ( |s| {
54- profiling:: scope!( "direct_solver: Thread Callback" ) ;
55- let chunk_size = ( n_r - 2 ) / ( threads * 2 ) ;
58+ let chunk_size = ( ( n_r - 2 ) / threads) . max ( self . chunk_size ) ;
5659 let mut start: usize = 1 ;
5760 while start < n_r - 1 {
5861 let end = ( start + chunk_size) . min ( n_r - 1 ) ;
5962 s. spawn ( move |_| {
63+ profiling:: scope!( "direct_solver: Thread Callback" ) ;
6064 let mut o = const_output. unsafe_mut_access ( ) ;
6165 for i in start..end {
6266 * o. buffer_mut ( ) . get_unchecked_mut ( i) = w
@@ -104,9 +108,17 @@ pub struct Direct3Pt1DSolver<'a, StencilType: TVStencil<1, 3>> {
104108}
105109
106110impl < ' a , StencilType : TVStencil < 1 , 3 > > Direct3Pt1DSolver < ' a , StencilType > {
107- pub fn new ( stencil : & ' a StencilType , steps : usize , threads : usize ) -> Self {
111+ pub fn new (
112+ stencil : & ' a StencilType ,
113+ steps : usize ,
114+ threads : usize ,
115+ chunk_size : usize ,
116+ ) -> Self {
108117 Direct3Pt1DSolver {
109- solver : DirectSolver3Pt1DOpt { stencil } ,
118+ solver : DirectSolver3Pt1DOpt {
119+ stencil,
120+ chunk_size,
121+ } ,
110122 steps,
111123 threads,
112124 }
0 commit comments