11
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
// See the License for the specific language governing permissions and
13
13
// limitations under the License.
14
-
15
14
use std:: collections:: { BTreeMap , HashSet } ;
15
+ use std:: num:: NonZeroU32 ;
16
16
use std:: sync:: LazyLock ;
17
17
use std:: time:: Duration ;
18
18
19
+ use governor:: { Quota , RateLimiter } ;
19
20
use itertools:: Itertools ;
20
21
use multimap:: MultiMap ;
21
22
use risingwave_common:: array:: { Op , RowRef } ;
@@ -159,6 +160,8 @@ pub struct HashJoinExecutor<K: HashKey, S: StateStore, const T: JoinTypePrimitiv
159
160
160
161
/// watermark column index -> `BufferedWatermarks`
161
162
watermark_buffers : BTreeMap < usize , BufferedWatermarks < SideTypePrimitive > > ,
163
+
164
+ high_join_amplification_threshold : usize ,
162
165
}
163
166
164
167
impl < K : HashKey , S : StateStore , const T : JoinTypePrimitive > std:: fmt:: Debug
@@ -195,6 +198,7 @@ struct EqJoinArgs<'a, K: HashKey, S: StateStore> {
195
198
append_only_optimize : bool ,
196
199
chunk_size : usize ,
197
200
cnt_rows_received : & ' a mut u32 ,
201
+ high_join_amplification_threshold : usize ,
198
202
}
199
203
200
204
impl < K : HashKey , S : StateStore , const T : JoinTypePrimitive > HashJoinExecutor < K , S , T > {
@@ -218,6 +222,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
218
222
is_append_only : bool ,
219
223
metrics : Arc < StreamingMetrics > ,
220
224
chunk_size : usize ,
225
+ high_join_amplification_threshold : usize ,
221
226
) -> Self {
222
227
let side_l_column_n = input_l. schema ( ) . len ( ) ;
223
228
@@ -446,6 +451,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
446
451
chunk_size,
447
452
cnt_rows_received : 0 ,
448
453
watermark_buffers,
454
+ high_join_amplification_threshold,
449
455
}
450
456
}
451
457
@@ -539,6 +545,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
539
545
append_only_optimize : self . append_only_optimize ,
540
546
chunk_size : self . chunk_size ,
541
547
cnt_rows_received : & mut self . cnt_rows_received ,
548
+ high_join_amplification_threshold : self . high_join_amplification_threshold ,
542
549
} ) {
543
550
left_time += left_start_time. elapsed ( ) ;
544
551
yield Message :: Chunk ( chunk?) ;
@@ -563,6 +570,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
563
570
append_only_optimize : self . append_only_optimize ,
564
571
chunk_size : self . chunk_size ,
565
572
cnt_rows_received : & mut self . cnt_rows_received ,
573
+ high_join_amplification_threshold : self . high_join_amplification_threshold ,
566
574
} ) {
567
575
right_time += right_start_time. elapsed ( ) ;
568
576
yield Message :: Chunk ( chunk?) ;
@@ -777,6 +785,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
777
785
append_only_optimize,
778
786
chunk_size,
779
787
cnt_rows_received,
788
+ high_join_amplification_threshold,
780
789
..
781
790
} = args;
782
791
@@ -832,13 +841,16 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
832
841
833
842
if let Some ( rows) = & matched_rows {
834
843
join_matched_join_keys. observe ( rows. len ( ) as _ ) ;
835
- if rows. len ( ) >= 10000 {
836
- static LOG_SUPPERSSER : LazyLock < LogSuppresser > =
837
- LazyLock :: new ( LogSuppresser :: default) ;
844
+ if rows. len ( ) > high_join_amplification_threshold {
845
+ static LOG_SUPPERSSER : LazyLock < LogSuppresser > = LazyLock :: new ( || {
846
+ LogSuppresser :: new ( RateLimiter :: direct ( Quota :: per_minute (
847
+ NonZeroU32 :: new ( 1 ) . unwrap ( ) ,
848
+ ) ) )
849
+ } ) ;
838
850
if let Ok ( suppressed_count) = LOG_SUPPERSSER . check ( ) {
839
851
let join_key_data_types = side_update. ht . join_key_data_types ( ) ;
840
852
let key = key. deserialize ( join_key_data_types) ?;
841
- tracing:: warn!( target: "hash_join_amplification " ,
853
+ tracing:: warn!( target: "high_join_amplification " ,
842
854
suppressed_count,
843
855
matched_rows_len = rows. len( ) ,
844
856
update_table_id = side_update. ht. table_id( ) ,
@@ -1213,6 +1225,7 @@ mod tests {
1213
1225
false ,
1214
1226
Arc :: new ( StreamingMetrics :: unused ( ) ) ,
1215
1227
1024 ,
1228
+ 2048 ,
1216
1229
) ;
1217
1230
( tx_l, tx_r, executor. boxed ( ) . execute ( ) )
1218
1231
}
@@ -1305,6 +1318,7 @@ mod tests {
1305
1318
true ,
1306
1319
Arc :: new ( StreamingMetrics :: unused ( ) ) ,
1307
1320
1024 ,
1321
+ 2048 ,
1308
1322
) ;
1309
1323
( tx_l, tx_r, executor. boxed ( ) . execute ( ) )
1310
1324
}
0 commit comments