5353
5454
5555/**
56- * This basic {@code BroadcastJoinOperator} implement a basic broadcast join algorithm.
57- * This algorithm assumes that the broadcast table has to fit in memory since we are not supporting any spilling.
58- *
59- * For left join, inner join, right join and full join,
60- * <p>It takes the right table as the broadcast side and materialize a hash table. Then for each of the left table row,
61- * it looks up for the corresponding row(s) from the hash table and create a joint row.
62- *
63- * <p>For each of the data block received from the left table, it will generate a joint data block.
64- * We currently support left join, inner join, right join and full join.
65- * The output is in the format of [left_row, right_row]
 * This {@code HashJoinOperator} implements the hash join algorithm.
 * <p>This algorithm assumes that the right table fits in memory, since spilling is not supported. It reads the
 * complete hash-partitioned right table and materializes the data into a hash table. Then, for each row of the left
 * table, it looks up the corresponding row(s) in the hash table and creates a joined row.
 * <p>For each data block received from the left table, it generates a joined data block. The output is in the
 * format of [left_row, right_row].
6662 */
6763// TODO: Move inequi out of hashjoin. (https://github.com/apache/pinot/issues/9728)
6864// TODO: Support memory size based resource limit.
@@ -122,9 +118,9 @@ public class HashJoinOperator extends MultiStageOperator {
122118 public HashJoinOperator (OpChainExecutionContext context , MultiStageOperator leftInput , DataSchema leftSchema ,
123119 MultiStageOperator rightInput , JoinNode node ) {
124120 super (context );
125- Preconditions .checkState (SUPPORTED_JOIN_TYPES .contains (node .getJoinType ()),
126- "Join type: " + node .getJoinType () + " is not supported!" );
127121 _joinType = node .getJoinType ();
// Reject join types this operator cannot execute. Guava's Preconditions only substitutes "%s" placeholders, so
// the template must use "%s" for the join type to appear in the error message.
Preconditions.checkState(SUPPORTED_JOIN_TYPES.contains(_joinType), "Join type: %s is not supported for hash join",
    _joinType);
128124 _leftKeySelector = KeySelectorFactory .getKeySelector (node .getLeftKeys ());
129125 _rightKeySelector = KeySelectorFactory .getKeySelector (node .getRightKeys ());
130126 _leftColumnSize = leftSchema .size ();
@@ -231,8 +227,8 @@ private void buildBroadcastHashTable()
231227 // Row based overflow check.
232228 if (container .size () + _currentRowsInHashTable > _maxRowsInJoin ) {
233229 if (_joinOverflowMode == JoinOverFlowMode .THROW ) {
234- throwProcessingExceptionForJoinRowLimitExceeded ("Cannot build in memory hash table for join operator, "
235- + " reached number of rows limit: " + _maxRowsInJoin );
230+ throwProcessingExceptionForJoinRowLimitExceeded (
231+ "Cannot build in memory hash table for join operator, reached number of rows limit: " + _maxRowsInJoin );
236232 } else {
237233 // Just fill up the buffer.
238234 int remainingRows = _maxRowsInJoin - _currentRowsInHashTable ;
@@ -319,25 +315,6 @@ private List<Object[]> buildJoinedRows(TransferableBlock leftBlock)
319315 }
320316 }
321317
322- private List <Object []> buildJoinedDataBlockSemi (TransferableBlock leftBlock )
323- throws ProcessingException {
324- List <Object []> container = leftBlock .getContainer ();
325- List <Object []> rows = new ArrayList <>(container .size ());
326-
327- for (Object [] leftRow : container ) {
328- Object key = _leftKeySelector .getKey (leftRow );
329- // SEMI-JOIN only checks existence of the key
330- if (_broadcastRightTable .containsKey (key )) {
331- if (incrementJoinedRowsAndCheckLimit ()) {
332- break ;
333- }
334- rows .add (joinRow (leftRow , null ));
335- }
336- }
337-
338- return rows ;
339- }
340-
341318 private List <Object []> buildJoinedDataBlockDefault (TransferableBlock leftBlock )
342319 throws ProcessingException {
343320 List <Object []> container = leftBlock .getContainer ();
@@ -389,6 +366,25 @@ private List<Object[]> buildJoinedDataBlockDefault(TransferableBlock leftBlock)
389366 return rows ;
390367 }
391368
369+ private List <Object []> buildJoinedDataBlockSemi (TransferableBlock leftBlock )
370+ throws ProcessingException {
371+ List <Object []> container = leftBlock .getContainer ();
372+ List <Object []> rows = new ArrayList <>(container .size ());
373+
374+ for (Object [] leftRow : container ) {
375+ Object key = _leftKeySelector .getKey (leftRow );
376+ // SEMI-JOIN only checks existence of the key
377+ if (_broadcastRightTable .containsKey (key )) {
378+ if (incrementJoinedRowsAndCheckLimit ()) {
379+ break ;
380+ }
381+ rows .add (leftRow );
382+ }
383+ }
384+
385+ return rows ;
386+ }
387+
392388 private List <Object []> buildJoinedDataBlockAnti (TransferableBlock leftBlock )
393389 throws ProcessingException {
394390 List <Object []> container = leftBlock .getContainer ();
@@ -401,7 +397,7 @@ private List<Object[]> buildJoinedDataBlockAnti(TransferableBlock leftBlock)
401397 if (incrementJoinedRowsAndCheckLimit ()) {
402398 break ;
403399 }
404- rows .add (joinRow ( leftRow , null ) );
400+ rows .add (leftRow );
405401 }
406402 }
407403
@@ -430,18 +426,11 @@ private List<Object[]> buildNonMatchRightRows() {
430426
431427 private Object [] joinRow (@ Nullable Object [] leftRow , @ Nullable Object [] rightRow ) {
432428 Object [] resultRow = new Object [_resultColumnSize ];
433- int idx = 0 ;
434429 if (leftRow != null ) {
435- for (Object obj : leftRow ) {
436- resultRow [idx ++] = obj ;
437- }
430+ System .arraycopy (leftRow , 0 , resultRow , 0 , leftRow .length );
438431 }
439- // This is needed since left row can be null and we need to advance the idx to the beginning of right row.
440- idx = _leftColumnSize ;
441432 if (rightRow != null ) {
442- for (Object obj : rightRow ) {
443- resultRow [idx ++] = obj ;
444- }
433+ System .arraycopy (rightRow , 0 , resultRow , _leftColumnSize , rightRow .length );
445434 }
446435 return resultRow ;
447436 }
@@ -485,8 +474,8 @@ private boolean incrementJoinedRowsAndCheckLimit()
485474 _currentJoinedRows ++;
486475 if (_currentJoinedRows > _maxRowsInJoin ) {
487476 if (_joinOverflowMode == JoinOverFlowMode .THROW ) {
488- throwProcessingExceptionForJoinRowLimitExceeded ("Cannot process join, reached number of rows limit: "
489- + _maxRowsInJoin );
477+ throwProcessingExceptionForJoinRowLimitExceeded (
478+ "Cannot process join, reached number of rows limit: " + _maxRowsInJoin );
490479 } else {
491480 // Skip over remaining blocks until we reach the end of stream since we already breached the rows limit.
492481 logger ().info ("Terminating join operator early as the maximum number of rows limit was reached: {}" ,
@@ -504,15 +493,15 @@ private void throwProcessingExceptionForJoinRowLimitExceeded(String reason)
504493 throws ProcessingException {
505494 ProcessingException resourceLimitExceededException =
506495 new ProcessingException (QueryException .SERVER_RESOURCE_LIMIT_EXCEEDED_ERROR_CODE );
507- resourceLimitExceededException .setMessage (
508- reason + ". Consider increasing the limit for the maximum number of rows in a join either via the query "
509- + "option '" + CommonConstants .Broker .Request .QueryOptionKey .MAX_ROWS_IN_JOIN + "' or the '"
510- + PinotHintOptions .JoinHintOptions .MAX_ROWS_IN_JOIN + "' hint in the '"
511- + PinotHintOptions . JOIN_HINT_OPTIONS + "'. Alternatively, if partial results are acceptable, the join"
512- + " overflow mode can be set to '" + JoinOverFlowMode .BREAK .name () + "' either via the query option '"
513- + CommonConstants .Broker .Request .QueryOptionKey .JOIN_OVERFLOW_MODE + "' or the '"
514- + PinotHintOptions .JoinHintOptions .JOIN_OVERFLOW_MODE + "' hint in the '"
515- + PinotHintOptions . JOIN_HINT_OPTIONS + "'." );
496+ resourceLimitExceededException .setMessage (reason
497+ + ". Consider increasing the limit for the maximum number of rows in a join either via the query option ' "
498+ + CommonConstants .Broker .Request .QueryOptionKey .MAX_ROWS_IN_JOIN + "' or the '"
499+ + PinotHintOptions .JoinHintOptions .MAX_ROWS_IN_JOIN + "' hint in the '" + PinotHintOptions . JOIN_HINT_OPTIONS
500+ + "'. Alternatively, if partial results are acceptable, the join overflow mode can be set to ' "
501+ + JoinOverFlowMode .BREAK .name () + "' either via the query option '"
502+ + CommonConstants .Broker .Request .QueryOptionKey .JOIN_OVERFLOW_MODE + "' or the '"
503+ + PinotHintOptions .JoinHintOptions .JOIN_OVERFLOW_MODE + "' hint in the '" + PinotHintOptions . JOIN_HINT_OPTIONS
504+ + "'." );
516505 throw resourceLimitExceededException ;
517506 }
518507
0 commit comments