diff --git a/src/engine/AddCombinedRowToTable.h b/src/engine/AddCombinedRowToTable.h
index e7a4fc2e4f..2397b575cd 100644
--- a/src/engine/AddCombinedRowToTable.h
+++ b/src/engine/AddCombinedRowToTable.h
@@ -16,6 +16,7 @@
 
 #include "backports/concepts.h"
 #include "engine/LocalVocab.h"
+#include "engine/Result.h"
 #include "engine/idTable/IdTable.h"
 #include "engine/idTable/IdTableConcepts.h"
 #include "global/Id.h"
@@ -259,6 +260,14 @@ class AddCombinedRowToIdTable {
 
   LocalVocab& localVocab() { return mergedVocab_; }
 
+  // Flush any buffered rows, then hand over the result table together with
+  // the merged local vocab as a single `Result::IdTableVocabPair`.
+  auto toIdTableVocabPair() && {
+    flush();
+    auto table = std::move(resultTable_);
+    return Result::IdTableVocabPair{std::move(table), std::move(mergedVocab_)};
+  }
+
   // Disable copying and moving, it is currently not needed and makes it harder
   // to reason about
   AddCombinedRowToIdTable(const AddCombinedRowToIdTable&) = delete;
diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp
index bd381824af..198075f0f9 100644
--- a/src/engine/IndexScan.cpp
+++ b/src/engine/IndexScan.cpp
@@ -586,6 +586,9 @@ struct IndexScan::SharedGeneratorState {
   bool hasUndef_ = false;
   // Indicates if the generator has been fully consumed.
   bool doneFetching_ = false;
+  // If true, filter the left side (skip non-matching inputs). If false, pass
+  // through all inputs even if they don't match any blocks.
+  bool filterLeftSide_ = true;
 
   // Advance the `iterator` to the next non-empty table. Set `hasUndef_` to true
   // if the first table is undefined. Also set `doneFetching_` if the generator
@@ -643,13 +646,59 @@ struct IndexScan::SharedGeneratorState {
           // We have seen entries in the join column that are larger than the
           // largest block in the index scan, which means that there will be no
           // more matches.
+          if (!filterLeftSide_) {
+            // Case B: Push current table before marking as done.
+            prefetchedValues_.push_back(std::move(*iterator_.value()));
+          }
           doneFetching_ = true;
           return;
         }
-        // The current `joinColumn` has no matching block in the index, we can
-        // safely skip appending it to `prefetchedValues_`, but future values
-        // might require later blocks from the index.
-        continue;
+        // Case A: The current `joinColumn` has no matching block in the index.
+        if (filterLeftSide_) {
+          // We can safely skip appending it to `prefetchedValues_`, but future
+          // values might require later blocks from the index.
+          continue;
+        } else {
+          // When not filtering, push the table to prefetchedValues.
+          prefetchedValues_.push_back(std::move(*iterator_.value()));
+          // If buffer grows too large, find a dummy block to add.
+          if (prefetchedValues_.size() > 5) {
+            // Find the last value in the join column of the last prefetched
+            // table.
+            const auto& lastPrefetched = prefetchedValues_.back();
+            auto lastJoinColumn =
+                lastPrefetched.idTable_.getColumn(joinColumn_);
+            AD_CORRECTNESS_CHECK(!lastJoinColumn.empty());
+            Id lastValue = lastJoinColumn.back();
+            // Find the smallest block whose first entry is larger than
+            // lastValue.
+            // TODO<joka921> This should always be the first block that is still
+            // available. also remove code duplication with the above code.
+            bool foundBlock = false;
+            size_t numBlocksHandled = 0;
+            for (const auto& block : metaBlocks_.getBlockMetadataView()) {
+              ++numBlocksHandled;
+              if (CompressedRelationReader::getRelevantIdFromTriple(
+                      block.firstTriple_, metaBlocks_) > lastValue) {
+                // Found a suitable block, add it to pendingBlocks.
+                pendingBlocks_.push_back(block);
+                lastEntryInBlocks_ =
+                    CompressedRelationReader::getRelevantIdFromTriple(
+                        block.lastTriple_, metaBlocks_);
+                AD_CORRECTNESS_CHECK(numBlocksHandled == 1);
+                metaBlocks_.removePrefix(numBlocksHandled);
+                foundBlock = true;
+                break;
+              }
+            }
+            if (!foundBlock) {
+              // No more blocks available, mark as done.
+              doneFetching_ = true;
+              return;
+            }
+          }
+          continue;
+        }
       }
       prefetchedValues_.push_back(std::move(*iterator_.value()));
       ql::ranges::move(newBlocks, std::back_inserter(pendingBlocks_));
@@ -690,7 +739,19 @@ Result::LazyResult IndexScan::createPrefilteredJoinSide(
 
         if (prefetched.empty()) {
           AD_CORRECTNESS_CHECK(state->doneFetching_);
-          return LoopControl::makeBreak();
+          // If not filtering left side, yield all remaining elements.
+          AD_CORRECTNESS_CHECK(state->iterator_.has_value());
+          auto it = state->iterator_.value();
+          if (!state->filterLeftSide_ && it != state->generator_.end()) {
+            // Advance the iterator past the last value we already yielded.
+            ++it;
+            return LoopControl::breakWithYieldAll(
+                ql::ranges::subrange(it, state->generator_.end()) |
+                ql::views::filter(
+                    [](const auto& block) { return !block.idTable_.empty(); }));
+          } else {
+            return LoopControl::makeBreak();
+          }
         }
 
         // Make a defensive copy of the values to avoid modification during
@@ -769,17 +830,27 @@ Result::LazyResult IndexScan::createPrefilteredIndexScanSide(
 
 // _____________________________________________________________________________
 std::pair<Result::LazyResult, Result::LazyResult> IndexScan::prefilterTables(
-    Result::LazyResult input, ColumnIndex joinColumn) {
+    Result::LazyResult input, ColumnIndex joinColumn, bool filterLeftSide) {
   AD_CORRECTNESS_CHECK(numVariables_ <= 3 && numVariables_ > 0);
   auto metaBlocks = getMetadataForScan();
 
   if (!metaBlocks.has_value()) {
     // Return empty results
-    return {Result::LazyResult{}, Result::LazyResult{}};
+    return {filterLeftSide ? Result::LazyResult{} : std::move(input),
+            Result::LazyResult{}};  // index scan side: always empty
   }
 
-  auto state = std::make_shared<SharedGeneratorState>(SharedGeneratorState{
-      std::move(input), joinColumn, std::move(metaBlocks.value())});
+  auto state = std::make_shared<SharedGeneratorState>(
+      SharedGeneratorState{std::move(input),
+                           joinColumn,
+                           std::move(metaBlocks.value()),
+                           std::nullopt,
+                           {},
+                           {},
+                           std::nullopt,
+                           false,            // hasUndef_
+                           false,            // doneFetching_
+                           filterLeftSide});  // filterLeftSide_
   return {createPrefilteredJoinSide(state),
           createPrefilteredIndexScanSide(state)};
 }
diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h
index 92035f478d..2fbdc86e2e 100644
--- a/src/engine/IndexScan.h
+++ b/src/engine/IndexScan.h
@@ -132,7 +132,8 @@ class IndexScan final : public Operation {
   // there are undef values, the second generator represents the full index
   // scan.
   std::pair<Result::LazyResult, Result::LazyResult> prefilterTables(
-      Result::LazyResult input, ColumnIndex joinColumn);
+      Result::LazyResult input, ColumnIndex joinColumn,
+      bool filterLeftSide = true);
 
  private:
   // Implementation detail that allows to consume a lazy range from two other
diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp
index 65d2e54145..d09239134f 100644
--- a/src/engine/Join.cpp
+++ b/src/engine/Join.cpp
@@ -11,6 +11,7 @@
 #include <sstream>
 #include <vector>
 
+#include "JoinWithIndexScanHelpers.h"
 #include "backports/functional.h"
 #include "backports/type_traits.h"
 #include "engine/AddCombinedRowToTable.h"
@@ -29,7 +30,7 @@
 #include "util/JoinAlgorithms/JoinAlgorithms.h"
 
 using namespace qlever::joinHelpers;
-
+using namespace qlever::joinWithIndexScanHelpers;
 using std::endl;
 using std::string;
 
@@ -566,40 +567,6 @@ void Join::addCombinedRowToIdTable(const ROW_A& rowA, const ROW_B& rowB,
   }
 }
 
-// _____________________________________________________________________________
-namespace {
-// Type alias for the general InputRangeTypeErased with specific types.
-using IteratorWithSingleCol = InputRangeTypeErased<IdTableAndFirstCol<IdTable>>;
-
-// Convert a `CompressedRelationReader::IdTableGeneratorInputRange` to a
-// `InputRangeTypeErased<IdTableAndFirstCol<IdTable>>` for more efficient access
-// in the join columns below. This also makes sure the runtime information of
-// the passed `IndexScan` is updated properly as the range is consumed.
-IteratorWithSingleCol convertGenerator(
-    CompressedRelationReader::IdTableGeneratorInputRange gen, IndexScan& scan) {
-  // Store the generator in a wrapper so we can access its details after moving
-  auto generatorStorage =
-      std::make_shared<CompressedRelationReader::IdTableGeneratorInputRange>(
-          std::move(gen));
-
-  using SendPriority = RuntimeInformation::SendPriority;
-
-  auto range = CachingTransformInputRange(
-      *generatorStorage,
-      [generatorStorage, &scan,
-       sendPriority = SendPriority::Always](auto& table) mutable {
-        scan.updateRuntimeInfoForLazyScan(generatorStorage->details(),
-                                          sendPriority);
-        sendPriority = SendPriority::IfDue;
-        // IndexScans don't have a local vocabulary, so we can just use an empty
-        // one.
-        return IdTableAndFirstCol{std::move(table), LocalVocab{}};
-      });
-
-  return IteratorWithSingleCol{std::move(range)};
-}
-}  // namespace
-
 // ______________________________________________________________________________________________________
 Result Join::computeResultForTwoIndexScans(bool requestLaziness) const {
   return createResult(
@@ -626,17 +593,13 @@ Result Join::computeResultForTwoIndexScans(bool requestLaziness) const {
         // of the child. If we serialize it whenever the join operation yields a
         // table that's frequent enough and reduces the overhead.
         auto leftBlocks =
-            convertGenerator(std::move(leftBlocksInternal), *leftScan);
-        auto rightBlocks =
-            convertGenerator(std::move(rightBlocksInternal), *rightScan);
+            convertGeneratorFromScan(std::move(leftBlocksInternal), *leftScan);
+        auto rightBlocks = convertGeneratorFromScan(
+            std::move(rightBlocksInternal), *rightScan);
 
         ad_utility::zipperJoinForBlocksWithoutUndef(leftBlocks, rightBlocks,
                                                     std::less{}, rowAdder);
-        leftScan->runtimeInfo().status_ =
-            RuntimeInformation::Status::lazilyMaterializedCompleted;
-        rightScan->runtimeInfo().status_ =
-            RuntimeInformation::Status::lazilyMaterializedCompleted;
-
+        setScanStatusToLazilyCompleted(*leftScan, *rightScan);
         auto localVocab = std::move(rowAdder.localVocab());
         return Result::IdTableVocabPair{std::move(rowAdder).resultTable(),
                                         std::move(localVocab)};
@@ -661,11 +624,12 @@ Result Join::computeResultForIndexScanAndIdTable(
         const IdTable& idTable = resultWithIdTable->idTable();
         auto rowAdder = makeRowAdder(std::move(yieldTable));
 
-        auto permutationIdTable = ad_utility::IdTableAndFirstCol{
-            idTable.asColumnSubsetView(idTableIsRightInput
-                                           ? joinColMap.permutationRight()
-                                           : joinColMap.permutationLeft()),
-            resultWithIdTable->getCopyOfLocalVocab()};
+        auto permutationIdTable =
+            ad_utility::IdTableAndFirstCols<1, IdTableView<0>>{
+                idTable.asColumnSubsetView(idTableIsRightInput
+                                               ? joinColMap.permutationRight()
+                                               : joinColMap.permutationLeft()),
+                resultWithIdTable->getCopyOfLocalVocab()};
 
         ad_utility::Timer timer{
             ad_utility::timer::Timer::InitialStatus::Started};
@@ -676,7 +640,7 @@ Result Join::computeResultForIndexScanAndIdTable(
         std::optional<std::shared_ptr<const Result>> indexScanResult =
             std::nullopt;
         auto rightBlocks = [&scan, idTableHasUndef, &permutationIdTable,
-                            &indexScanResult]() -> LazyInputView {
+                            &indexScanResult]() -> LazyInputView<1> {
           if (idTableHasUndef) {
             indexScanResult =
                 scan->getResult(false, ComputationMode::LAZY_IF_SUPPORTED);
@@ -686,7 +650,8 @@ Result Join::computeResultForIndexScanAndIdTable(
           } else {
             auto rightBlocksInternal =
                 scan->lazyScanForJoinOfColumnWithScan(permutationIdTable.col());
-            return convertGenerator(std::move(rightBlocksInternal), *scan);
+            return convertGeneratorFromScan(std::move(rightBlocksInternal),
+                                            *scan);
           }
         }();
 
@@ -704,8 +669,7 @@ Result Join::computeResultForIndexScanAndIdTable(
         } else {
           doJoin(blockForIdTable, rightBlocks);
         }
-        scan->runtimeInfo().status_ =
-            RuntimeInformation::Status::lazilyMaterializedCompleted;
+        setScanStatusToLazilyCompleted(*scan);
 
         auto localVocab = std::move(rowAdder.localVocab());
         return Result::IdTableVocabPair{std::move(rowAdder).resultTable(),
@@ -741,8 +705,7 @@ Result Join::computeResultForIndexScanAndLazyOperation(
             convertGenerator(std::move(indexScanSide),
                              joinColMap.permutationRight()),
             std::less{}, rowAdder);
-        scan->runtimeInfo().status_ =
-            RuntimeInformation::Status::lazilyMaterializedCompleted;
+        setScanStatusToLazilyCompleted(*scan);
 
         auto localVocab = std::move(rowAdder.localVocab());
         return Result::IdTableVocabPair{std::move(rowAdder).resultTable(),
diff --git a/src/engine/JoinHelpers.h b/src/engine/JoinHelpers.h
index 7387b6f4dd..d5247debe7 100644
--- a/src/engine/JoinHelpers.h
+++ b/src/engine/JoinHelpers.h
@@ -11,6 +11,9 @@
 #include <optional>
 #include <vector>
 
+#include "engine/AddCombinedRowToTable.h"
+#include "engine/IndexScan.h"
+#include "engine/Operation.h"
 #include "engine/QueryExecutionTree.h"
 #include "engine/Result.h"
 #include "engine/idTable/IdTable.h"
@@ -29,6 +32,7 @@ static constexpr size_t CHUNK_SIZE = 100'000;
 
 using namespace ad_utility;
 
+// Optional column permutation that `applyPermutation` applies to an `IdTable`.
 using OptionalPermutation = std::optional<std::vector<ColumnIndex>>;
 
 // _____________________________________________________________________________
@@ -39,27 +43,66 @@ inline void applyPermutation(IdTable& idTable,
   }
 }
 
-using LazyInputView = InputRangeTypeErased<IdTableAndFirstCol<IdTable>>;
+template <size_t NumJoinCols = 1>
+using LazyInputView =
+    InputRangeTypeErased<IdTableAndFirstCols<NumJoinCols, IdTable>>;
 
 // Convert a `generator<IdTableVocab>` to a `generator<IdTableAndFirstCol>` for
 // more efficient access in the join columns below and apply the given
 // permutation to each table.
-CPP_template(typename Input)(
+CPP_template(typename Input, size_t numJoinColumns = 1)(
     requires SameAsAny<Input, Result::Generator, Result::LazyResult>)
-    LazyInputView
-    convertGenerator(Input gen, OptionalPermutation permutation = {}) {
+    LazyInputView<numJoinColumns> convertGenerator(
+        Input gen, OptionalPermutation permutation = {}) {
   auto transformer = [permutation = std::move(permutation)](auto& element) {
     auto& [table, localVocab] = element;
     applyPermutation(table, permutation);
     // Make sure to actually move the table into the wrapper so that the tables
     // live as long as the wrapper.
-    return IdTableAndFirstCol{std::move(table), std::move(localVocab)};
+    return IdTableAndFirstCols<numJoinColumns, std::decay_t<decltype(table)>>{
+        std::move(table), std::move(localVocab)};
   };
   return InputRangeTypeErased{
       CachingTransformInputRange(std::move(gen), std::move(transformer))};
 }
+// _____________________________________________________________________________
+// Type-erased range of `IdTableAndFirstCols`; identical to `LazyInputView`.
+template <size_t NumJoinCols = 1>
+using IteratorWithSingleCol =
+    InputRangeTypeErased<IdTableAndFirstCols<NumJoinCols, IdTable>>;
+
+// Wrap the blocks of a `CompressedRelationReader::IdTableGeneratorInputRange`
+// into `IdTableAndFirstCols<numJoinColumns, IdTable>` (with an empty local
+// vocab). While the range is consumed, the runtime information of `scan` is
+// updated; `scan` is captured by reference and must outlive the range.
+template <size_t numJoinColumns = 1>
+IteratorWithSingleCol<numJoinColumns> convertGeneratorFromScan(
+    CompressedRelationReader::IdTableGeneratorInputRange gen, IndexScan& scan) {
+  // Keep the generator alive in shared storage; the lambda reads its details.
+  auto generatorStorage =
+      std::make_shared<CompressedRelationReader::IdTableGeneratorInputRange>(
+          std::move(gen));
+
+  using SendPriority = RuntimeInformation::SendPriority;
+
+  auto range = CachingTransformInputRange(
+      *generatorStorage,
+      [generatorStorage, &scan,
+       sendPriority = SendPriority::Always](auto& table) mutable {
+        scan.updateRuntimeInfoForLazyScan(generatorStorage->details(),
+                                          sendPriority);
+        sendPriority = SendPriority::IfDue;
+        // An `IndexScan` has no local vocabulary of its own, so an empty one
+        // is attached to each block.
+        return IdTableAndFirstCols<numJoinColumns, IdTable>{std::move(table),
+                                                            LocalVocab{}};
+      });
+
+  return IteratorWithSingleCol<numJoinColumns>{std::move(range)};
+}
 
-using MaterializedInputView = std::array<IdTableAndFirstCol<IdTableView<0>>, 1>;
+using MaterializedInputView =
+    std::array<IdTableAndFirstCols<1, IdTableView<0>>, 1>;
 
 // Wrap a fully materialized result in a `IdTableAndFirstCol` and an array. It
 // then fulfills the concept `view<IdTableAndFirstCol>` which is required by the
@@ -67,15 +110,15 @@ using MaterializedInputView = std::array<IdTableAndFirstCol<IdTableView<0>>, 1>;
 // conceptually does exactly the same for lazy inputs.
 inline MaterializedInputView asSingleTableView(
     const Result& result, const std::vector<ColumnIndex>& permutation) {
-  return std::array{
-      IdTableAndFirstCol{result.idTable().asColumnSubsetView(permutation),
-                         result.getCopyOfLocalVocab()}};
+  return {IdTableAndFirstCols<1, IdTableView<0>>{
+      result.idTable().asColumnSubsetView(permutation),
+      result.getCopyOfLocalVocab()}};
 }
 
 // Wrap a result either in an array with a single element or in a range wrapping
 // the lazy result generator. Note that the lifetime of the view is coupled to
 // the lifetime of the result.
-inline std::variant<LazyInputView, MaterializedInputView> resultToView(
+inline std::variant<LazyInputView<1>, MaterializedInputView> resultToView(
     const Result& result, const std::vector<ColumnIndex>& permutation) {
   if (result.isFullyMaterialized()) {
     return asSingleTableView(result, permutation);
@@ -99,7 +142,7 @@ CPP_template_2(typename ActionT)(
   return generatorFromActionWithCallback<Result::IdTableVocabPair>(
       [runLazyJoin = std::move(runLazyJoin),
        permutation = std::move(permutation)](
-          std::function<void(Result::IdTableVocabPair)> callback) {
+          std::function<void(Result::IdTableVocabPair)> callback) mutable {
         auto yieldValue = [&permutation,
                            &callback](Result::IdTableVocabPair value) {
           if (value.idTable_.empty()) {
@@ -123,6 +166,40 @@ CPP_template_2(typename ActionT)(
       });
 }
 
+// Build a `Result` from a join `action`, which is a callable that takes a
+// callback and returns an `IdTableVocabPair`. If `requestLaziness` is false,
+// the action is run right away with a no-op callback and its table is
+// permuted and materialized; otherwise it is turned into a lazy generator.
+template <typename Action>
+inline Result createResultFromAction(bool requestLaziness, Action&& action,
+                                     std::vector<ColumnIndex> resultSortedOn,
+                                     OptionalPermutation permutation = {}) {
+  if (!requestLaziness) {
+    auto [idTable, localVocab] = action(ad_utility::noop);
+    applyPermutation(idTable, permutation);
+    return {std::move(idTable), std::move(resultSortedOn),
+            std::move(localVocab)};
+  }
+  // Lazy case: the permutation is handed over to the generator wrapper.
+  return {runLazyJoinAndConvertToGenerator(std::forward<Action>(action),
+                                           std::move(permutation)),
+          std::move(resultSortedOn)};
+}
+
+// Construct the `AddCombinedRowToIdTable` for a join performed by `op`: the
+// result width, allocator and cancellation handle are all taken from `op`.
+inline auto getRowAdderForJoin(
+    const Operation& op, size_t numJoinColumns, bool keepJoinColumns,
+    AddCombinedRowToIdTable::BlockwiseCallback yieldTable) {
+  IdTable resultTable{op.getResultWidth(), op.allocator()};
+  return AddCombinedRowToIdTable{numJoinColumns,
+                                 std::move(resultTable),
+                                 op.getCancellationHandle(),
+                                 keepJoinColumns,
+                                 CHUNK_SIZE,
+                                 std::move(yieldTable)};
+}
+
 // Helper function to check if the join of two columns propagate the value
 // returned by `Operation::columnOriginatesFromGraphOrUndef`.
 inline bool doesJoinProduceGuaranteedGraphValuesOrUndef(
diff --git a/src/engine/JoinWithIndexScanHelpers.h b/src/engine/JoinWithIndexScanHelpers.h
new file mode 100644
index 0000000000..c028143f39
--- /dev/null
+++ b/src/engine/JoinWithIndexScanHelpers.h
@@ -0,0 +1,63 @@
+// Copyright 2026, University of Freiburg
+// Chair of Algorithms and Data Structures
+// Author: Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de)
+
+#ifndef QLEVER_SRC_ENGINE_JOINWITHINDEXSCANHELPERS_H
+#define QLEVER_SRC_ENGINE_JOINWITHINDEXSCANHELPERS_H
+
+#include "engine/AddCombinedRowToTable.h"
+#include "engine/IndexScan.h"
+#include "engine/Result.h"
+#include "index/CompressedRelation.h"
+#include "util/Iterators.h"
+#include "util/JoinAlgorithms/JoinAlgorithms.h"
+#include "util/JoinAlgorithms/JoinColumnMapping.h"
+
+namespace qlever::joinWithIndexScanHelpers {
+
+// Tag types to indicate the join semantics
+struct InnerJoinTag {};
+struct OptionalJoinTag {};
+struct MinusTag {};
+
+// Helper to convert generators to the format expected by join algorithms
+using IteratorWithSingleCol = ad_utility::InputRangeTypeErased<
+    ad_utility::IdTableAndFirstCols<1, IdTable>>;
+
+// NOTE(review): same logic as `joinHelpers::convertGeneratorFromScan<1>`.
+inline IteratorWithSingleCol convertGenerator(
+    CompressedRelationReader::IdTableGeneratorInputRange&& gen,
+    IndexScan& scan) {
+  // Keep the generator alive in shared storage; the lambda reads its details.
+  auto generatorStorage =
+      std::make_shared<CompressedRelationReader::IdTableGeneratorInputRange>(
+          std::move(gen));
+
+  using SendPriority = RuntimeInformation::SendPriority;
+
+  auto range = ad_utility::CachingTransformInputRange(
+      *generatorStorage,
+      [generatorStorage, &scan,
+       sendPriority = SendPriority::Always](auto& table) mutable {
+        scan.updateRuntimeInfoForLazyScan(generatorStorage->details(),
+                                          sendPriority);
+        sendPriority = SendPriority::IfDue;
+        // IndexScans have no local vocabulary, so an empty one suffices.
+        return ad_utility::IdTableAndFirstCols<1, IdTable>{std::move(table),
+                                                           LocalVocab{}};
+      });
+
+  return IteratorWithSingleCol{std::move(range)};
+}
+
+// Flag the runtime info of all given scans as lazily completed.
+template <typename... Scans>
+inline void setScanStatusToLazilyCompleted(Scans&... scans) {
+  constexpr auto completed =
+      RuntimeInformation::Status::lazilyMaterializedCompleted;
+  (static_cast<void>(scans.runtimeInfo().status_ = completed), ...);
+}
+
+}  // namespace qlever::joinWithIndexScanHelpers
+
+#endif  // QLEVER_SRC_ENGINE_JOINWITHINDEXSCANHELPERS_H
diff --git a/src/engine/Operation.h b/src/engine/Operation.h
index 6b0c63e1c3..cd9f6552f0 100644
--- a/src/engine/Operation.h
+++ b/src/engine/Operation.h
@@ -199,6 +199,10 @@ class Operation {
 
   virtual uint64_t getSizeEstimate() final;
 
+  const SharedCancellationHandle& getCancellationHandle() const {
+    return cancellationHandle_;  // read by free join helper functions
+  }
+
  private:
   virtual uint64_t getSizeEstimateBeforeLimit() = 0;
 
diff --git a/src/engine/OptionalJoin.cpp b/src/engine/OptionalJoin.cpp
index e4a25ef301..cd4df87aac 100644
--- a/src/engine/OptionalJoin.cpp
+++ b/src/engine/OptionalJoin.cpp
@@ -8,14 +8,18 @@
 #include "engine/AddCombinedRowToTable.h"
 #include "engine/CallFixedSize.h"
 #include "engine/Engine.h"
+#include "engine/IndexScan.h"
 #include "engine/JoinHelpers.h"
+#include "engine/JoinWithIndexScanHelpers.h"
 #include "engine/Service.h"
 #include "engine/Sort.h"
+#include "global/RuntimeParameters.h"
 #include "util/Algorithm.h"
 #include "util/JoinAlgorithms/IndexNestedLoopJoin.h"
 #include "util/JoinAlgorithms/JoinAlgorithms.h"
 
 using namespace qlever::joinHelpers;
+using namespace qlever::joinWithIndexScanHelpers;
 
 using std::endl;
 using std::string;
@@ -115,6 +119,17 @@ Result OptionalJoin::computeResult(bool requestLaziness) {
     return std::move(res).value();
   }
 
+  if (getRuntimeParameter<&RuntimeParameters::prefilteredOptionalJoin_>() &&
+      (_joinColumns.size() == 1 ||
+       (implementation_ == Implementation::OnlyUndefInLastJoinColumnOfLeft &&
+        _joinColumns.size() == 2))) {
+    if (auto indexScan =
+            std::dynamic_pointer_cast<IndexScan>(_right->getRootOperation())) {
+      return optionalJoinWithIndexScan(_left->getResult(true),
+                                       std::move(indexScan), requestLaziness);
+    }
+  }
+
   IdTable idTable{getResultWidth(), getExecutionContext()->getAllocator()};
 
   AD_CONTRACT_CHECK(idTable.numColumns() >= _joinColumns.size() ||
@@ -466,10 +481,8 @@ Result OptionalJoin::lazyOptionalJoin(std::shared_ptr<const Result> left,
   auto action = [this, left = std::move(left), right = std::move(right),
                  joinColMap = std::move(joinColMap)](
                     std::function<void(IdTable&, LocalVocab&)> yieldTable) {
-    ad_utility::AddCombinedRowToIdTable rowAdder{
-        _joinColumns.size(), IdTable{getResultWidth(), allocator()},
-        cancellationHandle_, keepJoinColumns_,
-        CHUNK_SIZE,          std::move(yieldTable)};
+    auto rowAdder = getRowAdderForJoin(*this, _joinColumns.size(),
+                                       keepJoinColumns_, std::move(yieldTable));
     auto leftRange = resultToView(*left, joinColMap.permutationLeft());
     auto rightRange = resultToView(*right, joinColMap.permutationRight());
     std::visit(
@@ -483,16 +496,90 @@ Result OptionalJoin::lazyOptionalJoin(std::shared_ptr<const Result> left,
     return Result::IdTableVocabPair{std::move(rowAdder).resultTable(),
                                     std::move(localVocab)};
   };
+  return createResultFromAction(requestLaziness, std::move(action),
+                                resultSortedOn(), std::move(resultPermutation));
+}
+// _____________________________________________________________________________
+Result OptionalJoin::optionalJoinWithIndexScan(
+    std::shared_ptr<const Result> left, std::shared_ptr<IndexScan> rightScan,
+    bool requestLaziness) {
+  // One join column, or two with UNDEF only in the last left join column.
+  AD_CORRECTNESS_CHECK(
+      _joinColumns.size() == 1 ||
+      (_joinColumns.size() == 2 &&
+       implementation_ == Implementation::OnlyUndefInLastJoinColumnOfLeft));
+  ad_utility::JoinColumnMapping joinColMap{
+      _joinColumns, _left->getResultWidth(), _right->getResultWidth(),
+      keepJoinColumns_};
+
+  auto resultPermutation = joinColMap.permutationResult();
+
+  using namespace ad_utility::use_value_identity;
+  auto getAction = [&](auto leftIsMaterializedV) {
+    static constexpr bool leftIsMaterialized = leftIsMaterializedV;
+    return [this, left = std::move(left), rightScan = std::move(rightScan),
+            joinColMap = std::move(joinColMap)](
+               std::function<void(IdTable&, LocalVocab&)> yieldTable) {
+      auto rowAdder = getRowAdderForJoin(
+          *this, _joinColumns.size(), keepJoinColumns_, std::move(yieldTable));
+      auto getLeftAndRightRange = [&]<size_t numJoinCols>() {
+        auto firstJoinColLeft = _joinColumns.at(0).at(0);
+        if constexpr (leftIsMaterialized) {
+          // NOTE(review): no full-scan fallback for UNDEF as in `Join`?
+          auto rightBlocksInternal = rightScan->lazyScanForJoinOfColumnWithScan(
+              left->idTable().getColumn(firstJoinColLeft));
+          auto rightRange = convertGeneratorFromScan<numJoinCols>(
+              std::move(rightBlocksInternal), *rightScan);
+          auto permutationIdTable =
+              ad_utility::IdTableAndFirstCols<numJoinCols, IdTableView<0>>{
+                  left->idTable().asColumnSubsetView(
+                      joinColMap.permutationLeft()),
+                  left->getCopyOfLocalVocab()};
+          auto leftRange = std::array{std::move(permutationIdTable)};
+          return std::pair{std::move(leftRange), std::move(rightRange)};
+        } else {
+          auto [leftJoinSide, indexScanSide] = rightScan->prefilterTables(
+              left->idTables(), firstJoinColLeft, false);
+          auto leftRange =
+              convertGenerator<std::decay_t<decltype(leftJoinSide)>,
+                               numJoinCols>(std::move(leftJoinSide),
+                                            joinColMap.permutationLeft());
+          auto rightRange =
+              convertGenerator<std::decay_t<decltype(indexScanSide)>,
+                               numJoinCols>(std::move(indexScanSide),
+                                            joinColMap.permutationRight());
+          return std::pair{std::move(leftRange), std::move(rightRange)};
+        }
+      };
+      if (_joinColumns.size() == 1) {
+        // Note: `zipperJoinForBlocksWithPotentialUndef` automatically switches
+        // to a faster implementation if no input contains UNDEF values.
+        auto [leftRange, rightRange] =
+            getLeftAndRightRange.template operator()<1>();
+        zipperJoinForBlocksWithPotentialUndef(
+            std::move(leftRange), std::move(rightRange), std::less{}, rowAdder,
+            {}, {}, ad_utility::OptionalJoinTag{});
+      } else {
+        AD_CORRECTNESS_CHECK(implementation_ ==
+                             Implementation::OnlyUndefInLastJoinColumnOfLeft);
+        auto [leftRange, rightRange] =
+            getLeftAndRightRange.template operator()<2>();
+        specialOptionalJoinForBlocks(
+            std::move(leftRange), std::move(rightRange),
+            std::integral_constant<size_t, 2>{}, rowAdder);
+      }
+      setScanStatusToLazilyCompleted(*rightScan);
+      return std::move(rowAdder).toIdTableVocabPair();
+    };
+  };
+
-  if (requestLaziness) {
-    return {runLazyJoinAndConvertToGenerator(std::move(action),
-                                             std::move(resultPermutation)),
-            resultSortedOn()};
-  } else {
-    auto [idTable, localVocab] = action(ad_utility::noop);
-    applyPermutation(idTable, resultPermutation);
-    return {std::move(idTable), resultSortedOn(), std::move(localVocab)};
-  }
+  auto createResult = [&](auto isMaterialized) {
+    return createResultFromAction(requestLaziness, getAction(isMaterialized),
+                                  resultSortedOn(),
+                                  std::move(resultPermutation));
+  };
+  return left->isFullyMaterialized() ? createResult(vi<true>)
+                                     : createResult(vi<false>);
 }
 
 // _____________________________________________________________________________
diff --git a/src/engine/OptionalJoin.h b/src/engine/OptionalJoin.h
index 428951ebc6..56304ddb2b 100644
--- a/src/engine/OptionalJoin.h
+++ b/src/engine/OptionalJoin.h
@@ -9,6 +9,8 @@
 #include "engine/Operation.h"
 #include "engine/QueryExecutionTree.h"
 
+// Forward declaration
+class IndexScan;
 class OptionalJoin : public Operation {
  private:
   std::shared_ptr<QueryExecutionTree> _left;
@@ -81,6 +83,18 @@ class OptionalJoin : public Operation {
                           std::shared_ptr<const Result> right,
                           bool requestLaziness);
 
+  // Compute the join result for the result from the `left` subtree
+  // and the `rightScan`. This function applies block prefiltering to the
+  // `rightScan`. This function currently only supports single-column OPTIONAL
+  // joins, or OPTIONAL joins on two columns where UNDEF values are only in the
+  // last (i.e. the second) join column. The `left` and `rightScan` have to be
+  // obtained from the members `_left` and `_right` respectively, as those
+  // members will be used to get additional required metadata for the arguments
+  // `left` and `rightScan`.
+  Result optionalJoinWithIndexScan(std::shared_ptr<const Result> left,
+                                   std::shared_ptr<IndexScan> rightScan,
+                                   bool requestLaziness);
+
  private:
   std::unique_ptr<Operation> cloneImpl() const override;
 
diff --git a/src/global/RuntimeParameters.cpp b/src/global/RuntimeParameters.cpp
index 5a409a2c32..2715cc1675 100644
--- a/src/global/RuntimeParameters.cpp
+++ b/src/global/RuntimeParameters.cpp
@@ -51,6 +51,7 @@ RuntimeParameters::RuntimeParameters() {
   add(materializedViewWriterMemory_);
   add(defaultQueryTimeout_);
   add(sortInMemoryThreshold_);
+  add(prefilteredOptionalJoin_);
 
   defaultQueryTimeout_.setParameterConstraint(
       [](std::chrono::seconds value, std::string_view parameterName) {
diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h
index 67cdfbf3f8..d131d8759b 100644
--- a/src/global/RuntimeParameters.h
+++ b/src/global/RuntimeParameters.h
@@ -135,6 +135,8 @@ struct RuntimeParameters {
   MemorySizeParameter sortInMemoryThreshold_{
       ad_utility::MemorySize::gigabytes(5), "sort-in-memory-threshold"};
 
+  Bool prefilteredOptionalJoin_{true, "prefiltered-optional-join"};
+
   // ___________________________________________________________________________
   // IMPORTANT NOTE: IF YOU ADD PARAMETERS ABOVE, ALSO REGISTER THEM IN THE
   // CONSTRUCTOR, S.T. THEY CAN ALSO BE ACCESSED VIA THE RUNTIME INTERFACE.
diff --git a/src/util/JoinAlgorithms/JoinAlgorithms.h b/src/util/JoinAlgorithms/JoinAlgorithms.h
index aa8858936c..fe42d9c065 100644
--- a/src/util/JoinAlgorithms/JoinAlgorithms.h
+++ b/src/util/JoinAlgorithms/JoinAlgorithms.h
@@ -38,11 +38,11 @@ CPP_concept UnaryIteratorFunction =
     ql::concepts::invocable<F, ql::ranges::iterator_t<Range>>;
 
 // A  function `F` fulfills `BinaryIteratorFunction` if it can be called with
-// two arguments of the `Range`'s iterator type (NOT value type).
-template <typename F, typename Range>
+// an iterator of `Range1` and an iterator of `Range2` (NOT the value types).
+template <typename F, typename Range1, typename Range2>
 CPP_concept BinaryIteratorFunction =
-    ql::concepts::invocable<F, ql::ranges::iterator_t<Range>,
-                            ql::ranges::iterator_t<Range>>;
+    ql::concepts::invocable<F, ql::ranges::iterator_t<Range1>,
+                            ql::ranges::iterator_t<Range2>>;
 
 // Helper type to indicate the different join modes.
 enum class JoinType { JOIN, OPTIONAL, MINUS };
@@ -470,6 +470,44 @@ CPP_template(typename RangeSmaller, typename RangeLarger, typename LessThan,
   }
 }
 
+// Struct that compares two row-like types lexicographically, but only the first
+// `numColumns - 1` entries of each row. This is used by the
+// `specialOptionalJoin`, where the last join column has special semantics, as
+// it might contain undef values.
+struct CompareAllButLast {
+  size_t numColumns_;
+  explicit CompareAllButLast(size_t numColumns) : numColumns_(numColumns) {}
+
+  template <typename A, typename B>
+  bool operator()(const A& a, const B& b) const {
+    for (size_t i = 0; i + 1 < numColumns_; ++i) {  // no underflow for 0
+      const Id& aId = a[i];
+      const Id& bId = b[i];
+      if (aId != bId) {
+        return aId < bId;
+      }
+    }
+    return false;
+  }
+};
+// Same as `CompareAllButLast`, but checks for equality.
+struct CompareEqButLast {
+  size_t numColumns_;
+  explicit CompareEqButLast(size_t numColumns) : numColumns_(numColumns) {}
+
+  template <typename A, typename B>
+  bool operator()(const A& a, const B& b) const {
+    for (size_t i = 0; i + 1 < numColumns_; ++i) {  // no underflow for 0
+      const Id& aId = a[i];
+      const Id& bId = b[i];
+      if (aId != bId) {
+        return false;
+      }
+    }
+    return true;
+  }
+};
+
 /**
  * @brief Perform an OPTIONAL join for the following special case: The `right`
  * input contains no UNDEF values in any of its join columns, the `left`
@@ -486,21 +524,23 @@ CPP_template(typename RangeSmaller, typename RangeLarger, typename LessThan,
  * @param compatibleRowAction Same as in `zipperJoinWithUndef`
  * @param elFromFirstNotFoundAction Same as in `zipperJoinWithUndef`
  */
-CPP_template(typename CompatibleActionT, typename NotFoundActionT,
+CPP_template(typename LeftTableLike, typename RightTableLike,
+             typename CompatibleActionT, typename NotFoundActionT,
              typename CancellationFuncT)(
-    requires BinaryIteratorFunction<CompatibleActionT, IdTableView<0>> CPP_and UnaryIteratorFunction<
-        NotFoundActionT, IdTableView<0>>
-        CPP_and ql::concepts::invocable<
-            CancellationFuncT>) void specialOptionalJoin(const IdTableView<0>&
-                                                             left,
-                                                         const IdTableView<0>&
-                                                             right,
-                                                         const CompatibleActionT&
-                                                             compatibleRowAction,
-                                                         const NotFoundActionT&
-                                                             elFromFirstNotFoundAction,
-                                                         const CancellationFuncT&
-                                                             checkCancellation) {
+    requires BinaryIteratorFunction<CompatibleActionT, LeftTableLike,
+                                    RightTableLike>
+        CPP_and UnaryIteratorFunction<NotFoundActionT, LeftTableLike>
+            CPP_and ql::concepts::invocable<
+                CancellationFuncT>) void specialOptionalJoin(const LeftTableLike&
+                                                                 left,
+                                                             const RightTableLike&
+                                                                 right,
+                                                             const CompatibleActionT&
+                                                                 compatibleRowAction,
+                                                             const NotFoundActionT&
+                                                                 elFromFirstNotFoundAction,
+                                                             const CancellationFuncT&
+                                                                 checkCancellation) {
   auto it1 = std::begin(left);
   auto end1 = std::end(left);
   auto it2 = std::begin(right);
@@ -510,32 +550,27 @@ CPP_template(typename CompatibleActionT, typename NotFoundActionT,
     return;
   }
 
+  // TODO<joka921> this argument should be passed in in a way that allows us to
+  // easily move it to compile time whenever possible.
   size_t numColumns = (*it1).size();
   // A predicate that compares two rows lexicographically but ignores the last
   // column.
-  auto compareAllButLast = [numColumns](const auto& a, const auto& b) {
-    for (size_t i = 0; i < numColumns - 1; ++i) {
-      if (a[i] != b[i]) {
-        return a[i] < b[i];
-      }
-    }
-    return false;
-  };
+  auto compareAllButLast = CompareAllButLast{numColumns};
 
   // Similar to the previous lambda, but checks for equality.
-  auto compareEqButLast = [numColumns](const auto& a, const auto& b) {
-    for (size_t i = 0; i < numColumns - 1; ++i) {
-      if (a[i] != b[i]) {
-        return false;
-      }
-    }
-    return true;
-  };
+  auto compareEqButLast = CompareEqButLast{numColumns};
 
   // The last columns from the left and right input. Those will be dealt with
   // separately.
-  ql::span<const Id> lastColumnLeft = left.getColumn(left.numColumns() - 1);
-  ql::span<const Id> lastColumnRight = right.getColumn(right.numColumns() - 1);
+  // TODO<joka921> This is a little inefficient, should be a getColumn on a
+  // column based interface, but that requires refactoring of all the
+  // types passed in here (in particular, we are using
+  // `ql::ranges::subrange<IdTable>` etc.).
+  auto getLastJoinColum = [numColumns](const auto& row) {
+    return row[numColumns - 1];
+  };
+  auto lastColumnLeft = left | ql::views::transform(getLastJoinColum);
+  auto lastColumnRight = right | ql::views::transform(getLastJoinColum);
 
   while (it1 < end1 && it2 < end2) {
     checkCancellation();
@@ -584,12 +619,12 @@ CPP_template(typename CompatibleActionT, typename NotFoundActionT,
     // Set up the corresponding sub-ranges of the last columns.
     auto beg = it1 - left.begin();
     auto end = endSame1 - left.begin();
-    ql::span<const Id> leftSub{lastColumnLeft.begin() + beg,
-                               lastColumnLeft.begin() + end};
+    ql::ranges::subrange leftSub{lastColumnLeft.begin() + beg,
+                                 lastColumnLeft.begin() + end};
     beg = it2 - right.begin();
     end = endSame2 - right.begin();
-    ql::span<const Id> rightSub{lastColumnRight.begin() + beg,
-                                lastColumnRight.begin() + end};
+    ql::ranges::subrange rightSub{lastColumnRight.begin() + beg,
+                                  lastColumnRight.begin() + end};
 
     // Set up the generator for the UNDEF values.
     // TODO<joka921> We could probably also apply this optimization if both
@@ -723,30 +758,43 @@ class BlockAndSubrange {
 };
 
 // A helper struct for the zipper join on blocks algorithm (see below). It
-// combines the current iterator, then end iterator, the relevant projection to
+// combines the current iterator, the end iterator, the relevant projection to
 // obtain the input to the comparison, and a buffer for blocks that are
 // currently required by the join algorithm for one side of the join.
-template <typename Iterator, typename End, typename Projection>
+// It also defines a type alias `ProjectedEl` which is used to store the result
+// of a projection. It must have value semantics, as it is stored permanently.
+// The default is the decayed result type of projecting a single block element.
+template <typename Iterator, typename End, typename Projection,
+          typename ProjectedElT = std::monostate>
 struct JoinSide {
   using CurrentBlocks =
       std::vector<detail::BlockAndSubrange<ql::iter_value_t<Iterator>>>;
   Iterator it_;
   [[no_unique_address]] const End end_;
   const Projection& projection_;
+  // Dummy, only required for a better interface of `makeJoinSide` below.
+  std::type_identity<ProjectedElT> projectedElT_{};
   CurrentBlocks currentBlocks_{};
   CurrentBlocks undefBlocks_{};
 
   // Type aliases for a single element from a block from the left/right input.
   using value_type = ql::ranges::range_value_t<ql::iter_value_t<Iterator>>;
   // Type alias for the result of the projection.
-  using ProjectedEl =
-      std::decay_t<std::invoke_result_t<const Projection&, value_type>>;
+  using ProjectedEl = std::conditional_t<
+      std::is_same_v<ProjectedElT, std::monostate>,
+      std::decay_t<std::invoke_result_t<const Projection&, value_type>>,
+      ProjectedElT>;
 };
 
-// Deduction guide required by the `makeJoinSide` function.
+// Deduction guides required by the `makeJoinSide` function, with and without
+// explicitly specified `ProjectedEl`.
 template <typename It, typename End, typename Projection>
 JoinSide(It, End, const Projection&) -> JoinSide<It, End, Projection>;
 
+template <typename It, typename End, typename Projection, typename ProjectedElT>
+JoinSide(It, End, const Projection&, std::type_identity<ProjectedElT>)
+    -> JoinSide<It, End, Projection, ProjectedElT>;
+
 // Create a `JoinSide` object from a range of `blocks` and a `projection`. Note
 // that the `blocks` are stored as a reference, so the caller is responsible for
 // keeping them valid until the join is completed.
@@ -756,6 +804,15 @@ auto makeJoinSide(Blocks& blocks, const Projection& projection) {
                   projection};
 }
 
+// Overload of `makeJoinSide` that additionally specifies the `ProjectedEl`
+// type explicitly (via a `std::type_identity` tag argument).
+template <typename Blocks, typename Projection, typename ProjectedEl>
+auto makeJoinSide(Blocks& blocks, const Projection& projection,
+                  std::type_identity<ProjectedEl> tg) {
+  return JoinSide{ql::ranges::begin(blocks), ql::ranges::end(blocks),
+                  projection, tg};
+}
+
 // A concept to identify instantiations of the `JoinSide` template.
 template <typename T>
 CPP_concept IsJoinSide = ad_utility::isInstantiation<T, JoinSide>;
@@ -832,12 +889,20 @@ static constexpr size_t FETCH_BLOCKS = 3;
 // After adding the Cartesian product we start a new round with a new
 // `currentEl` (5 in this example). New blocks are added to one of the buffers
 // if they become empty at one point in the algorithm.
-CPP_template(typename LeftSide, typename RightSide, typename LessThan,
-             typename CompatibleRowAction, typename IsUndef = AlwaysFalse)(
+//
+// NOTE: This class is implemented as a generic, CRTP-style framework with two
+// customization points: The actual join algorithm on materialized (sub-)blocks
+// (called `joinSubranges`), and the algorithm used to perform the Cartesian
+// product of matching elements across block boundaries (called
+// `addCartesianProductImpl`). This framework allows us to also implement
+// certain cases of `OptionalJoin` that contain UNDEF values (see below).
+CPP_template(typename Derived, typename LeftSide, typename RightSide,
+             typename LessThan, typename CompatibleRowAction,
+             typename IsUndef = AlwaysFalse)(
     requires IsJoinSide<LeftSide> CPP_and IsJoinSide<RightSide> CPP_and
         InvocableWithExactReturnType<
             IsUndef, bool,
-            typename LeftSide::ProjectedEl>) struct BlockZipperJoinImpl {
+            typename LeftSide::ProjectedEl>) struct BlockZipperJoinImplCRTP {
   // The left and right inputs of the join
   LeftSide leftSide_;
   RightSide rightSide_;
@@ -847,6 +912,17 @@ CPP_template(typename LeftSide, typename RightSide, typename LessThan,
   CompatibleRowAction& compatibleRowAction_;
   [[no_unique_address]] IsUndef isUndefined_{};
 
+  // Constructor. NOTE: `compatibleRowAction` is only stored by reference.
+  BlockZipperJoinImplCRTP(LeftSide leftSide, RightSide rightSide,
+                          const LessThan& lessThan,
+                          CompatibleRowAction& compatibleRowAction,
+                          IsUndef isUndefined = {})
+      : leftSide_{std::move(leftSide)},
+        rightSide_{std::move(rightSide)},
+        lessThan_{lessThan},
+        compatibleRowAction_{compatibleRowAction},
+        isUndefined_{std::move(isUndefined)} {}
+
   using LeftBlocks = typename LeftSide::CurrentBlocks;
   using RightBlocks = typename RightSide::CurrentBlocks;
 
@@ -874,7 +950,7 @@ CPP_template(typename LeftSide, typename RightSide, typename LessThan,
   // Recompute the `currentEl`. It is the minimum of the last element in the
   // first block of either of the join sides.
   ProjectedEl getCurrentEl() {
-    auto getFirst = [](const Side& side) {
+    auto getFirst = [](const Side& side) -> ProjectedEl {
       return side.projection_(side.currentBlocks_.front().back());
     };
     return std::min(getFirst(leftSide_), getFirst(rightSide_), lessThan_);
@@ -985,14 +1061,8 @@ CPP_template(typename LeftSide, typename RightSide, typename LessThan,
   // blocks on the right and add them to the result.
   void addCartesianProduct(const LeftBlocks& blocksLeft,
                            const RightBlocks& blocksRight) {
-    // TODO<C++23> use `ql::views::cartesian_product`.
-    for (const auto& lBlock : blocksLeft) {
-      for (const auto& rBlock : blocksRight) {
-        compatibleRowAction_.setInput(lBlock.fullBlock(), rBlock.fullBlock());
-        compatibleRowAction_.addRows(lBlock.getIndexRange(),
-                                     rBlock.getIndexRange());
-      }
-    }
+    static_cast<Derived*>(this)->addCartesianProductImpl(blocksLeft,
+                                                         blocksRight);
   }
 
   // Handle non-matching rows from the left side for an optional join or a minus
@@ -1157,26 +1227,36 @@ CPP_template(typename LeftSide, typename RightSide, typename LessThan,
     // All undefined values should already be processed at this point.
     AD_CORRECTNESS_CHECK(!isUndefined_(subrangeLeft.front()));
     AD_CORRECTNESS_CHECK(!isUndefined_(subrangeRight.front()));
+
+    // Lambda that binds the common arguments for the various calls below
+    // (the inputs and the `rowAdder` are always the same, it is just the
+    // UNDEF configuration that is different).
+    auto doJoin = [&](auto&&... args) {
+      return static_cast<Derived*>(this)->joinSubranges(
+          ql::ranges::subrange{subrangeLeft.begin(), currentElItL},
+          ql::ranges::subrange{subrangeRight.begin(), currentElItR},
+          RowIndexAdder{addRowIndex, addRowIndices}, AD_FWD(args)...);
+    };
     // If we have undefined values stored, we need to provide a generator that
     // yields iterators to the individual undefined values.
     if constexpr (potentiallyHasUndef) {
-      // We pass `std::false_type`, to disable coverage checks for the undefined
-      // values that are stored in `side.undefBlocks_`, which we already have
-      // processed ourselves and don't lie within the passed subrange, which
-      // this function assumes otherwise.
-      [[maybe_unused]] auto res = zipperJoinWithUndef(
-          ql::ranges::subrange{subrangeLeft.begin(), currentElItL},
-          ql::ranges::subrange{subrangeRight.begin(), currentElItR}, lessThan_,
-          RowIndexAdder{addRowIndex, addRowIndices},
-          findUndefValues<true>(fullBlockLeft, fullBlockRight, begL, begR),
-          findUndefValues<false>(fullBlockLeft, fullBlockRight, begL, begR),
-          addNotFoundRowIndex, noop, std::false_type{});
+      // Check at runtime if we actually have UNDEF blocks. If not, we should
+      // use the simpler code path (with std::true_type for coverage).
+      if (!hasUndef(leftSide_) && !hasUndef(rightSide_)) {
+        // No UNDEFs found at runtime, use the simpler code path.
+        doJoin(noop, noop, addNotFoundRowIndex, noop, std::true_type{});
+      } else {
+        // We pass `std::false_type`, to disable coverage checks for the
+        // undefined values that are stored in `side.undefBlocks_`, which we
+        // already have processed ourselves and don't lie within the passed
+        // subrange, which this function assumes otherwise.
+        doJoin(
+            findUndefValues<true>(fullBlockLeft, fullBlockRight, begL, begR),
+            findUndefValues<false>(fullBlockLeft, fullBlockRight, begL, begR),
+            addNotFoundRowIndex, noop, std::false_type{});
+      }
     } else {
-      [[maybe_unused]] auto res = zipperJoinWithUndef(
-          ql::ranges::subrange{subrangeLeft.begin(), currentElItL},
-          ql::ranges::subrange{subrangeRight.begin(), currentElItR}, lessThan_,
-          RowIndexAdder{addRowIndex, addRowIndices}, noop, noop,
-          addNotFoundRowIndex);
+      doJoin(noop, noop, addNotFoundRowIndex, noop, std::true_type{});
     }
     compatibleRowAction_.flush();
 
@@ -1469,12 +1549,17 @@ CPP_template(typename LeftSide, typename RightSide, typename LessThan,
     } else {
       fetchAndProcessUndefinedBlocks(joinType == JoinType::OPTIONAL);
     }
+
     if (potentiallyHasUndef && !hasUndef(leftSide_) && !hasUndef(rightSide_)) {
       // Run the join without UNDEF values if there are none. No need to move
       // since LeftSide and RightSide are references.
-      BlockZipperJoinImpl<LeftSide, RightSide, LessThan, CompatibleRowAction,
-                          AlwaysFalse>{leftSide_, rightSide_, lessThan_,
-                                       compatibleRowAction_, AlwaysFalse{}}
+      // We need to also rebind the `Derived` class to the changed last template
+      // parameter, otherwise we will get errors because the new `CRTP` class we
+      // create below doesn't directly inherit from `Derived` anymore.
+      BlockZipperJoinImplCRTP<typename Derived::template Rebind<AlwaysFalse>,
+                              LeftSide, RightSide, LessThan,
+                              CompatibleRowAction, AlwaysFalse>{
+          leftSide_, rightSide_, lessThan_, compatibleRowAction_, AlwaysFalse{}}
           .template runJoin<joinType>();
       return;
     }
@@ -1502,6 +1587,66 @@ CPP_template(typename LeftSide, typename RightSide, typename LessThan,
 #undef Blocks
 };
 
+// Concrete implementation of `BlockZipperJoinImplCRTP` that provides the
+// default behavior for `addCartesianProductImpl` and `joinSubranges`.
+CPP_template(typename LeftSide, typename RightSide, typename LessThan,
+             typename CompatibleRowAction, typename IsUndef = AlwaysFalse)(
+    requires IsJoinSide<LeftSide> CPP_and IsJoinSide<RightSide> CPP_and
+        InvocableWithExactReturnType<
+            IsUndef, bool,
+            typename LeftSide::ProjectedEl>) struct BlockZipperJoinImpl
+    : BlockZipperJoinImplCRTP<BlockZipperJoinImpl<LeftSide, RightSide, LessThan,
+                                                  CompatibleRowAction, IsUndef>,
+                              LeftSide, RightSide, LessThan,
+                              CompatibleRowAction, IsUndef> {
+  using Base = BlockZipperJoinImplCRTP<BlockZipperJoinImpl, LeftSide, RightSide,
+                                       LessThan, CompatibleRowAction, IsUndef>;
+  using typename Base::LeftBlocks;
+  using typename Base::RightBlocks;
+
+  // Helper for rebinding this class to a new value for the `IsUndef` template
+  // parameter.
+  template <typename NewIsUndef>
+  using Rebind = BlockZipperJoinImpl<LeftSide, RightSide, LessThan,
+                                     CompatibleRowAction, NewIsUndef>;
+
+  // Inherit constructors from the base class.
+  using Base::Base;
+
+  // Implement the `addCartesianProductImpl` customization point: add the rows
+  // from each pair of a left and a right block to the result.
+  void addCartesianProductImpl(const LeftBlocks& blocksLeft,
+                               const RightBlocks& blocksRight) {
+    for (const auto& [lBlock, rBlock] :
+         ::ranges::views::cartesian_product(blocksLeft, blocksRight)) {
+      this->compatibleRowAction_.setInput(lBlock.fullBlock(),
+                                          rBlock.fullBlock());
+      this->compatibleRowAction_.addRows(lBlock.getIndexRange(),
+                                         rBlock.getIndexRange());
+    }
+  }
+
+  // Implement the `joinSubranges` customization point by forwarding to
+  // `zipperJoinWithUndef`.
+  template <typename SubrangeLeft, typename SubrangeRight,
+            typename RowIndexAdder, typename FindSmallerUndefRangesLeft,
+            typename FindSmallerUndefRangesRight,
+            typename ElFromFirstNotFoundAction, typename CheckCancellation,
+            typename CoverUndefRanges>
+  void joinSubranges(
+      const SubrangeLeft& subrangeLeft, const SubrangeRight& subrangeRight,
+      const RowIndexAdder& rowIndexAdder,
+      const FindSmallerUndefRangesLeft& findSmallerUndefRangesLeft,
+      const FindSmallerUndefRangesRight& findSmallerUndefRangesRight,
+      ElFromFirstNotFoundAction elFromFirstNotFoundAction,
+      CheckCancellation checkCancellation, CoverUndefRanges coverUndefRanges) {
+    [[maybe_unused]] auto res = zipperJoinWithUndef(
+        subrangeLeft, subrangeRight, this->lessThan_, rowIndexAdder,
+        findSmallerUndefRangesLeft, findSmallerUndefRangesRight,
+        elFromFirstNotFoundAction, checkCancellation, coverUndefRanges);
+  }
+};
+
 // Deduction guide for the above struct.
 template <typename LHS, typename RHS, typename LessThan,
           typename CompatibleRowAction>
@@ -1512,8 +1657,195 @@ template <typename LHS, typename RHS, typename LessThan,
 BlockZipperJoinImpl(LHS&, RHS&, const LessThan&, CompatibleRowAction&, IsUndef)
     -> BlockZipperJoinImpl<LHS, RHS, LessThan, CompatibleRowAction, IsUndef>;
 
+// CRTP-based implementation for the special optional join on blocks, where the
+// only column that may contain UNDEF is the last join column of the left input.
+// This implementation compares all-but-last (always defined!) columns in the
+// outer join logic, and then performs a one-column join on the last column for
+// matching groups. Preconditions:
+// - Right input contains no UNDEF values
+// - Left input only contains UNDEF in the last column
+// - Both inputs are sorted lexicographically
+CPP_template(typename LeftSide, typename RightSide,
+             typename CompatibleRowAction)(
+    requires IsJoinSide<LeftSide> CPP_and
+        IsJoinSide<RightSide>) struct BlockZipperJoinImplForSpecialOptionalJoin
+    : BlockZipperJoinImplCRTP<BlockZipperJoinImplForSpecialOptionalJoin<
+                                  LeftSide, RightSide, CompatibleRowAction>,
+                              LeftSide, RightSide, CompareAllButLast,
+                              CompatibleRowAction, AlwaysFalse> {
+  using Base =
+      BlockZipperJoinImplCRTP<BlockZipperJoinImplForSpecialOptionalJoin,
+                              LeftSide, RightSide, CompareAllButLast,
+                              CompatibleRowAction, AlwaysFalse>;
+  using typename Base::LeftBlocks;
+  using typename Base::RightBlocks;
+
+  template <typename NewIsUndef>
+  using Rebind = BlockZipperJoinImplForSpecialOptionalJoin;
+
+  // Inherit constructors from base class.
+  using Base::Base;
+
+  // Implement the `addCartesianProductImpl` customization point for special
+  // optional joins. As the base class ensures that the passed blocks are equal
+  // up to the last join column, a single-column join on that column suffices.
+  // Note: We currently copy all the contents of `blocksLeft` and `blocksRight`,
+  // because the interfaces we are using currently require the interface of a
+  // single IdTable-like thing.
+  // TODO<joka921> mitigate this requirement, or at least assess how expensive
+  // it is.
+  void addCartesianProductImpl(const LeftBlocks& blocksLeft,
+                               const RightBlocks& blocksRight) {
+    auto isEmpty = [](const auto& side) {
+      return side.empty() || side.front().fullBlock().empty();
+    };
+    // Nothing to do if one of the left or right sides is empty.
+    // Note: In this case the base class (in the `addCartesianProductImpl`
+    // function) has already added the rows from left to the result as per the
+    // OPTIONAL semantics.
+    if (isEmpty(blocksLeft) || isEmpty(blocksRight)) {
+      return;
+    }
+
+    // Create the allocator for the tables that are materialized below.
+    // TODO<joka921> pass in a proper allocator here.
+    auto allocator = makeUnlimitedAllocator<Id>();
+    AD_CORRECTNESS_CHECK(!blocksLeft.empty() && !blocksRight.empty());
+
+    // TODO<joka921> This can be much more efficient, in particular it could use
+    // zero copying.
+    // Concatenate all rows from the given `blocks` into a single IdTable.
+
+    auto materializeBlocksAsTable = [&allocator](const auto& blocks) {
+      // Note: It is crucial that we go through the `asStaticView` interface of
+      // the `IdTableAndFirstCols` interface, because we need to copy ALL the
+      // columns, not only the join columns, because the resultAdder needs them.
+      size_t numCols =
+          blocks.front().fullBlock().template asStaticView<0>().numColumns();
+      IdTable table(numCols, allocator);
+
+      // TODO<joka921> preallocate the sum of the index-range sizes.
+      for (const auto& block : blocks) {
+        const auto& staticView = block.fullBlock().template asStaticView<0>();
+        for (size_t idx : block.getIndexRange()) {
+          table.push_back(staticView[idx]);
+        }
+      }
+      return table;
+    };
+    IdTable leftTable = materializeBlocksAsTable(blocksLeft);
+    IdTable rightTable = materializeBlocksAsTable(blocksRight);
+
+    // If either table is empty, we don't have to do anything (same as above).
+    // TODO<joka921> Check whether this case can happen at all (if not, drop it).
+    if (leftTable.empty() || rightTable.empty()) {
+      return;
+    }
+
+    // TODO<joka921> A little wonky, is always a constant, pass it into this
+    // class!
+    AD_CORRECTNESS_CHECK(!blocksLeft.front().fullBlock().empty());
+    size_t numJoinCols = blocksLeft.front().fullBlock()[0].size();
+
+    // Extract the last join columns, on which we have to perform the join.
+    auto lastColLeft = leftTable.getColumn(numJoinCols - 1);
+    auto lastColRight = rightTable.getColumn(numJoinCols - 1);
+
+    this->compatibleRowAction_.setInput(leftTable, rightTable);
+    // Set up actions for the single-column join on the last column.
+    auto compAction = [this, begL = lastColLeft.begin(),
+                       begR = lastColRight.begin()](const auto& itL,
+                                                    const auto& itR) {
+      size_t leftIdx = itL - begL;
+      size_t rightIdx = itR - begR;
+      // Call the row adder with the full rows.
+      this->compatibleRowAction_.addRow(leftIdx, rightIdx);
+    };
+
+    // Set up the generator for UNDEF values in the left last column.
+    // TODO<joka921> Could optimize the case that there is no UNDEF at all.
+    auto endOfUndef = ql::ranges::find_if_not(lastColLeft, &Id::isUndefined);
+    auto findSmallerUndefRangeLeft = [&lastColLeft, endOfUndef](auto&&...) {
+      return ad_utility::IteratorRange{lastColLeft.begin(), endOfUndef};
+    };
+
+    auto notFoundAction = [this, begL = lastColLeft.begin()](const auto& it) {
+      size_t leftIdx = it - begL;
+      this->compatibleRowAction_.addOptionalRow(leftIdx);
+    };
+
+    // Perform the join on the last column only.
+    [[maybe_unused]] auto res = zipperJoinWithUndef(
+        lastColLeft, lastColRight, std::less<>{}, compAction,
+        findSmallerUndefRangeLeft, noop, notFoundAction, noop);
+
+    this->compatibleRowAction_.flush();
+  }
+
+  // Implement joinSubranges customization point by forwarding to
+  // specialOptionalJoin (the non-block version).
+  template <typename SubrangeLeft, typename SubrangeRight,
+            typename RowIndexAdder, typename FindSmallerUndefRangesLeft,
+            typename FindSmallerUndefRangesRight,
+            typename ElFromFirstNotFoundAction, typename CheckCancellation,
+            typename CoverUndefRanges>
+  void joinSubranges(const SubrangeLeft& subrangeLeft,
+                     const SubrangeRight& subrangeRight,
+                     const RowIndexAdder& rowIndexAdder,
+                     [[maybe_unused]] const FindSmallerUndefRangesLeft&
+                         findSmallerUndefRangesLeft,
+                     [[maybe_unused]] const FindSmallerUndefRangesRight&
+                         findSmallerUndefRangesRight,
+                     ElFromFirstNotFoundAction elFromFirstNotFoundAction,
+                     CheckCancellation checkCancellation,
+                     [[maybe_unused]] CoverUndefRanges coverUndefRanges) {
+    // Forward the arguments to the `specialOptionalJoin`.
+    // Note: The `FindSmallerUndefRanges...` arguments are ignored dummies which
+    // are required by the interface, but the undef handling is hardcoded in the
+    // `specialOptionalJoin`.
+    specialOptionalJoin(subrangeLeft, subrangeRight, rowIndexAdder,
+                        elFromFirstNotFoundAction, checkCancellation);
+  }
+};
+
 }  // namespace detail
 
+/**
+ * @brief Perform a special optional join for input ranges of blocks.
+ * This is a simplified implementation that works on blocks of row-like data.
+ * Preconditions:
+ * - Right input contains no UNDEF values in the join columns.
+ * - Left input only contains UNDEF in the last join column.
+ * - Both inputs are sorted lexicographically.
+ * - numJoinColumns must be equal to the number of columns in `leftBlocks` and
+ * `rightBlocks`. The join matches on all-but-last columns, then performs a join
+ * on the last column within matching groups.
+ */
+template <typename LeftBlocks, typename RightBlocks,
+          typename CompatibleRowAction>
+void specialOptionalJoinForBlocks(LeftBlocks&& leftBlocks,
+                                  RightBlocks&& rightBlocks,
+                                  auto numJoinColumns,
+                                  CompatibleRowAction& compatibleRowAction) {
+  using ProjectedLeft = ql::ranges::range_value_t<
+      ql::ranges::range_value_t<std::decay_t<LeftBlocks>>>;
+  using ProjectedRight = ql::ranges::range_value_t<
+      ql::ranges::range_value_t<std::decay_t<RightBlocks>>>;
+  static_assert(std::is_same_v<ProjectedLeft, ProjectedRight>);
+  // `JoinSide` only stores a reference to its projection, so the projection
+  // must be a named object that outlives the join (not a temporary). The
+  // comparator is kept alive the same way, as the join takes it by reference.
+  const ql::identity projection{};
+  const CompareAllButLast lessThan{static_cast<size_t>(numJoinColumns)};
+  const std::type_identity<ProjectedLeft> tag{};
+  auto leftSide = detail::makeJoinSide(leftBlocks, projection, tag);
+  auto rightSide = detail::makeJoinSide(rightBlocks, projection, tag);
+  detail::BlockZipperJoinImplForSpecialOptionalJoin<
+      decltype(leftSide), decltype(rightSide), CompatibleRowAction>
+      impl{leftSide, rightSide, lessThan, compatibleRowAction};
+  impl.template runJoin<JoinType::OPTIONAL>();
+}
+
 /**
  * @brief Perform a zipper/merge join between two sorted inputs that are given
  * as blocks of inputs, e.g. `std::vector<std::vector<int>>` or
diff --git a/src/util/JoinAlgorithms/JoinColumnMapping.h b/src/util/JoinAlgorithms/JoinColumnMapping.h
index c6ef20cbec..8e65995f06 100644
--- a/src/util/JoinAlgorithms/JoinColumnMapping.h
+++ b/src/util/JoinAlgorithms/JoinColumnMapping.h
@@ -114,16 +114,96 @@ class JoinColumnMapping {
   }
 };
 
+struct GetColsFromTable {
+  template <size_t numCols, typename Table>
+  decltype(auto) operator()(Table& table) {
+    return [&table]<size_t... I>(std::index_sequence<I...>) {
+      return ::ranges::views::zip(table.getColumn(I)...) |
+             ::ranges::views::transform([](auto&& tuple) {
+               return std::apply(
+                   [](auto&... refs) { return std::array{refs...}; },
+                   AD_FWD(tuple));
+             });
+    }(std::make_index_sequence<numCols>());
+  }
+};
+
 // A class that stores a complete `IdTable`, but when being treated as a range
-// via the `begin/end/operator[]` functions, then it only gives access to the
-// first column. This is very useful for the lazy join implementations
-// (currently used in `Join.cpp`), where we need very efficient access to the
-// join column for comparing rows, but also need to store the complete table to
-// be able to write the other columns of a matching row to the result.
-// This class is templated so we can use it for `IdTable` as well as for
-// `IdTableView`.
+// via the `begin/end/operator[]` functions, then it only gives `const` access
+// to the first `numCols` columns (via the `GetColsFromTable` struct above).
+// This is very useful for the lazy join implementations (currently used in
+// `Join.cpp` and `OptionalJoin.cpp`), where we need very efficient access to
+// the join columns for comparing rows, but also need to store the complete
+// table to be able to write the other columns of a matching row to the result.
+// This class is templated so it works for `IdTable` as well as `IdTableView`.
+// Note: The current implementation always copies the columns when they are
+// accessed (as a `std::array<Id, numCols>`). The reason is that we want
+// something with a constant size that can be iterated via a runtime for-loop.
+// `std::array` can't store references, and `std::tuple<Id&...>` can't be
+// iterated.
+// TODO<joka921> Implement an iterable tuple of the same types, but actually,
+// for only two or three columns the full arrays (which can be optimized by the
+// compiler) shouldn't be too bad..
+template <size_t numCols, typename Table>
+struct IdTableAndFirstCols {
+ private:
+  Table table_;
+  LocalVocab localVocab_;
+
+ public:
+  // Typedef needed for generic interfaces.
+  using ConstBaseIterator = ql::ranges::iterator_t<
+      decltype(GetColsFromTable{}.template operator()<numCols>(
+          std::declval<const Table&>()))>;
+  using iterator = ConstBaseIterator;
+  using const_iterator = ConstBaseIterator;
+  // Get access to the first `numCols` columns (as a view of `std::array`s).
+  decltype(auto) cols() const {
+    return GetColsFromTable{}.template operator()<numCols>(table_);
+  }
+  // Construct by taking ownership of the table.
+  IdTableAndFirstCols(Table t, LocalVocab localVocab)
+      : table_{std::move(t)}, localVocab_{std::move(localVocab)} {}
+
+  // The following functions all refer to the exposed `numCols` columns.
+  const_iterator begin() const { return cols().begin(); }
+  const_iterator end() const { return cols().end(); }
+
+  bool empty() const { return cols().empty(); }
+
+  decltype(auto) operator[](size_t idx) const { return cols()[idx]; }
+  decltype(auto) front() const { return cols().front(); }
+  decltype(auto) back() const { return cols().back(); }
+
+  size_t size() const { return cols().size(); }
+
+  // Note: This function only refers to the exposed `numCols` columns, not to
+  // all the columns in the underlying `Table`. This interface is currently
+  // used by the `specialOptionalJoin` function in `JoinAlgorithms.h`.
+  constexpr size_t numColumns() const { return numCols; }
+  decltype(auto) getColumn(size_t columnIndex) const {
+    return table_.getColumn(columnIndex);
+  }
+
+  // This interface is required in `Join.cpp` by the `AddCombinedRowToTable`
+  // class. Calling this function yields the same type, no matter if `Table` is
+  // `IdTable` or `IdTableView`. In addition, it refers to the full underlying
+  // table, not only to the first `numCols` columns.
+  template <size_t I = 0>
+  IdTableView<I> asStaticView() const {
+    return table_.template asStaticView<I>();
+  }
+
+  const LocalVocab& getLocalVocab() const { return localVocab_; }
+};
+
+// Specialization of `IdTableAndFirstCols` for only a single column where we
+// don't need to copy into an `array`, but directly return a single `Id&`.
+// Note: this changes the interface (in particular the single rows can't be
+// iterated over), but currently this is used by the `Join` class, which
+// expects this interface.
 template <typename Table>
-struct IdTableAndFirstCol {
+struct IdTableAndFirstCols<1, Table> {
  private:
   Table table_;
   LocalVocab localVocab_;
@@ -135,7 +215,7 @@ struct IdTableAndFirstCol {
       std::decay_t<decltype(std::as_const(table_).getColumn(0).begin())>;
 
   // Construct by taking ownership of the table.
-  IdTableAndFirstCol(Table t, LocalVocab localVocab)
+  IdTableAndFirstCols(Table t, LocalVocab localVocab)
       : table_{std::move(t)}, localVocab_{std::move(localVocab)} {}
 
   // Get access to the first column.
@@ -158,7 +238,8 @@ struct IdTableAndFirstCol {
 
   // This interface is required in `Join.cpp` by the `AddCombinedRowToTable`
   // class. Calling this function yields the same type, no matter if `Table` is
-  // `IdTable` or `IdTableView`.
+  // `IdTable` or `IdTableView`. In addition, it refers to the full underlying
+  // table, not only to the first column.
   template <size_t I = 0>
   IdTableView<I> asStaticView() const {
     return table_.template asStaticView<I>();
diff --git a/test/JoinAlgorithmsTest.cpp b/test/JoinAlgorithmsTest.cpp
index 5140b59f08..b4767936e4 100644
--- a/test/JoinAlgorithmsTest.cpp
+++ b/test/JoinAlgorithmsTest.cpp
@@ -5,7 +5,11 @@
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
+#include <random>
+#include <set>
+
 #include "./util/GTestHelpers.h"
+#include "util/IdTableHelpers.h"
 #include "util/JoinAlgorithms/JoinAlgorithms.h"
 #include "util/TransparentFunctors.h"
 
@@ -561,3 +565,373 @@ TEST(JoinAlgorithm, DefaultIsUndefinedFunctionAlwaysReturnsFalse) {
   EXPECT_FALSE(impl.isUndefined_(I(1)));
   EXPECT_FALSE(impl.isUndefined_(Id::makeUndefined()));
 }
+
+// _____________________________________________________________________________
+// Tests for specialOptionalJoinForBlocks
+// _____________________________________________________________________________
+
+namespace {
+// Helper types for testing special optional join with Id values.
+using IdBlock = IdTable;
+using IdNestedBlock = std::vector<IdBlock>;
+using IdJoinResult = std::vector<std::array<Id, 2>>;
+
+auto makeVec = [](const auto&... tables) {
+  std::vector<IdTable> result;
+  (..., result.push_back(tables.clone()));
+  return result;
+};
+
+auto makeTable = [](const VectorTable& table) {
+  return makeIdTableFromVector(table);
+};
+
+// Row adder for `IdBlock` inputs that appends two-column rows to `*target_`.
+struct IdRowAdder {
+  const IdBlock* left_{};
+  const IdBlock* right_{};
+  IdJoinResult* target_{};
+
+  void setInput(const IdBlock& left, const IdBlock& right) {
+    left_ = &left;
+    right_ = &right;
+  }
+
+  void setOnlyLeftInputForOptionalJoin(const IdBlock& left) { left_ = &left; }
+
+  void addRow(size_t leftIndex, size_t rightIndex) {
+    auto x1 = (*left_)[leftIndex][0];
+    auto x2 = (*left_)[leftIndex][1];
+    auto y1 = (*right_)[rightIndex][0];
+    auto y2 = (*right_)[rightIndex][1];
+    AD_CONTRACT_CHECK(x1 == y1);
+    target_->push_back(std::array{x1, x2.isUndefined() ? y2 : x2});
+  }
+
+  void addOptionalRow(size_t leftIndex) {
+    auto x1 = (*left_)[leftIndex][0];
+    auto x2 = (*left_)[leftIndex][1];
+    target_->emplace_back(std::array{x1, x2});
+  }
+
+  template <typename R1, typename R2>
+  void addRows(const R1& leftIndices, const R2& rightIndices) {
+    for (auto leftIdx : leftIndices) {
+      for (auto rightIdx : rightIndices) {
+        addRow(leftIdx, rightIdx);
+      }
+    }
+  }
+
+  // Iterator interface (matches). NOTE(review): 3 Ids don't fit `IdJoinResult`.
+  template <typename LeftIt, typename RightIt>
+  void operator()(LeftIt leftIt, RightIt rightIt) {
+    auto [x1, x2] = *leftIt;
+    auto [y1, y2] = *rightIt;
+    AD_CONTRACT_CHECK(x1 == y1);
+    target_->push_back(std::array{x1, x2, y2});
+  }
+
+  // Iterator interface (non-matches). NOTE(review): same 3-vs-2 mismatch.
+  template <typename LeftIt>
+  void operator()(LeftIt leftIt) {
+    auto [x1, x2] = *leftIt;
+    target_->push_back(std::array{x1, x2, Id::makeUndefined()});
+  }
+
+  void flush() const {
+    // Does nothing, but is required for the interface.
+  }
+};
+
+auto makeIdRowAdder(IdJoinResult& target) {
+  return IdRowAdder{nullptr, nullptr, &target};
+}
+
+// Helper function for creating undefined Ids.
+auto U2() { return Id::makeUndefined(); }
+
+// Helper function to test the special optional join with blocks.
+// TODO<joka921> We have to fix the semantics for move-only IdTables...
+void testSpecialOptionalJoin(IdNestedBlock a, IdNestedBlock b,
+                             IdJoinResult expected, size_t numJoinColumns = 2,
+                             source_location l = AD_CURRENT_SOURCE_LOC()) {
+  auto trace = generateLocationTrace(l);
+  IdJoinResult result;
+  auto adder = makeIdRowAdder(result);
+
+  ad_utility::specialOptionalJoinForBlocks(std::move(a), std::move(b),
+                                           numJoinColumns, adder);
+
+  // The result must be sorted on the first column.
+  EXPECT_TRUE(ql::ranges::is_sorted(result, std::less<>{}, ad_utility::first));
+  // The exact order of the elements with the same first column is not important
+  // and depends on implementation details. We therefore do not enforce it here.
+  EXPECT_THAT(result, ::testing::UnorderedElementsAreArray(expected));
+}
+
+// Split an `IdTable` into consecutive blocks. `splitPoints` must be sorted
+// (non-decreasing) and each point must be `<= table.numRows()`. Every split
+// point is the exclusive end of one block and the start of the next one, so
+// `k` split points yield `k + 1` blocks. A repeated split point (as well as a
+// split point at `0` or at `table.numRows()`) yields an empty block.
+IdNestedBlock splitIdTable(const IdTable& table,
+                           const std::vector<size_t>& splitPoints) {
+  IdNestedBlock result;
+
+  // Append a copy of the rows `[begin, end)` of `table` as a new block.
+  auto appendBlock = [&table, &result](size_t begin, size_t end) {
+    IdTable block(table.numColumns(), table.getAllocator());
+    block.resize(end - begin);
+    for (size_t i = 0; i < end - begin; ++i) {
+      for (size_t col = 0; col < table.numColumns(); ++col) {
+        block(i, col) = table(begin + i, col);
+      }
+    }
+    result.push_back(std::move(block));
+  };
+
+  size_t start = 0;
+  for (size_t splitPoint : splitPoints) {
+    // Unsorted split points would make `splitPoint - start` underflow.
+    AD_CONTRACT_CHECK(start <= splitPoint && splitPoint <= table.numRows());
+    appendBlock(start, splitPoint);
+    start = splitPoint;
+  }
+
+  // Add final block from last split point to end.
+  appendBlock(start, table.numRows());
+  return result;
+}
+
+// Generate split configurations (sorted split point lists for `splitIdTable`)
+// for a table with `tableSize` rows: no split, one row per block (if not
+// empty), and (for >= 2 rows) `numRandomSplits` random ones seeded by `seed`.
+std::vector<std::vector<size_t>> generateSplitConfigurations(
+    size_t tableSize, int numRandomSplits, int seed) {
+  std::vector<std::vector<size_t>> configs;
+
+  // Pathological case 1: All in one block (no splits).
+  configs.push_back({});
+
+  // Pathological case 2: One element per block (split after each element).
+  if (tableSize > 0) {
+    std::vector<size_t> onePerElement;
+    for (size_t i = 1; i < tableSize; ++i) {
+      onePerElement.push_back(i);
+    }
+    configs.push_back(onePerElement);
+  }
+
+  // Generate random split configurations.
+  std::mt19937 rng(seed);
+  for (int i = 0; i < numRandomSplits; ++i) {
+    if (tableSize <= 1) {
+      // Can't split tables with 0 or 1 rows in interesting ways.
+      continue;
+    }
+
+    // Decide how many split points to draw (1 to `min(tableSize, 10)`).
+    std::uniform_int_distribution<size_t> numSplitsDist(
+        1, std::min(tableSize, size_t{10}));
+    size_t numSplits = numSplitsDist(rng);
+
+    // Generate random split points.
+    std::uniform_int_distribution<size_t> splitPointDist(1, tableSize - 1);
+    std::set<size_t> splitSet;
+    for (size_t j = 0; j < numSplits; ++j) {
+      splitSet.insert(splitPointDist(rng));
+    }
+
+    // Randomly add empty blocks by repeating some split points. This has to be
+    // done in a vector, because a `std::set` silently drops the duplicates.
+    std::uniform_real_distribution<double> emptyBlockChance(0.0, 1.0);
+    std::vector<size_t> splits;
+    for (size_t splitPoint : splitSet) {
+      splits.push_back(splitPoint);
+      if (emptyBlockChance(rng) < 0.2) {  // 20% chance of empty block.
+        splits.push_back(splitPoint);
+      }
+    }
+    configs.push_back(splits);
+  }
+
+  return configs;
+}
+
+// Test the special optional join with automatic block splitting. The inputs
+// are given as single blocks; every combination of a left and a right split
+// configuration is then checked against `expected`.
+void testSpecialOptionalJoinWithSplits(
+    const IdTable& leftTable, const IdTable& rightTable,
+    const IdJoinResult& expected, int numRandomSplits = 10,
+    size_t numJoinColumns = 2, source_location l = AD_CURRENT_SOURCE_LOC()) {
+  auto trace = generateLocationTrace(l);
+
+  // The split points of both sides are drawn with different fixed seeds.
+  const auto leftConfigs =
+      generateSplitConfigurations(leftTable.numRows(), numRandomSplits, 42);
+  const auto rightConfigs =
+      generateSplitConfigurations(rightTable.numRows(), numRandomSplits, 123);
+
+  // Run the join once for each pair of (left split, right split).
+  for (const auto& leftSplits : leftConfigs) {
+    for (const auto& rightSplits : rightConfigs) {
+      // The blocks are freshly created for each run and handed over by value.
+      testSpecialOptionalJoin(splitIdTable(leftTable, leftSplits),
+                              splitIdTable(rightTable, rightSplits), expected,
+                              numJoinColumns, l);
+    }
+  }
+}
+}  // namespace
+
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinEmptyInputs) {
+  testSpecialOptionalJoin({}, {}, {});
+
+  auto emptyTable = IdTable(2, makeUnlimitedAllocator<Id>());
+  auto nonEmpty = makeIdTableFromVector({{I(13), I(0)}});
+  testSpecialOptionalJoin(makeVec(nonEmpty), makeVec(emptyTable),
+                          {{I(13), I(0)}});
+
+  testSpecialOptionalJoin(makeVec(emptyTable), makeVec(nonEmpty), {});
+}
+
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinSingleBlock) {
+  IdNestedBlock a = makeVec(
+      makeTable({{I(1), I(11)}, {I(4), I(12)}, {I(4), I(12)}, {I(42), I(14)}}));
+  IdNestedBlock b = makeVec(makeTable({{{I(0), I(24)},
+                                        {I(4), I(12)},
+                                        {I(4), I(12)},
+                                        {I(5), I(25)},
+                                        {I(19), I(26)},
+                                        {I(42), I(27)}}}));
+  IdJoinResult expectedResult{{I(1), I(11)}, {I(4), I(12)}, {I(4), I(12)},
+                              {I(4), I(12)}, {I(4), I(12)}, {I(42), I(14)}};
+  testSpecialOptionalJoin(std::move(a), std::move(b), expectedResult);
+}
+
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinSingleBlockWithSplits) {
+  auto leftTable =
+      makeTable({{I(1), I(11)}, {I(4), I(12)}, {I(4), I(12)}, {I(42), I(14)}});
+  auto rightTable = makeTable({{I(0), I(24)},
+                               {I(4), I(12)},
+                               {I(4), I(12)},
+                               {I(5), I(25)},
+                               {I(19), I(26)},
+                               {I(42), I(27)}});
+  IdJoinResult expectedResult{{I(1), I(11)}, {I(4), I(12)}, {I(4), I(12)},
+                              {I(4), I(12)}, {I(4), I(12)}, {I(42), I(14)}};
+  testSpecialOptionalJoinWithSplits(leftTable, rightTable, expectedResult);
+}
+
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinWithUndefsOnLeft) {
+  // Test that left entries with undefined in second column match right entries
+  // on first column only, and the result contains the right's second column.
+  auto leftTable = makeTable({{I(1), U2()}, {I(4), U2()}, {I(5), I(50)}});
+  auto rightTable = makeTable({{I(1), I(10)}, {I(4), I(40)}, {I(5), I(50)}});
+  IdJoinResult expectedResult{{I(1), I(10)}, {I(4), I(40)}, {I(5), I(50)}};
+  testSpecialOptionalJoinWithSplits(leftTable, rightTable, expectedResult);
+}
+
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinMultipleUndefsForSameFirstColumn) {
+  // Test multiple left entries with same first column and undefined second
+  // column. Each should match all right entries with that first column.
+  auto leftTable =
+      makeTable({{I(5), U2()}, {I(5), U2()}, {I(5), U2()}, {I(10), I(100)}});
+  auto rightTable = makeTable({{I(5), I(50)}, {I(5), I(51)}, {I(10), I(100)}});
+  IdJoinResult expectedResult{{I(5), I(50)},  {I(5), I(50)},
+                              {I(5), I(50)},  // 3 undefs match I(5), I(50)
+                              {I(5), I(51)},  {I(5), I(51)},
+                              {I(5), I(51)},  // 3 undefs match I(5), I(51)
+                              {I(10), I(100)}};
+  testSpecialOptionalJoinWithSplits(leftTable, rightTable, expectedResult);
+}
+
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinMultipleEntriesSameFirstColumn) {
+  // Test multiple entries with same first column but different second columns
+  // on both sides. No second column matches, so the left rows stay unchanged.
+  auto leftTable =
+      makeTable({{I(3), I(30)}, {I(3), I(31)}, {I(3), I(32)}, {I(7), I(70)}});
+  auto rightTable = makeTable({{I(3), I(130)}, {I(3), I(131)}, {I(7), I(170)}});
+  IdJoinResult expectedResult{
+      {I(3), I(30)}, {I(3), I(31)}, {I(3), I(32)}, {I(7), I(70)}};
+  testSpecialOptionalJoinWithSplits(leftTable, rightTable, expectedResult);
+}
+
+// TODO<joka921> Currently a duplicate....
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinMultipleEntriesCartesian) {
+  // Test multiple entries with same first column but different second columns
+  // on both sides. No second column matches, so the left rows stay unchanged.
+  auto leftTable =
+      makeTable({{I(3), I(30)}, {I(3), I(31)}, {I(3), I(32)}, {I(7), I(70)}});
+  auto rightTable = makeTable({{I(3), I(130)}, {I(3), I(131)}, {I(7), I(170)}});
+  IdJoinResult expectedResult{
+      {I(3), I(30)}, {I(3), I(31)}, {I(3), I(32)}, {I(7), I(70)}};
+  testSpecialOptionalJoinWithSplits(leftTable, rightTable, expectedResult);
+}
+
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinNoMatches) {
+  // Test when left entries have no matching right entries.
+  // All left entries should appear in result with their original values.
+  auto leftTable = makeTable({{I(1), I(10)}, {I(2), I(20)}, {I(3), I(30)}});
+  auto rightTable = makeTable({{I(5), I(50)}, {I(6), I(60)}, {I(7), I(70)}});
+  IdJoinResult expectedResult{{I(1), I(10)}, {I(2), I(20)}, {I(3), I(30)}};
+  testSpecialOptionalJoinWithSplits(leftTable, rightTable, expectedResult);
+}
+
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinPartialMatches) {
+  // Test mix of matching and non-matching left entries.
+  auto leftTable =
+      makeTable({{I(1), I(10)}, {I(2), I(20)}, {I(3), U2()}, {I(4), I(40)}});
+  auto rightTable = makeTable({{I(2), I(20)}, {I(3), I(30)}, {I(5), I(50)}});
+  IdJoinResult expectedResult{
+      {I(1), I(10)},  // No match, keep original.
+      {I(2), I(20)},  // Exact match.
+      {I(3), I(30)},  // Left has U2(), matches right on first column.
+      {I(4), I(40)}   // No match, keep original.
+  };
+  testSpecialOptionalJoinWithSplits(leftTable, rightTable, expectedResult);
+}
+
+// _____________________________________________________________________________
+TEST(JoinAlgorithms, SpecialOptionalJoinComplexCombination) {
+  // Comprehensive test combining all scenarios: undefs, multiples,
+  // matches/non-matches.
+  auto leftTable = makeTable({{I(1), U2()},
+                              {I(1), U2()},
+                              {I(2), I(20)},
+                              {I(3), I(30)},
+                              {I(3), I(31)},
+                              {I(4), U2()},
+                              {I(5), I(50)},
+                              {I(6), I(60)}});
+  auto rightTable = makeTable({{I(1), I(10)},
+                               {I(1), I(11)},
+                               {I(2), I(20)},
+                               {I(3), I(30)},
+                               {I(4), I(40)},
+                               {I(7), I(70)}});
+  IdJoinResult expectedResult{
+      {I(1), I(10)},  // Left I(1), U2() matches right I(1), I(10).
+      {I(1), I(10)},  // Second left I(1), U2() also matches.
+      {I(1), I(11)},  // Left I(1), U2() matches right I(1), I(11).
+      {I(1), I(11)},  // Second left I(1), U2() also matches.
+      {I(2), I(20)},  // Exact match.
+      {I(3), I(30)},  // Exact match on both columns.
+      {I(3), I(31)},  // Left I(3), I(31) doesn't match, keep original.
+      {I(4), I(40)},  // Left I(4), U2() matches right I(4), I(40).
+      {I(5), I(50)},  // No match on right, keep original.
+      {I(6), I(60)}   // No match on right, keep original.
+  };
+  testSpecialOptionalJoinWithSplits(leftTable, rightTable, expectedResult);
+}