
Commit 536c935

update docs
1 parent bcaeb5f commit 536c935

11 files changed: +433 -118 lines changed


docs/howto/model.rst (+3 -3)

@@ -24,7 +24,7 @@ Say we wanted to build a custom bi-encoder model that adds an additional linear
        super().__init__(**kwargs)
        self.additional_linear_layer = additional_linear_layer

-Next, we need to subclass the :py:class:`lightning_ir.bi_encoder.model.BiEncoderModel` and override the :py:class:`lightning_ir.bi_encoder.model.BiEncoderModel._encode` method to include the additional linear layer. We also need to ensure that our new config class is registered with our new model as the :py:meth:`~lightning_ir.bi_encoder.model.BiEncoderModel.config_class` attribute. In the :py:class:`lightning_ir.bi_encoder.model.BiEncoderModel._encode` method, the :py:meth:`~lightning_ir.bi_encoder.model.BiEncoderModel._backbone_forward` method runs the backbone model and returns the contextualized embeddings of the input sequence. We then apply our additional linear layer to the pooled embeddings. Afterwards, the various steps of the processing pipeline for bi-encoders are applied (see :ref:`concepts-model` for more details). For example:
+Next, we need to subclass the :py:class:`lightning_ir.bi_encoder.model.BiEncoderModel` and override the :py:class:`lightning_ir.bi_encoder.model.BiEncoderModel.encode` method to include the additional linear layer. We also need to ensure that our new config class is registered with our new model as the :py:meth:`~lightning_ir.bi_encoder.model.BiEncoderModel.config_class` attribute. In the :py:class:`lightning_ir.bi_encoder.model.BiEncoderModel.encode` method, the :py:meth:`~lightning_ir.bi_encoder.model.BiEncoderModel._backbone_forward` method runs the backbone model and returns the contextualized embeddings of the input sequence. We then apply our additional linear layer to the pooled embeddings. Afterwards, the various steps of the processing pipeline for bi-encoders are applied (see :ref:`concepts-model` for more details). For example:

.. code-block:: python

@@ -46,7 +46,7 @@ Next, we need to subclass the :py:class:`lightning_ir.bi_encoder.model.BiEncoder
            config.hidden_size, config.hidden_size
        )

-    def _encode(
+    def encode(
        self,
        encoding: BatchEncoding,
        expansion: bool = False,

@@ -62,7 +62,7 @@ Next, we need to subclass the :py:class:`lightning_ir.bi_encoder.model.BiEncoder
        embeddings = self._pooling(embeddings, encoding["attention_mask"], pooling_strategy)
        if self.config.normalize:
            embeddings = torch.nn.functional.normalize(embeddings, dim=-1)
-        scoring_mask = self._scoring_mask(
+        scoring_mask = self.scoring_mask(
            encoding["input_ids"],
            encoding["attention_mask"],
            expansion,
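
Taken together with the config hunk above, the override being renamed here assembles into roughly the following (a condensed, hypothetical sketch: the .last_hidden_state access, the abbreviated parameter list, and the trailing scoring_mask arguments are assumptions, and examples/custom_bi_encoder.py in this same commit remains the authoritative version):

    import torch
    from transformers import BatchEncoding
    from lightning_ir.bi_encoder import BiEncoderConfig, BiEncoderEmbedding, BiEncoderModel


    class CustomBiEncoderConfig(BiEncoderConfig):
        def __init__(self, additional_linear_layer: bool = True, **kwargs):
            super().__init__(**kwargs)
            self.additional_linear_layer = additional_linear_layer


    class CustomBiEncoderModel(BiEncoderModel):
        config_class = CustomBiEncoderConfig  # register the custom config class with the model

        def __init__(self, config, *args, **kwargs):
            super().__init__(config, *args, **kwargs)
            self.additional_linear_layer = None
            if config.additional_linear_layer:
                self.additional_linear_layer = torch.nn.Linear(config.hidden_size, config.hidden_size)

        def encode(self, encoding: BatchEncoding, expansion: bool = False, pooling_strategy=None) -> BiEncoderEmbedding:
            # the backbone returns the contextualized embeddings of the input sequence
            embeddings = self._backbone_forward(**encoding).last_hidden_state
            # custom step: the extra linear layer (see the full example for its exact
            # position relative to the other pipeline steps)
            if self.additional_linear_layer is not None:
                embeddings = self.additional_linear_layer(embeddings)
            embeddings = self._pooling(embeddings, encoding["attention_mask"], pooling_strategy)
            if self.config.normalize:
                embeddings = torch.nn.functional.normalize(embeddings, dim=-1)
            # the rename in this commit: public scoring_mask instead of the old _scoring_mask
            scoring_mask = self.scoring_mask(encoding["input_ids"], encoding["attention_mask"], expansion)
            return BiEncoderEmbedding(embeddings, scoring_mask)

Whatever the exact signatures, the takeaway of the hunk is the renamed hooks: encode and scoring_mask are now the public surface that custom bi-encoder models override.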

examples/custom_bi_encoder.py (+2 -2)

@@ -36,7 +36,7 @@ def __init__(self, config, *args, **kwargs):
        if config.additional_linear_layer:
            self.additional_linear_layer = torch.nn.Linear(config.hidden_size, config.hidden_size)

-    def _encode(
+    def encode(
        self,
        encoding: BatchEncoding,
        expansion: bool = False,

@@ -52,7 +52,7 @@ def _encode(
        embeddings = self._pooling(embeddings, encoding["attention_mask"], pooling_strategy)
        if self.config.normalize:
            embeddings = torch.nn.functional.normalize(embeddings, dim=-1)
-        scoring_mask = self._scoring_mask(
+        scoring_mask = self.scoring_mask(
            encoding["input_ids"],
            encoding["attention_mask"],
            expansion,

lightning_ir/base/model.py (+16 -5)

@@ -8,7 +8,7 @@
from dataclasses import dataclass
from functools import partial, wraps
from pathlib import Path
-from typing import Any, Callable, Literal, Mapping, Sequence, Type, TypeVar
+from typing import Any, Callable, Literal, Mapping, Protocol, Sequence, Type, TypeVar

import torch
from transformers import MODEL_MAPPING, BatchEncoding, BertModel

@@ -232,10 +232,21 @@ def _cat_outputs(
    return OutputClass(**{key: _cat_outputs(value, types[key]) for key, value in agg.items()})


-def _batch_encoding(
-    func: Callable[[LightningIRModel, BatchEncoding, ...], Any]
-) -> Callable[[LightningIRModel, BatchEncoding, ...], Any]:
-    """Decorator to enable sub-batching for models that support it."""
+class BatchEncodingWrapper(Protocol):
+    def __call__(self, encoding: BatchEncoding, *args, **kwargs) -> Any: ...
+
+
+def batch_encoding_wrapper(func: BatchEncodingWrapper) -> BatchEncodingWrapper:
+    """Decorator to enable sub-batching for models that support it. Lowers the batch size of the input batch encoding
+    if the model runs out of memory.
+
+    :param func: Function to wrap that takes a batch encoding
+    :type func: BatchEncodingWrapper
+    :raises e: If CUDA runs out of memory even after lowering the batch size to 1
+    :raises ValueError: If no output was generated
+    :return: Wrapped function
+    :rtype: BatchEncodingWrapper
+    """

    @wraps(func)
    def wrapper(self, encoding: BatchEncoding, *args, **kwargs) -> Any:
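
The new docstring describes an out-of-memory fallback; the wrapped body is not part of this hunk, but the behaviour it documents can be pictured with a rough, stand-alone sketch like the one below (illustrative only, assuming the familiar catch-the-OOM-error-and-halve strategy; lightning-ir's actual wrapper may split the encoding and merge the partial outputs differently):

    from functools import wraps
    from typing import Any

    from transformers import BatchEncoding


    def sub_batching_sketch(func):
        """Halve the sub-batch size on CUDA OOM and stitch the partial results back together."""

        @wraps(func)
        def wrapper(self, encoding: BatchEncoding, *args, **kwargs) -> Any:
            batch_size = encoding["input_ids"].shape[0]
            sub_batch_size = batch_size
            outputs: list = []
            while True:
                try:
                    outputs = []
                    for start in range(0, batch_size, sub_batch_size):
                        # slice every field of the batch encoding to the current sub-batch
                        sub_encoding = BatchEncoding(
                            {key: value[start : start + sub_batch_size] for key, value in encoding.items()}
                        )
                        outputs.append(func(self, sub_encoding, *args, **kwargs))
                    break
                except RuntimeError as e:
                    # give up once the sub-batch size cannot be lowered any further
                    if "out of memory" not in str(e) or sub_batch_size == 1:
                        raise e
                    sub_batch_size //= 2  # lower the batch size and retry
            if not outputs:
                raise ValueError("No output was generated.")
            # a real implementation would concatenate the partial outputs (e.g. torch.cat per field)
            return outputs[0] if len(outputs) == 1 else outputs

        return wrapper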

lightning_ir/base/module.py (+4 -11)

@@ -165,15 +165,8 @@ def prepare_input(
            encodings[key] = encodings[key].to(self.device)
        return encodings

-    def compute_losses(self, batch: TrainBatch, output: LightningIROutput) -> List[torch.Tensor]:
-        """Computes the losses for the batch.
-
-        :param batch: Batch of training data
-        :type batch: TrainBatch
-        :raises NotImplementedError: Must be implemented by derived class
-        :return: List of losses, one for each loss function
-        :rtype: List[torch.Tensor]
-        """
+    def _compute_losses(self, batch: TrainBatch, output: LightningIROutput) -> List[torch.Tensor]:
+        """Computes the losses for a training batch."""
        raise NotImplementedError

    def training_step(self, batch: TrainBatch, batch_idx: int) -> torch.Tensor:

@@ -190,7 +183,7 @@ def training_step(self, batch: TrainBatch, batch_idx: int) -> torch.Tensor:
        if self.loss_functions is None:
            raise ValueError("Loss functions are not set")
        output = self.forward(batch)
-        losses = self.compute_losses(batch, output)
+        losses = self._compute_losses(batch, output)
        total_loss = torch.tensor(0)
        assert len(losses) == len(self.loss_functions)
        for (loss_function, loss_weight), loss in zip(self.loss_functions, losses):

@@ -205,7 +198,7 @@ def validation_step(
        """Handles the validation step for the model.

        :param batch: Batch of validation or testing data
-        :type batch: TrainBatch | RankBatch
+        :type batch: TrainBatch | RankBatch | SearchBatch
        :param batch_idx: Index of the batch
        :type batch_idx: int
        :param dataloader_idx: Index of the dataloader, defaults to 0
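
The behavioural contract of training_step is unchanged by the rename: each loss returned by the now-private _compute_losses is paired with its loss function's weight and summed. In isolation, that aggregation reduces to the following pattern (a self-contained illustration with made-up numbers, not the module's actual code):

    import torch

    # stand-ins for the losses returned by _compute_losses and their configured weights
    losses = [torch.tensor(0.8), torch.tensor(0.2)]
    loss_weights = [1.0, 0.5]

    total_loss = torch.tensor(0.0)
    for loss_weight, loss in zip(loss_weights, losses):
        total_loss = total_loss + loss_weight * loss  # weighted sum over all loss functions

    print(total_loss)  # tensor(0.9000)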

lightning_ir/bi_encoder/__init__.py (+5 -0)

@@ -1,3 +1,8 @@
+"""Base module for bi-encoder models.
+
+This module provides the main classes and functions for bi-encoder models, including configurations, models,
+modules, and tokenizers."""
+
from .config import BiEncoderConfig
from .model import BiEncoderEmbedding, BiEncoderModel, BiEncoderOutput, ScoringFunction
from .module import BiEncoderModule

lightning_ir/bi_encoder/config.py (+38 -2)

@@ -1,3 +1,9 @@
+"""
+Configuration module for bi-encoder models.
+
+This module defines the configuration class used to instantiate bi-encoder models.
+"""
+
import json
import os
from os import PathLike

@@ -109,22 +115,52 @@ def __init__(
        self.projection = projection

    def to_dict(self) -> Dict[str, Any]:
+        """Overrides the transformers.PretrainedConfig.to_dict_ method to include the added arguments, the backbone
+        model type, and remove the mask scoring tokens.
+
+        .. _transformers.PretrainedConfig.to_dict: \
+            https://huggingface.co/docs/transformers/en/main_classes/configuration#transformers.PretrainedConfig.to_dict
+
+        :return: Configuration dictionary
+        :rtype: Dict[str, Any]
+        """
+
        output = super().to_dict()
        if "query_mask_scoring_tokens" in output:
            output.pop("query_mask_scoring_tokens")
        if "doc_mask_scoring_tokens" in output:
            output.pop("doc_mask_scoring_tokens")
        return output

-    def save_pretrained(self, save_directory: str | PathLike, push_to_hub: bool = False, **kwargs):
+    def save_pretrained(self, save_directory: str | PathLike, **kwargs) -> None:
+        """Overrides the transformers.PretrainedConfig.save_pretrained_ method to additionally save the tokens which
+        should be masked during scoring.
+
+        .. _transformers.PretrainedConfig.save_pretrained: \
+            https://huggingface.co/docs/transformers/en/main_classes/configuration#transformers.PretrainedConfig.save_pretrained
+
+        :param save_directory: Directory to save the configuration
+        :type save_directory: str | PathLike
+        """
        with open(os.path.join(save_directory, "mask_scoring_tokens.json"), "w") as f:
            json.dump({"query": self.query_mask_scoring_tokens, "doc": self.doc_mask_scoring_tokens}, f)
-        return super().save_pretrained(save_directory, push_to_hub, **kwargs)
+        return super().save_pretrained(save_directory, **kwargs)

    @classmethod
    def get_config_dict(
        cls, pretrained_model_name_or_path: str | PathLike, **kwargs
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        """Overrides the transformers.PretrainedConfig.get_config_dict_ method to load the tokens that should be masked
+        during scoring.
+
+        .. _transformers.PretrainedConfig.get_config_dict: \
+            https://huggingface.co/docs/transformers/en/main_classes/configuration#transformers.PretrainedConfig.get_config_dict
+
+        :param pretrained_model_name_or_path: Name or path of the pretrained model
+        :type pretrained_model_name_or_path: str | PathLike
+        :return: Configuration dictionary and additional keyword arguments
+        :rtype: Tuple[Dict[str, Any], Dict[str, Any]]
+        """
        config_dict, kwargs = super().get_config_dict(pretrained_model_name_or_path, **kwargs)
        mask_scoring_tokens = None
        mask_scoring_tokens_path = os.path.join(pretrained_model_name_or_path, "mask_scoring_tokens.json")
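
The three overridden methods all revolve around one side-car file, mask_scoring_tokens.json, written next to the configuration on save and read back on load. Isolated from the config class, the round trip looks like this (the token values below are made up; the real logic runs inside BiEncoderConfig.save_pretrained and BiEncoderConfig.get_config_dict as shown in the hunk above):

    import json
    import os
    import tempfile

    # stand-ins for BiEncoderConfig.query_mask_scoring_tokens / doc_mask_scoring_tokens
    query_mask_scoring_tokens = ["[MASK]"]
    doc_mask_scoring_tokens = None

    save_directory = tempfile.mkdtemp()

    # what save_pretrained additionally writes next to the serialized configuration
    with open(os.path.join(save_directory, "mask_scoring_tokens.json"), "w") as f:
        json.dump({"query": query_mask_scoring_tokens, "doc": doc_mask_scoring_tokens}, f)

    # what get_config_dict reads back when loading from that directory
    mask_scoring_tokens = None
    mask_scoring_tokens_path = os.path.join(save_directory, "mask_scoring_tokens.json")
    if os.path.exists(mask_scoring_tokens_path):
        with open(mask_scoring_tokens_path) as f:
            mask_scoring_tokens = json.load(f)

    print(mask_scoring_tokens)  # {'query': ['[MASK]'], 'doc': None}

Since the to_dict override drops query_mask_scoring_tokens and doc_mask_scoring_tokens from the configuration dictionary, this side-car file appears to be how the token lists survive a save/load cycle.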

0 commit comments
