Skip to content

Commit 3295fee

Browse files
authored
Merge pull request #128 from zerolovesea/inbatchsample
Inbatchsample
2 parents b78447e + 5684437 commit 3295fee

File tree

8 files changed

+440
-173
lines changed

8 files changed

+440
-173
lines changed

tests/test_inbatch_sampling.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import numpy as np
2+
import pandas as pd
3+
import torch
4+
5+
from torch_rechub.basic.features import SequenceFeature, SparseFeature
6+
from torch_rechub.models.matching import DSSM
7+
from torch_rechub.trainers import MatchTrainer
8+
from torch_rechub.utils.data import MatchDataGenerator, df_to_dict
9+
from torch_rechub.utils.match import gather_inbatch_logits, gen_model_input, generate_seq_feature_match, inbatch_negative_sampling
10+
11+
12+
def test_inbatch_negative_sampling_random_and_uniform():
    """In-batch sampling draws `neg_ratio` negatives per row, never picks the
    row's own (positive) index, and produces a different draw per seed."""
    batch_scores = torch.zeros((4, 4))
    sampled_idx = inbatch_negative_sampling(batch_scores, neg_ratio=2, generator=torch.Generator().manual_seed(0))
    gathered_logits = gather_inbatch_logits(batch_scores, sampled_idx)
    # one positive column plus neg_ratio negative columns
    assert gathered_logits.shape == (4, 3)
    assert sampled_idx.shape == (4, 2)
    # the diagonal (positive) index must never appear among a row's negatives
    assert all(row not in picks.tolist() for row, picks in enumerate(sampled_idx))

    # A different generator seed should give a different permutation,
    # demonstrating that the sampling is actually random.
    resampled_idx = inbatch_negative_sampling(batch_scores, neg_ratio=2, generator=torch.Generator().manual_seed(1))
    assert not torch.equal(sampled_idx, resampled_idx)
24+
25+
26+
def test_inbatch_negative_sampling_hard_negative():
    """Hard-negative mode must select the highest-scoring off-diagonal column."""
    batch_scores = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 0.0]])
    hard_idx = inbatch_negative_sampling(batch_scores, neg_ratio=1, hard_negative=True)
    # row 0 -> col 2 (3.0), row 1 -> col 2 (6.0), row 2 -> col 1 (8.0)
    expected = torch.tensor([2, 2, 1])
    assert torch.equal(hard_idx.squeeze(1), expected)
31+
32+
33+
def _build_small_match_dataloader():
    """Build a tiny random interaction dataset and an untrained DSSM model.

    Returns:
        (train_dataloader, model) suitable for one quick training epoch.
    """
    n_users, n_items, n_samples = 12, 24, 80
    interactions = pd.DataFrame({
        "user_id": np.random.randint(0, n_users, n_samples),
        "item_id": np.random.randint(0, n_items, n_samples),
        "time": np.arange(n_samples),
    })
    user_profile = pd.DataFrame({"user_id": np.arange(n_users)})
    item_profile = pd.DataFrame({"item_id": np.arange(n_items)})

    # mode=0 / neg_ratio=0: point-wise samples with no explicit negatives
    df_train, _ = generate_seq_feature_match(interactions, "user_id", "item_id", "time", mode=0, neg_ratio=0)
    x_train = gen_model_input(df_train, user_profile, "user_id", item_profile, "item_id", seq_max_len=8)
    # labels are unused in in-batch mode; a zero array keeps shapes aligned
    y_train = np.zeros(len(df_train))

    user_features = [
        SparseFeature("user_id", n_users, embed_dim=8),
        SequenceFeature("hist_item_id", n_items, embed_dim=8, pooling="mean", shared_with="item_id"),
    ]
    item_features = [SparseFeature("item_id", n_items, embed_dim=8)]

    generator = MatchDataGenerator(x_train, y_train)
    train_dl, _, _ = generator.generate_dataloader(x_train, df_to_dict(item_profile), batch_size=8, num_workers=0)

    model = DSSM(user_features, item_features, user_params={"dims": [16]}, item_params={"dims": [16]})
    return train_dl, model
69+
70+
71+
def test_match_trainer_inbatch_flow_runs_and_updates():
    """A full in-batch-negative training epoch should run end-to-end and
    actually backpropagate gradients into the model."""
    loader, dssm_model = _build_small_match_dataloader()

    trainer = MatchTrainer(dssm_model, mode=0, in_batch_neg=True, in_batch_neg_ratio=3, sampler_seed=2, n_epoch=1, device="cpu")
    trainer.train_one_epoch(loader, log_interval=100)

    # at least one trainable parameter must have received a gradient
    assert any(p.grad is not None for p in dssm_model.parameters() if p.requires_grad)

torch_rechub/basic/loss_func.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ def __init__(self, margin=2, num_items=None):
8181
self.margin = margin
8282
self.n_items = num_items
8383

84-
def forward(self, pos_score, neg_score):
84+
def forward(self, pos_score, neg_score, in_batch_neg=False):
85+
pos_score = pos_score.view(-1)
8586
loss = torch.maximum(torch.max(neg_score, dim=-1).values - pos_score + self.margin, torch.tensor([0]).type_as(pos_score))
8687
if self.n_items is not None:
8788
impostors = neg_score - pos_score.view(-1, 1) + self.margin > 0
@@ -96,9 +97,14 @@ class BPRLoss(torch.nn.Module):
9697
def __init__(self):
9798
super().__init__()
9899

99-
def forward(self, pos_score, neg_score):
100-
loss = torch.mean(-(pos_score - neg_score).sigmoid().log(), dim=-1)
101-
return loss
100+
def forward(self, pos_score, neg_score, in_batch_neg=False):
    """Bayesian Personalized Ranking loss: mean of -log(sigmoid(pos - neg)).

    Args:
        pos_score: positive-pair scores of any shape; flattened to [batch].
        neg_score: negative scores, either [batch] (one negative per example)
            or [batch, n_neg] (each row broadcast against its positive score).
        in_batch_neg: unused here; kept so all loss functions share one
            signature.

    Returns:
        Scalar tensor: the mean BPR loss over all (pos, neg) pairs.
    """
    pos_score = pos_score.view(-1)
    if neg_score.dim() == 1:
        diff = pos_score - neg_score
    else:
        diff = pos_score.view(-1, 1) - neg_score
    # logsigmoid is the numerically stable form of log(sigmoid(diff));
    # -diff.sigmoid().log() underflows to inf when diff is very negative.
    loss = -torch.nn.functional.logsigmoid(diff)
    return loss.mean()
102108

103109

104110
class NCELoss(torch.nn.Module):

torch_rechub/models/matching/narm.py

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@
1717

1818
class NARM(nn.Module):
1919

20-
def __init__(self, item_history_feature, hidden_dim, emb_dropout_p, session_rep_dropout_p):
20+
def __init__(self, item_history_feature, hidden_dim, emb_dropout_p, session_rep_dropout_p, item_feature=None):
2121
super(NARM, self).__init__()
2222

2323
# item embedding layer
2424
self.item_history_feature = item_history_feature
25+
self.item_feature = item_feature # Optional: for in-batch negative sampling
2526
self.item_emb = Embedding(item_history_feature.vocab_size, item_history_feature.embed_dim, padding_idx=0)
27+
self.mode = None # For inference: "user" or "item"
2628

2729
# embedding dropout layer
2830
self.emb_dropout = Dropout(emb_dropout_p)
@@ -42,41 +44,62 @@ def __init__(self, item_history_feature, hidden_dim, emb_dropout_p, session_rep_
4244
# bilinear projection matrix
4345
self.b = Parameter(torch.randn(item_history_feature.embed_dim, hidden_dim * 2))
4446

45-
def forward(self, input_dict):
46-
# Eq. 1-4, index item embeddings and pass through gru
47-
# # Fetch the embeddings for items in the session
47+
def _compute_session_repr(self, input_dict):
    """Return the NARM session representation.

    Concatenates the global encoding (final GRU state) with the local,
    attention-weighted encoding of all hidden states, then applies dropout.
    """
    seq = input_dict[self.item_history_feature.name]
    pad_mask = (seq != 0)
    # pack_padded_sequence requires lengths on CPU, detached from the graph
    seq_lens = pad_mask.sum(dim=1, keepdim=False).to("cpu").detach()
    packed = rnn_utils.pack_padded_sequence(self.emb_dropout(self.item_emb(seq)), seq_lens, batch_first=True, enforce_sorted=False)

    h, h_t = self.gru(packed)
    h_t = h_t.permute(1, 0, 2)
    h, _ = rnn_utils.pad_packed_sequence(h, batch_first=True)

    # global encoder output: the last hidden state of the GRU
    c_g = h_t.squeeze(1)
    # similarity of the final state to each previous hidden state
    q = sigmoid(h_t @ self.a_1.T + h @ self.a_2.T) @ self.v
    # attention weights, masked over padding and normalized per session
    alpha = torch.exp(q) * pad_mask.unsqueeze(-1)
    alpha /= alpha.sum(dim=1, keepdim=True)
    # local encoder output: attention-weighted sum of hidden states
    c_l = (alpha * h).sum(1)

    return self.session_rep_dropout(torch.hstack((c_g, c_l)))
66+
67+
def user_tower(self, x):
    """Project the session representation into item-embedding space.

    Returns None in item-only inference mode, a [batch, embed_dim] tensor in
    user-only inference mode, otherwise [batch, 1, embed_dim] for scoring.
    """
    if self.mode == "item":
        return None
    session_repr = self._compute_session_repr(x)
    # bilinear projection into the item embedding space
    user_emb = session_repr @ self.b.T
    return user_emb if self.mode == "user" else user_emb.unsqueeze(1)
76+
77+
def item_tower(self, x):
    """Look up raw item embeddings for in-batch negative sampling.

    Returns None in user-only mode or when no item feature is configured,
    [batch, embed_dim] in item-only inference mode, else [batch, 1, embed_dim].
    """
    if self.mode == "user" or self.item_feature is None:
        return None
    item_emb = self.item_emb(x[self.item_feature.name])
    return item_emb if self.mode == "item" else item_emb.unsqueeze(1)
7788

78-
# Eq. 10, compute bilinear similarity between current session and each
79-
# candidate items
89+
def forward(self, input_dict):
    """Route between inference towers, in-batch-negative scoring, and the
    original full-catalog scoring.

    Returns:
        user/item embeddings in inference mode; per-example positive logits
        ([batch]) in in-batch-negative mode; otherwise [batch, n_items]
        bilinear scores over the whole item vocabulary.
    """
    # Inference mode: expose a single tower
    if self.mode == "user":
        return self.user_tower(input_dict)
    if self.mode == "item":
        return self.item_tower(input_dict)

    # In-batch negative sampling: score each user against its paired item.
    if self.item_feature is not None:
        user_emb = self.user_tower(input_dict)  # [batch, 1, embed_dim]
        item_emb = self.item_tower(input_dict)  # [batch, 1, embed_dim]
        # squeeze(1), not squeeze(): a bare squeeze() collapses a batch of
        # size 1 to a 0-dim scalar instead of a 1-element vector
        return torch.mul(user_emb, item_emb).sum(dim=-1).squeeze(1)

    # Original behavior: bilinear scores against every candidate item.
    c = self._compute_session_repr(input_dict)
    s = c @ self.b.T @ self.item_emb.weight.T
    return s

torch_rechub/models/matching/sasrec.py

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class SASRec(torch.nn.Module):
2121
max_len: The length of the sequence feature.
2222
num_blocks: The number of stacks of attention modules.
2323
num_heads: The number of heads in MultiheadAttention.
24+
item_feature: Optional item feature for in-batch negative sampling mode.
2425
2526
"""
2627

@@ -31,9 +32,15 @@ def __init__(
3132
dropout_rate=0.5,
3233
num_blocks=2,
3334
num_heads=1,
35+
item_feature=None,
3436
):
3537
super(SASRec, self).__init__()
3638

39+
self.features = features
40+
self.item_feature = item_feature # Optional: for in-batch negative sampling
41+
self.mode = None # For inference: "user" or "item"
42+
self.max_len = max_len
43+
3744
self.features = features
3845

3946
self.item_num = self.features[0].vocab_size
@@ -94,17 +101,60 @@ def seq_forward(self, x, embed_x_feature):
94101

95102
return seq_output
96103

104+
def user_tower(self, x):
    """User representation for in-batch negative sampling.

    Uses the transformer output at each sequence's last non-padding position.
    Returns None in item-only mode, [batch, embed_dim] in user-only mode,
    otherwise [batch, 1, embed_dim].

    NOTE(review): an all-padding row clamps to position 0, so its "user
    embedding" comes from a padding step — confirm upstream filtering.
    """
    if self.mode == "item":
        return None
    # run only the sequence feature through embedding + attention stack
    seq_embed = self.item_emb(x, self.features[:1])[:, 0]
    seq_output = self.seq_forward(x, seq_embed)  # [batch, max_len, embed_dim]

    # index of the last valid (non-zero) token in each sequence
    last_pos = ((x['seq'] != 0).sum(dim=1) - 1).clamp(min=0)
    rows = torch.arange(seq_output.size(0), device=seq_output.device)
    user_emb = seq_output[rows, last_pos]  # [batch, embed_dim]

    return user_emb if self.mode == "user" else user_emb.unsqueeze(1)
124+
125+
def item_tower(self, x):
    """Item embeddings for in-batch negative sampling.

    Returns None in user-only mode or when no item feature is configured,
    [batch, embed_dim] in item-only inference mode, else [batch, 1, embed_dim].
    """
    if self.mode == "user" or self.item_feature is None:
        return None
    # look up ids directly in the shared embedding table of the seq feature
    item_emb = self.item_emb.embedding[self.features[0].name](x[self.item_feature.name])
    return item_emb if self.mode == "item" else item_emb.unsqueeze(1)
137+
97138
def forward(self, x):
    """Route between inference towers, in-batch-negative scoring, and the
    original pairwise (pos/neg sequence) training path.

    Returns:
        tower embeddings in inference mode; [batch] positive logits in
        in-batch-negative mode; otherwise a (pos_logits, neg_logits) pair,
        each [batch, max_len].
    """
    # Inference mode: expose a single tower
    if self.mode == "user":
        return self.user_tower(x)
    if self.mode == "item":
        return self.item_tower(x)

    # In-batch negative sampling: score each user against its paired item.
    if self.item_feature is not None:
        user_emb = self.user_tower(x)  # [batch, 1, embed_dim]
        item_emb = self.item_tower(x)  # [batch, 1, embed_dim]
        # squeeze(1), not squeeze(): a bare squeeze() collapses a batch of
        # size 1 to a 0-dim scalar instead of a 1-element vector
        return torch.mul(user_emb, item_emb).sum(dim=-1).squeeze(1)

    # Original behavior: pairwise logits against aligned pos/neg sequences.
    embedding = self.item_emb(x, self.features)  # (batch, 3, max_len, embed_dim)
    seq_embed, pos_embed, neg_embed = embedding[:, 0], embedding[:, 1], embedding[:, 2]
    seq_output = self.seq_forward(x, seq_embed)

    pos_logits = (seq_output * pos_embed).sum(dim=-1)
    neg_logits = (seq_output * neg_embed).sum(dim=-1)

    return pos_logits, neg_logits
110160

torch_rechub/models/matching/stamp.py

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@
1414

1515
class STAMP(nn.Module):
1616

17-
def __init__(self, item_history_feature, weight_std, emb_std):
17+
def __init__(self, item_history_feature, weight_std, emb_std, item_feature=None):
1818
super(STAMP, self).__init__()
1919

2020
# item embedding layer
2121
self.item_history_feature = item_history_feature
22+
self.item_feature = item_feature # Optional: for in-batch negative sampling
2223
n_items, item_emb_dim, = item_history_feature.vocab_size, item_history_feature.embed_dim
2324
self.item_emb = nn.Embedding(n_items, item_emb_dim, padding_idx=0)
25+
self.mode = None # For inference: "user" or "item"
2426

2527
# weights and biases for attention computation
2628
self.w_0 = nn.Parameter(torch.zeros(item_emb_dim, 1))
@@ -50,32 +52,58 @@ def _init_module_weights(self, module):
5052
elif isinstance(module, nn.Embedding):
5153
module.weight.data.normal_(std=self.emb_std)
5254

53-
def forward(self, input_dict):
54-
# Index the embeddings for the items in the session
55+
def _compute_user_repr(self, input_dict):
    """Return the STAMP user representation h_s * h_t: the general-interest
    state gated by the short-term interest of the last click."""
    seq = input_dict[self.item_history_feature.name]
    pad_mask = (seq != 0).unsqueeze(-1)
    seq_lens = pad_mask.sum(dim=1, keepdim=True).squeeze(-1)
    session_emb = self.item_emb(seq) * pad_mask

    # embedding of the most recent click in each session
    last_emb = self.item_emb(torch.gather(seq, 1, seq_lens - 1))
    # mean-pooled general interest over the session (Eq. 2)
    mean_emb = (session_emb.sum(1) / seq_lens).unsqueeze(1)

    # attention coefficients (Eq. 7), padding-masked and L1-normalized
    attn = F.normalize(torch.exp(torch.sigmoid(session_emb @ self.w_1_t + last_emb @ self.w_2_t + mean_emb @ self.w_3_t + self.b_a) @ self.w_0) * pad_mask, p=1, dim=1)
    # attention-based interest with residual mean interest (Eq. 8)
    attn_interest = (attn * session_emb).sum(1) + mean_emb.squeeze(1)

    h_s = self.f_s(attn_interest)
    h_t = self.f_t(last_emb).squeeze(1)
    return h_s * h_t  # [batch_size, embed_dim]
71+
72+
def user_tower(self, x):
    """User embedding for in-batch negative sampling.

    Returns None in item-only inference mode, [batch, embed_dim] in user-only
    inference mode, otherwise [batch, 1, embed_dim].
    """
    if self.mode == "item":
        return None
    user_emb = self._compute_user_repr(x)
    return user_emb if self.mode == "user" else user_emb.unsqueeze(1)
80+
81+
def item_tower(self, x):
    """Item embeddings for in-batch negative sampling.

    Returns None in user-only mode or when no item feature is configured,
    [batch, embed_dim] in item-only inference mode, else [batch, 1, embed_dim].
    """
    if self.mode == "user" or self.item_feature is None:
        return None
    item_emb = self.item_emb(x[self.item_feature.name])
    return item_emb if self.mode == "item" else item_emb.unsqueeze(1)
7792

78-
# Eq. 4, compute candidate scores
79-
z = h_s * h_t @ self.item_emb.weight.T
80-
93+
def forward(self, input_dict):
    """Route between inference towers, in-batch-negative scoring, and the
    original all-item scoring path.

    Returns:
        user/item embeddings in inference mode; per-example positive logits
        ([batch]) in in-batch-negative mode; otherwise [batch, n_items]
        scores against the full item vocabulary.
    """
    # Inference mode: expose a single tower
    if self.mode == "user":
        return self.user_tower(input_dict)
    if self.mode == "item":
        return self.item_tower(input_dict)

    # In-batch negative sampling: score each user against its paired item.
    if self.item_feature is not None:
        user_emb = self.user_tower(input_dict)  # [batch, 1, embed_dim]
        item_emb = self.item_tower(input_dict)  # [batch, 1, embed_dim]
        # squeeze(1), not squeeze(): a bare squeeze() collapses a batch of
        # size 1 to a 0-dim scalar instead of a 1-element vector
        return torch.mul(user_emb, item_emb).sum(dim=-1).squeeze(1)

    # Original behavior: score the user representation against every item.
    user_repr = self._compute_user_repr(input_dict)
    z = user_repr @ self.item_emb.weight.T
    return z

0 commit comments

Comments
 (0)