Add computeAttributionsHelperV2 function for new output format (#1308)

Chenyu Liu · facebook-github-bot · commit 692e27ee2b91 · 2022-07-22T00:25:14.000-07:00
Summary: Pull Request resolved: #1308 # Reformat Attribution Output We will apply performance improvements to the private attribution product (game) by changing the format of the attribution result. For this we will need to make changes to both the private attribution and private aggregation stages. The original format of the attribution result is: { "last_click_1d": { "default": { "0": [ { "is_attributed": true }, { "is_attributed": false }, { "is_attributed": false }, { "is_attributed": false }, { "is_attributed": false } ] } } } Proposed format: [ {ad_id, conversion_value, is_attributed}, {ad_id, conversion_value, is_attributed}, {ad_id, conversion_value, is_attributed}, {ad_id, conversion_value, is_attributed}, ] The design plan: https://docs.google.com/document/d/1QyBtCkTeZA8IXAkok0n8EhfCZeLTU0SSN1VL57vjBCo/edit?usp=sharing # This Diff Add the computeAttributionsHelperV2 function for the new output format. # This Stack 1. Add a flag to validate whether to use new vs old output format in Private Attribution. 2. Modify PCS stage for attribution with the new flag. 3. Parse the input for new output format. 4. Add a new output format file in attribution game. 5. **Add computeAttributionsHelperV2 function for new output format.** 6. Update computeAttributions function. 7. Update unit tests for PCF2 Attribution logic. 8. Add json test files for new output format. 9. Modify revealXORedResult method for new output. 10. Add unit tests for new output format - testCorrectness. Reviewed By: chualynn Differential Revision: D37765615 fbshipit-source-id: 858302db5de13708daccb28a17591388d4b1ff88
diff --git a/fbpcs/emp_games/pcf2_attribution/AttributionGame.h b/fbpcs/emp_games/pcf2_attribution/AttributionGame.h
@@ -90,6 +90,20 @@ class AttributionGame : public fbpcf::frontend::MpcGame<schedulerId> {
       const std::vector<std::vector<SecTimestamp<schedulerId, usingBatch>>>&
           thresholds,
       size_t batchSize);
+
+  const std::vector<AttributionReformattedOutputFmt<schedulerId, usingBatch>>
+  computeAttributionsHelperV2(
+      const std::vector<
+          PrivateTouchpoint<schedulerId, usingBatch, inputEncryption>>&
+          touchpoints,
+      const std::vector<
+          PrivateConversion<schedulerId, usingBatch, inputEncryption>>&
+          conversions,
+      const AttributionRule<schedulerId, usingBatch, inputEncryption>&
+          attributionRule,
+      const std::vector<std::vector<SecTimestamp<schedulerId, usingBatch>>>&
+          thresholds,
+      size_t batchSize);
 };
 
 } // namespace pcf2_attribution
diff --git a/fbpcs/emp_games/pcf2_attribution/AttributionGame_impl.h b/fbpcs/emp_games/pcf2_attribution/AttributionGame_impl.h
@@ -209,88 +209,212 @@ AttributionGame<schedulerId, usingBatch, inputEncryption>::
   // know that it is the preferred touchpoint as well.
   // Thus at the end we will get the fully reversed attribution match vector of
   // conversions and touchpoints.
-  if (FLAGS_use_new_output_format) {
-    // ToDo: Implement logic for generating attribution output in new format.
-  } else {
-    for (auto conversion = conversions.rbegin();
-         conversion != conversions.rend();
-         ++conversion) {
-      auto conv = *conversion;
+  for (auto conversion = conversions.rbegin(); conversion != conversions.rend();
+       ++conversion) {
+    auto conv = *conversion;
+
+    if constexpr (usingBatch) {
+      OMNISCIENT_ONLY_XLOGF(
+          DBG,
+          "Computing attributions for conversions: {}",
+          common::vecToString(
+              conv.ts.openToParty(common::PUBLISHER).getValue()));
+    } else {
+      OMNISCIENT_ONLY_XLOGF(
+          DBG,
+          "Computing attributions for conversion: {}",
+          conv.ts.openToParty(common::PUBLISHER).getValue());
+    }
+
+    // store if conversion has already been attributed
+    SecBit<schedulerId, usingBatch> hasAttributedTouchpoint;
+    if constexpr (usingBatch) {
+      hasAttributedTouchpoint = SecBit<schedulerId, usingBatch>{
+          std::vector<bool>(batchSize, false), common::PUBLISHER};
+    } else {
+      hasAttributedTouchpoint =
+          SecBit<schedulerId, usingBatch>{false, common::PUBLISHER};
+    }
+
+    CHECK_EQ(touchpoints.size(), thresholds.size())
+        << "touchpoints and thresholds are not the same length.";
+
+    for (size_t i = touchpoints.size(); i >= 1; --i) {
+      auto tp = touchpoints.at(i - 1);
+      auto threshold = thresholds.at(i - 1);
 
       if constexpr (usingBatch) {
         OMNISCIENT_ONLY_XLOGF(
             DBG,
-            "Computing attributions for conversions: {}",
+            "Checking touchpoints: {}",
             common::vecToString(
-                conv.ts.openToParty(common::PUBLISHER).getValue()));
+                tp.ts.openToParty(common::PUBLISHER).getValue()));
       } else {
         OMNISCIENT_ONLY_XLOGF(
             DBG,
-            "Computing attributions for conversion: {}",
-            conv.ts.openToParty(common::PUBLISHER).getValue());
+            "Checking touchpoint: {}",
+            tp.ts.openToParty(common::PUBLISHER).getValue());
       }
 
-      // store if conversion has already been attributed
-      SecBit<schedulerId, usingBatch> hasAttributedTouchpoint;
+      auto isTouchpointAttributable =
+          attributionRule.isAttributable(tp, conv, threshold);
+
+      auto isAttributed = isTouchpointAttributable & !hasAttributedTouchpoint;
+
+      hasAttributedTouchpoint = isAttributed | hasAttributedTouchpoint;
+
       if constexpr (usingBatch) {
-        hasAttributedTouchpoint = SecBit<schedulerId, usingBatch>{
-            std::vector<bool>(batchSize, false), common::PUBLISHER};
+        OMNISCIENT_ONLY_XLOGF(
+            DBG,
+            "isTouchpointAttributable={}, isAttributed={}, hasAttributedTouchpoint={}",
+            common::vecToString(
+                isTouchpointAttributable.extractBit().getValue()),
+            common::vecToString(isAttributed.extractBit().getValue()),
+            common::vecToString(
+                hasAttributedTouchpoint.extractBit().getValue()));
       } else {
-        hasAttributedTouchpoint =
-            SecBit<schedulerId, usingBatch>{false, common::PUBLISHER};
+        OMNISCIENT_ONLY_XLOGF(
+            DBG,
+            "isTouchpointAttributable={}, isAttributed={}, hasAttributedTouchpoint={}",
+            isTouchpointAttributable.extractBit().getValue(),
+            isAttributed.extractBit().getValue(),
+            hasAttributedTouchpoint.extractBit().getValue());
       }
 
-      CHECK_EQ(touchpoints.size(), thresholds.size())
-          << "touchpoints and thresholds are not the same length.";
-
-      for (size_t i = touchpoints.size(); i >= 1; --i) {
-        auto tp = touchpoints.at(i - 1);
-        auto threshold = thresholds.at(i - 1);
-
-        if constexpr (usingBatch) {
-          OMNISCIENT_ONLY_XLOGF(
-              DBG,
-              "Checking touchpoints: {}",
-              common::vecToString(
-                  tp.ts.openToParty(common::PUBLISHER).getValue()));
-        } else {
-          OMNISCIENT_ONLY_XLOGF(
-              DBG,
-              "Checking touchpoint: {}",
-              tp.ts.openToParty(common::PUBLISHER).getValue());
-        }
+      attributions.push_back(isAttributed);
+    }
+  }
+  std::reverse(attributions.begin(), attributions.end());
+  return attributions;
+}
 
-        auto isTouchpointAttributable =
-            attributionRule.isAttributable(tp, conv, threshold);
-
-        auto isAttributed = isTouchpointAttributable & !hasAttributedTouchpoint;
-
-        hasAttributedTouchpoint = isAttributed | hasAttributedTouchpoint;
-
-        if constexpr (usingBatch) {
-          OMNISCIENT_ONLY_XLOGF(
-              DBG,
-              "isTouchpointAttributable={}, isAttributed={}, hasAttributedTouchpoint={}",
-              common::vecToString(
-                  isTouchpointAttributable.extractBit().getValue()),
-              common::vecToString(isAttributed.extractBit().getValue()),
-              common::vecToString(
-                  hasAttributedTouchpoint.extractBit().getValue()));
-        } else {
-          OMNISCIENT_ONLY_XLOGF(
-              DBG,
-              "isTouchpointAttributable={}, isAttributed={}, hasAttributedTouchpoint={}",
-              isTouchpointAttributable.extractBit().getValue(),
-              isAttributed.extractBit().getValue(),
-              hasAttributedTouchpoint.extractBit().getValue());
-        }
+template <
+    int schedulerId,
+    bool usingBatch,
+    common::InputEncryption inputEncryption>
+const std::vector<AttributionReformattedOutputFmt<schedulerId, usingBatch>>
+AttributionGame<schedulerId, usingBatch, inputEncryption>::
+    computeAttributionsHelperV2(
+        const std::vector<
+            PrivateTouchpoint<schedulerId, usingBatch, inputEncryption>>&
+            touchpoints,
+        const std::vector<
+            PrivateConversion<schedulerId, usingBatch, inputEncryption>>&
+            conversions,
+        const AttributionRule<schedulerId, usingBatch, inputEncryption>&
+            attributionRule,
+        const std::vector<std::vector<SecTimestamp<schedulerId, usingBatch>>>&
+            thresholds,
+        size_t batchSize) {
+  if constexpr (usingBatch) {
+    if (batchSize == 0) {
+      throw std::invalid_argument(
+          "Must provide positive batch size for batch execution!");
+    }
+  }
+  std::vector<AttributionReformattedOutputFmt<schedulerId, usingBatch>>
+      attributionsOutput;
+  // We will be attributing on a sorted vector of touchpoints and conversions
+  // (based on timestamps).
+  // The preferred touchpoint for a conversion will be a valid attributable
+  // touchpoint with nearest timestamp to the conversion. In order to compute
+  // this efficiently, we will traverse backwards on both conversion and
+  // touchpoint vector. So that when we find a valid attributable touchpoint, we
+  // know that it is the preferred touchpoint as well.
+  // Thus at the end we will get the fully reversed attribution match vector of
+  // conversions and touchpoints.
+  for (auto conversion = conversions.rbegin(); conversion != conversions.rend();
+       ++conversion) {
+    auto conv = *conversion;
+
+    if constexpr (usingBatch) {
+      OMNISCIENT_ONLY_XLOGF(
+          DBG,
+          "Computing attributions for conversions: {}",
+          common::vecToString(
+              conv.ts.openToParty(common::PUBLISHER).getValue()));
+    } else {
+      OMNISCIENT_ONLY_XLOGF(
+          DBG,
+          "Computing attribution for conversion: {}",
+          conv.ts.openToParty(common::PUBLISHER).getValue());
+    }
+
+    // store if conversion has already been attributed
+    SecBit<schedulerId, usingBatch> hasAttributedTouchpoint;
+    if constexpr (usingBatch) {
+      hasAttributedTouchpoint = SecBit<schedulerId, usingBatch>{
+          std::vector<bool>(batchSize, false), common::PUBLISHER};
+    } else {
+      hasAttributedTouchpoint =
+          SecBit<schedulerId, usingBatch>{false, common::PUBLISHER};
+    }
+
+    CHECK_EQ(touchpoints.size(), thresholds.size())
+        << "touchpoints and thresholds are not the same length.";
+
+    SecAdId<schedulerId, usingBatch> attributedAdId;
+    uint64_t defaultAdId = 0;
+    if constexpr (usingBatch) {
+      // initialize the ad_id to be 0, is_attributed to be false:
+      attributedAdId = SecAdId<schedulerId, usingBatch>{
+          std::vector<uint64_t>(batchSize, defaultAdId), common::PUBLISHER};
+    } else {
+      attributedAdId =
+          SecAdId<schedulerId, usingBatch>(defaultAdId, common::PUBLISHER);
+    }
+    for (size_t i = touchpoints.size(); i >= 1; --i) {
+      auto tp = touchpoints.at(i - 1);
+      auto threshold = thresholds.at(i - 1);
 
-        attributions.push_back(isAttributed);
+      if constexpr (usingBatch) {
+        OMNISCIENT_ONLY_XLOGF(
+            DBG,
+            "Checking touchpoints: {}",
+            common::vecToString(
+                tp.ts.openToParty(common::PUBLISHER).getValue()));
+      } else {
+        OMNISCIENT_ONLY_XLOGF(
+            DBG,
+            "Checking touchpoint: {}",
+            tp.ts.openToParty(common::PUBLISHER).getValue());
       }
+
+      auto isTouchpointAttributable =
+          attributionRule.isAttributable(tp, conv, threshold);
+
+      auto isAttributed = isTouchpointAttributable & !hasAttributedTouchpoint;
+
+      hasAttributedTouchpoint = isAttributed | hasAttributedTouchpoint;
+
+      if constexpr (usingBatch) {
+        OMNISCIENT_ONLY_XLOGF(
+            DBG,
+            "isTouchpointAttributable={}, isAttributed={}, hasAttributedTouchpoint={}",
+            common::vecToString(
+                isTouchpointAttributable.extractBit().getValue()),
+            common::vecToString(isAttributed.extractBit().getValue()),
+            common::vecToString(
+                hasAttributedTouchpoint.extractBit().getValue()));
+      } else {
+        OMNISCIENT_ONLY_XLOGF(
+            DBG,
+            "isTouchpointAttributable={}, isAttributed={}, hasAttributedTouchpoint={}",
+            isTouchpointAttributable.extractBit().getValue(),
+            isAttributed.extractBit().getValue(),
+            hasAttributedTouchpoint.extractBit().getValue());
+      }
+
+      attributedAdId = attributedAdId.mux(isAttributed, tp.adId);
     }
-    std::reverse(attributions.begin(), attributions.end());
+    attributionsOutput.push_back(
+        AttributionReformattedOutputFmt<schedulerId, usingBatch>{
+            .ad_id = attributedAdId,
+            .conv_value = conv.convValue,
+            .is_attributed = hasAttributedTouchpoint});
   }
-  return attributions;
+  std::reverse(attributionsOutput.begin(), attributionsOutput.end());
+  return attributionsOutput;
 }
 
 template <
diff --git a/fbpcs/emp_games/pcf2_attribution/test/AttributionGameTest.cpp b/fbpcs/emp_games/pcf2_attribution/test/AttributionGameTest.cpp
@@ -245,23 +245,6 @@ TEST(AttributionGameTest, TestAttributionLogicPlaintext) {
       thresholdsLastTouch1D,
       1);
 
-  FLAGS_use_new_output_format = true;
-  auto computeAttributionLastClick1DNewOutputFormat =
-      game.computeAttributionsHelper(
-          privateTouchpoints.at(0),
-          privateConversions.at(0),
-          *lastClick1D,
-          thresholdsLastClick1D,
-          1);
-
-  auto computeAttributionLastTouch1DNewOutputFormat =
-      game.computeAttributionsHelper(
-          privateTouchpoints.at(0),
-          privateConversions.at(0),
-          *lastTouch1D,
-          thresholdsLastTouch1D,
-          1);
-
   for (size_t i = 0; i < attributionResultsLastClick1D.size(); ++i) {
     EXPECT_EQ(
         computeAttributionLastClick1D.at(i)
@@ -277,10 +260,6 @@ TEST(AttributionGameTest, TestAttributionLogicPlaintext) {
             .getValue(),
         attributionResultsLastTouch1D.at(i));
   }
-
-  EXPECT_EQ(computeAttributionLastClick1DNewOutputFormat.size(), 0);
-
-  EXPECT_EQ(computeAttributionLastTouch1DNewOutputFormat.size(), 0);
 }
 
 TEST(AttributionGameTest, TestAttributionLogicPlaintextBatch) {
@@ -354,23 +333,6 @@ TEST(AttributionGameTest, TestAttributionLogicPlaintextBatch) {
       thresholdsLastTouch1D,
       batchSize);
 
-  FLAGS_use_new_output_format = true;
-  auto computeAttributionLastClick1DNewOutputFormat =
-      game.computeAttributionsHelper(
-          privateTouchpoints,
-          privateConversions,
-          *lastClick1D,
-          thresholdsLastClick1D,
-          batchSize);
-
-  auto computeAttributionLastTouch1DNewOutputFormat =
-      game.computeAttributionsHelper(
-          privateTouchpoints,
-          privateConversions,
-          *lastTouch1D,
-          thresholdsLastTouch1D,
-          batchSize);
-
   for (size_t i = 0; i < attributionResultsLastClick1D.size(); ++i) {
     for (size_t j = 0; j < batchSize; ++j) {
       EXPECT_EQ(
@@ -392,10 +354,6 @@ TEST(AttributionGameTest, TestAttributionLogicPlaintextBatch) {
           attributionResultsLastTouch1D.at(i));
     }
   }
-
-  EXPECT_EQ(computeAttributionLastClick1DNewOutputFormat.size(), 0);
-
-  EXPECT_EQ(computeAttributionLastTouch1DNewOutputFormat.size(), 0);
 }
 
 template <