feat(noisy_avg): Add support for all numeric input types (facebookincubator#13709)

Oliver Xu · facebook-github-bot · commit 51c414db55b9 · 2025-06-10T16:52:20.000-07:00
Summary: Pull Request resolved: facebookincubator#13709 ### Summary This diff adds support for all numeric input types to the `noisy_avg` aggregation function. ### Code Changes The diff modifies two files: 1. `NoisyAvgGaussianAggregationTest.cpp`: Adds a new test case covering `bigint`, `decimal`, `real`, `integer`, `smallint`, and `tinyint` input types in addition to `double`. 2. `NoisyAvgGaussianAggregate.cpp`: Updates the `update` method to handle different input types using `VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH`, and registers signatures for every supported input type. ### Impact This diff allows the `noisy_avg` aggregation function to work with a wider range of input types, making it more versatile and useful for various use cases. Differential Revision: D76209005
diff --git a/velox/functions/prestosql/aggregates/NoisyAvgGaussianAggregate.cpp b/velox/functions/prestosql/aggregates/NoisyAvgGaussianAggregate.cpp
@@ -204,8 +204,12 @@ class NoisyAvgGaussianAggregate : public exec::Aggregate {
       noiseScale = static_cast<double>(decodedNoiseScale_.valueAt<uint64_t>(i));
     }
     accumulator->checkAndSetNoiseScale(noiseScale);
-    accumulator->updateCount(1);
-    accumulator->updateSum(decodedValue_.valueAt<double>(i));
+
+    // Update sum and count. check input value and dispatch to corresponding
+    // type.
+    auto inputType = args[0]->typeKind();
+    VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(
+        updateTemplate, inputType, accumulator, decodedValue_, i);
   }
 
   void updateAccumulatorFromIntermediateResult(
@@ -224,26 +228,84 @@ class NoisyAvgGaussianAggregate : public exec::Aggregate {
       accumulator->checkAndSetNoiseScale(otherAccumulator.getNoiseScale());
     }
   }
+
+  // Adds a single input row to the accumulator: reads the value at row `i` of
+  // `decodedValue`, converts it to double, adds it to the running sum and
+  // increments the count by one. Instantiated once per TypeKind through
+  // VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH. Only used in this class.
+  //
+  // TData: TypeKind of the input column; its NativeType is the type used to
+  //   read the value.
+  // accumulator: accumulator that receives the sum/count update.
+  // decodedValue: decoded input column.
+  // i: row index to read from `decodedValue`.
+  //
+  // Fails via VELOX_FAIL when the native type is StringView or Timestamp.
+  template <TypeKind TData>
+  void updateTemplate(
+      AccumulatorType* accumulator,
+      const DecodedVector& decodedValue,
+      vector_size_t i) {
+    using T = typename TypeTraits<TData>::NativeType;
+    // Decimal inputs are read as int64_t (short) or int128_t (long) and must be
+    // divided by 10^scale before being summed as a double.
+    if constexpr (std::is_same_v<T, int64_t> || std::is_same_v<T, int128_t>) {
+      const auto& type = decodedValue.base()->type();
+      if (type->isDecimal()) {
+        auto value = decodedValue.valueAt<T>(i);
+        auto scale = type->isShortDecimal() ? type->asShortDecimal().scale()
+                                            : type->asLongDecimal().scale();
+        double doubleValue = static_cast<double>(value) / pow(10, scale);
+
+        accumulator->updateSum(doubleValue);
+        accumulator->updateCount(1);
+        return;
+      }
+    }
+    // Non-decimal inputs. T is a native value type, so the non-numeric kinds
+    // are recognised by their native representations (StringView, Timestamp);
+    // comparing T against a TypeTraits<...> struct could never match.
+    if constexpr (
+        std::is_same_v<T, facebook::velox::StringView> ||
+        std::is_same_v<T, facebook::velox::Timestamp>) {
+      VELOX_FAIL("NoisyAvgGaussianAggregate does not support this data type.");
+    } else {
+      accumulator->updateSum(static_cast<double>(decodedValue.valueAt<T>(i)));
+      accumulator->updateCount(1);
+    }
+  }
 };
 } // namespace
 
 void registerNoisyAvgGaussianAggregate(
     const std::string& prefix,
     bool withCompanionFunctions,
     bool overwrite) {
-  std::vector<std::shared_ptr<exec::AggregateFunctionSignature>> signatures{
-      exec::AggregateFunctionSignatureBuilder()
-          .returnType("double")
-          .intermediateType("varbinary")
-          .argumentType("double") // input type
-          .argumentType("double") // noise scale
-          .build(),
-      exec::AggregateFunctionSignatureBuilder()
-          .returnType("double")
-          .intermediateType("varbinary")
-          .argumentType("double") // input type
-          .argumentType("bigint") // noise scale
-          .build()};
+  // Helper function to create a signature builder with return and
+  // intermediate types
+  auto createBuilder = []() {
+    return exec::AggregateFunctionSignatureBuilder()
+        .returnType("double")
+        .intermediateType("varbinary");
+  };
+
+  // List of possible argument types.
+  const std::vector<std::string> simpleDataTypes = {
+      "tinyint", "smallint", "integer", "bigint", "real", "double"};
+  const std::vector<std::string> noiseScaleTypes = {"double", "bigint"};
+
+  std::vector<std::shared_ptr<exec::AggregateFunctionSignature>> signatures;
+
+  // Generate signatures for all type combinations.
+  for (const auto& noiseScaleType : noiseScaleTypes) {
+    // Handle simple types.
+    for (const auto& dataType : simpleDataTypes) {
+      signatures.push_back(createBuilder()
+                               .argumentType(dataType)
+                               .argumentType(noiseScaleType)
+                               .build());
+    }
+
+    // Handle decimal types separately.
+    signatures.push_back(exec::AggregateFunctionSignatureBuilder()
+                             .integerVariable("a_precision")
+                             .integerVariable("a_scale")
+                             .returnType("double")
+                             .intermediateType("varbinary")
+                             .argumentType("DECIMAL(a_precision, a_scale)")
+                             .argumentType(noiseScaleType)
+                             .build());
+  }
 
   auto name = prefix + kNoisyAvgGaussian;
   exec::registerAggregateFunction(
diff --git a/velox/functions/prestosql/aggregates/tests/NoisyAvgGaussianAggregationTest.cpp b/velox/functions/prestosql/aggregates/tests/NoisyAvgGaussianAggregationTest.cpp
@@ -15,6 +15,18 @@ class NoisyAvgGaussianAggregationTest
 
   RowTypePtr doubleRowType_{
       ROW({"c0", "c1", "c2"}, {DOUBLE(), DOUBLE(), DOUBLE()})};
+  RowTypePtr bigintRowType_{
+      ROW({"c0", "c1", "c2"}, {BIGINT(), BIGINT(), BIGINT()})};
+  RowTypePtr decimalRowType_{
+      ROW({"c0", "c1", "c2"},
+          {DECIMAL(20, 5), DECIMAL(20, 5), DECIMAL(20, 5)})};
+  RowTypePtr realRowType_{ROW({"c0", "c1", "c2"}, {REAL(), REAL(), REAL()})};
+  RowTypePtr integerRowType_{
+      ROW({"c0", "c1", "c2"}, {INTEGER(), INTEGER(), INTEGER()})};
+  RowTypePtr smallintRowType_{
+      ROW({"c0", "c1", "c2"}, {SMALLINT(), SMALLINT(), SMALLINT()})};
+  RowTypePtr tinyintRowType_{
+      ROW({"c0", "c1", "c2"}, {TINYINT(), TINYINT(), TINYINT()})};
 };
 
 TEST_F(NoisyAvgGaussianAggregationTest, basicNoNoise) {
@@ -111,4 +123,26 @@ TEST_F(NoisyAvgGaussianAggregationTest, aggregateNullsNoNoise) {
       vectors, {"c0"}, {"noisy_avg_gaussian(c1, 0.0)"}, {expectedResult});
 }
 
+// Runs noisy_avg_gaussian with a noise scale of 0.0 over column c2 for each
+// numeric row type declared on the fixture, and checks the result against the
+// query `SELECT AVG(c2) FROM tmp` on the DuckDB table built from the same
+// vectors.
+TEST_F(NoisyAvgGaussianAggregationTest, numericInputTypeTestNoNoise) {
+  for (const auto& inputRowType :
+       {doubleRowType_,
+        bigintRowType_,
+        decimalRowType_,
+        realRowType_,
+        integerRowType_,
+        smallintRowType_,
+        tinyintRowType_}) {
+    // Fresh input data and DuckDB table for every input type.
+    auto inputVectors = makeVectors(inputRowType, 3, 3);
+    createDuckDbTable(inputVectors);
+
+    // Global aggregation (no grouping keys).
+    testAggregations(
+        inputVectors,
+        {},
+        {"noisy_avg_gaussian(c2, 0.0)"},
+        "SELECT AVG(c2) FROM tmp");
+  }
+}
+
+
 } // namespace facebook::velox::aggregate::test