-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add STDEV() aggregate function (#1614)
Add a new aggregate function `STDEV(X)` that computes the (sample) standard deviation, so that users no longer have to repeatedly type `math:sqrt(sum(math:pow((X - avg(X)), 2)) / (count(*) - 1))`. This function is not part of the SPARQL standard, but it does not conflict with it either.
- Loading branch information
Showing
21 changed files
with
2,755 additions
and
2,420 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
// Copyright 2024, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Christoph Ullinger <[email protected]> | ||
|
||
#include "engine/sparqlExpressions/StdevExpression.h" | ||
|
||
#include "engine/sparqlExpressions/SparqlExpressionTypes.h" | ||
|
||
namespace sparqlExpression::detail { | ||
|
||
// _____________________________________________________________________________
// Evaluate the child expression and map every value `x` of its result to the
// squared deviation `(x - avg)^2`, where `avg` is the sum of all values divided
// by `context->size()`. If at least one value of the child result is not
// numeric, the result is a single UNDEF value instead of a vector.
ExpressionResult DeviationExpression::evaluate(
    EvaluationContext* context) const {
  // Helper: Extracts a double or int (as double) from a variant. All other
  // alternatives yield `std::nullopt`.
  auto numValVisitor = []<typename T>(const T& value) -> std::optional<double> {
    if constexpr (ad_utility::isSimilar<T, double> ||
                  ad_utility::isSimilar<T, int64_t>) {
      return static_cast<double>(value);
    } else {
      return std::nullopt;
    }
  };

  // Helper to replace child expression results with their squared deviation.
  // On a non-numeric input value, `undef` is set to true and `exprResult` is
  // left untouched (in particular, nothing is allocated for it).
  auto devImpl = [context, numValVisitor](
                     bool& undef,
                     VectorWithMemoryLimit<IdOrLiteralOrIri>& exprResult,
                     auto generator) {
    double sum = 0.0;
    // Intermediate storage of the results returned from the child
    // expression. The number of values is known in advance, so reserve once
    // instead of growing the vector repeatedly.
    VectorWithMemoryLimit<double> childResults{context->_allocator};
    childResults.reserve(context->size());

    // Collect values as doubles
    for (auto& inp : generator) {
      const auto& n = detail::NumericValueGetter{}(std::move(inp), context);
      const auto v = std::visit(numValVisitor, n);
      if (!v.has_value()) {
        // There is a non-numeric value in the input. Therefore the entire
        // result will be undef.
        undef = true;
        return;
      }
      childResults.push_back(v.value());
      sum += v.value();
      context->cancellationHandle_->throwIfCancelled();
    }

    // Calculate squared deviation and save for result. The result vector is
    // only sized here, after it is certain that all inputs were numeric.
    const double avg = sum / static_cast<double>(context->size());
    exprResult.resize(context->size());
    for (size_t i = 0; i < childResults.size(); i++) {
      // A plain multiplication is used for squaring instead of the general
      // `std::pow`.
      const double deviation = childResults.at(i) - avg;
      exprResult.at(i) =
          IdOrLiteralOrIri{ValueId::makeFromDouble(deviation * deviation)};
    }
  };

  // Visitor for child expression result
  auto impl = [context,
               devImpl](SingleExpressionResult auto&& el) -> ExpressionResult {
    // The result vector starts out empty; `devImpl` resizes it if needed.
    VectorWithMemoryLimit<IdOrLiteralOrIri> exprResult{context->_allocator};
    bool undef = false;

    auto generator =
        detail::makeGenerator(AD_FWD(el), context->size(), context);
    devImpl(undef, exprResult, std::move(generator));

    if (undef) {
      return IdOrLiteralOrIri{Id::makeUndefined()};
    }
    return exprResult;
  };

  auto childRes = child_->evaluate(context);
  return std::visit(impl, std::move(childRes));
}
|
||
} // namespace sparqlExpression::detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
// Copyright 2024, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// Author: Christoph Ullinger <[email protected]> | ||
|
||
#pragma once | ||
|
||
#include <cmath> | ||
#include <functional> | ||
#include <memory> | ||
#include <variant> | ||
|
||
#include "engine/sparqlExpressions/AggregateExpression.h" | ||
#include "engine/sparqlExpressions/LiteralExpression.h" | ||
#include "engine/sparqlExpressions/NaryExpression.h" | ||
#include "engine/sparqlExpressions/SparqlExpression.h" | ||
#include "engine/sparqlExpressions/SparqlExpressionTypes.h" | ||
#include "engine/sparqlExpressions/SparqlExpressionValueGetters.h" | ||
#include "global/ValueId.h" | ||
|
||
namespace sparqlExpression { | ||
|
||
namespace detail { | ||
|
||
/// The STDEV Expression | ||
|
||
// Helper expression: The individual deviation squares. A DeviationExpression | ||
// over X corresponds to the value (X - AVG(X))^2. | ||
class DeviationExpression : public SparqlExpression { | ||
private: | ||
Ptr child_; | ||
|
||
public: | ||
explicit DeviationExpression(Ptr&& child) : child_{std::move(child)} {} | ||
|
||
// __________________________________________________________________________ | ||
ExpressionResult evaluate(EvaluationContext* context) const override; | ||
|
||
// __________________________________________________________________________ | ||
AggregateStatus isAggregate() const override { | ||
return SparqlExpression::AggregateStatus::NoAggregate; | ||
} | ||
|
||
// __________________________________________________________________________ | ||
[[nodiscard]] string getCacheKey( | ||
const VariableToColumnMap& varColMap) const override { | ||
return absl::StrCat("[ SQ.DEVIATION ]", child_->getCacheKey(varColMap)); | ||
} | ||
|
||
private: | ||
// _________________________________________________________________________ | ||
std::span<SparqlExpression::Ptr> childrenImpl() override { | ||
return {&child_, 1}; | ||
} | ||
}; | ||
|
||
// Separate subclass of AggregateOperation, that replaces its child with a | ||
// DeviationExpression of this child. Everything else is left untouched. | ||
template <typename AggregateOperation, | ||
typename FinalOperation = decltype(identity)> | ||
class DeviationAggExpression | ||
: public AggregateExpression<AggregateOperation, FinalOperation> { | ||
public: | ||
// __________________________________________________________________________ | ||
DeviationAggExpression(bool distinct, SparqlExpression::Ptr&& child, | ||
AggregateOperation aggregateOp = AggregateOperation{}) | ||
: AggregateExpression<AggregateOperation, FinalOperation>( | ||
distinct, std::make_unique<DeviationExpression>(std::move(child)), | ||
aggregateOp){}; | ||
}; | ||
|
||
// The final operation of the standard deviation: divide the aggregated value
// by the degrees of freedom (`numElements - 1`) and take the square root of
// the quotient. If the degrees of freedom are not positive, the result is 0.
inline auto stdevFinalOperation = [](const NumericValue& aggregation,
                                     size_t numElements) {
  // Only the type of this helper is used below.
  auto sqrtOfQuotient = [](double value, double degreesOfFreedom) {
    return degreesOfFreedom <= 0 ? 0.0 : std::sqrt(value / degreesOfFreedom);
  };
  return makeNumericExpressionForAggregate<decltype(sqrtOfQuotient)>()(
      aggregation, NumericValue{static_cast<double>(numElements) - 1});
};
|
||
// The actual Standard Deviation Expression | ||
// Mind the explicit instantiation of StdevExpressionBase in | ||
// AggregateExpression.cpp | ||
using StdevExpressionBase = | ||
DeviationAggExpression<AvgOperation, decltype(stdevFinalOperation)>; | ||
class StdevExpression : public StdevExpressionBase { | ||
using StdevExpressionBase::StdevExpressionBase; | ||
ValueId resultForEmptyGroup() const override { return Id::makeFromDouble(0); } | ||
}; | ||
|
||
} // namespace detail | ||
|
||
using detail::StdevExpression; | ||
|
||
} // namespace sparqlExpression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.