Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/engine/SpatialJoinParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace ad_utility::detail::parallel_wkt_parser {
// _____________________________________________________________________________
WKTParser::WKTParser(sj::Sweeper* sweeper, size_t numThreads,
bool usePrefiltering,
const std::optional<util::geo::DBox>& prefilterLatLngBox,
const std::optional<::util::geo::DBox>& prefilterLatLngBox,
const Index& index)
: sj::WKTParserBase<SpatialJoinParseJob>(sweeper, numThreads),
_numSkipped(numThreads),
Expand Down Expand Up @@ -66,22 +66,22 @@ void WKTParser::processQueue(size_t t) {
parseCounter++;
} else if (dt == Datatype::GeoPoint) {
const auto& p = job.valueId.getGeoPoint();
const util::geo::DPoint utilPoint{p.getLng(), p.getLat()};
const ::util::geo::DPoint utilPoint{p.getLng(), p.getLat()};

// If point is not contained in the prefilter box, we can skip it
// immediately instead of feeding it to the parser.
if (_prefilterLatLngBox.has_value() &&
!util::geo::intersects(_prefilterLatLngBox.value(), utilPoint)) {
!::util::geo::intersects(_prefilterLatLngBox.value(), utilPoint)) {
prefilterCounter++;
continue;
}
// parse point directly
auto mercPoint = latLngToWebMerc(utilPoint);

util::geo::I32Point addPoint{
::util::geo::I32Point addPoint{
static_cast<int32_t>(mercPoint.getX() * PREC),
static_cast<int32_t>(mercPoint.getY() * PREC)};
_bboxes[t] = util::geo::extendBox(
_bboxes[t] = ::util::geo::extendBox(
_sweeper->add(addPoint, std::to_string(job.line), job.side, w),
_bboxes[t]);
parseCounter++;
Expand Down
4 changes: 2 additions & 2 deletions src/engine/SpatialJoinParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ inline bool operator==(const SpatialJoinParseJob& a,
class WKTParser : public sj::WKTParserBase<SpatialJoinParseJob> {
public:
WKTParser(sj::Sweeper* sweeper, size_t numThreads, bool usePrefiltering,
const std::optional<util::geo::DBox>& prefilterLatLngBox,
const std::optional<::util::geo::DBox>& prefilterLatLngBox,
const Index& index);

// Enqueue a new row from the input table (given the `ValueId` of the
Expand Down Expand Up @@ -69,7 +69,7 @@ class WKTParser : public sj::WKTParserBase<SpatialJoinParseJob> {

// Configure prefiltering geometries by bounding box.
bool _usePrefiltering;
std::optional<util::geo::DBox> _prefilterLatLngBox;
std::optional<::util::geo::DBox> _prefilterLatLngBox;

// A reference to QLever's index is needed to access precomputed geometry
// bounding boxes and to resolve `ValueId`s into WKT literals.
Expand Down
58 changes: 38 additions & 20 deletions src/index/DeltaTriples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "index/Index.h"
#include "index/IndexImpl.h"
#include "index/LocatedTriples.h"
#include "util/LruCache.h"
#include "util/Serializer/TripleSerializer.h"

// ____________________________________________________________________________
Expand Down Expand Up @@ -167,16 +166,21 @@ DeltaTriplesCount DeltaTriples::getCounts() const {
}

// _____________________________________________________________________________
DeltaTriples::Triples DeltaTriples::makeInternalTriples(
const Triples& triples) {
DeltaTriples::Triples DeltaTriples::makeInternalTriples(const Triples& triples,
bool insertion) {
// NOTE: If this logic is ever changed, you need to also change the code
// in `IndexBuilderTypes.h`, the function `getIdMapLambdas` specifically,
// which adds the same extra triples for language tags to the internal triples
// on the initial index build.
Triples internalTriples;
constexpr size_t predicateCacheSize = 50;
ad_utility::util::LRUCache<Id::T, ad_utility::triple_component::Iri>
predicateCache{predicateCacheSize};
// Initialize on first use.
if (languagePredicate_.isUndefined()) {
languagePredicate_ =
TripleComponent{
ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)}
.toValueId(index_.getVocab(), localVocab_,
index_.encodedIriManager());
}
for (const auto& triple : triples) {
const auto& ids = triple.ids();
Id objectId = ids.at(2);
Expand All @@ -188,28 +192,39 @@ DeltaTriples::Triples DeltaTriples::makeInternalTriples(
continue;
}
const auto& predicate =
predicateCache.getOrCompute(ids.at(1).getBits(), [this](Id::T bits) {
predicateCache_.getOrCompute(ids.at(1).getBits(), [this](Id::T bits) {
auto optionalPredicate = ExportQueryExecutionTrees::idToLiteralOrIri(
index_, Id::fromBits(bits), localVocab_, true);
AD_CORRECTNESS_CHECK(optionalPredicate.has_value());
AD_CORRECTNESS_CHECK(optionalPredicate.value().isIri());
return std::move(optionalPredicate.value().getIri());
});
auto specialPredicate = ad_utility::convertToLanguageTaggedPredicate(
predicate,
asStringViewUnsafe(optionalLiteralOrIri.value().getLanguageTag()));
auto langtag =
asStringViewUnsafe(optionalLiteralOrIri.value().getLanguageTag());
auto specialPredicate =
ad_utility::convertToLanguageTaggedPredicate(predicate, langtag);
Id specialId = TripleComponent{std::move(specialPredicate)}.toValueId(
index_.getVocab(), localVocab_, index_.encodedIriManager());
// NOTE: We currently only add one of the language triples, specifically
// `<subject> @language@<predicate> "object"@language` because it is
// directly tied to a regular triple, so insertion and removal work exactly
// the same. `<object> ql:langtag <@language>` on the other hand needs
// either some sort of reference counting or we have to keep it
// indefinitely, even if the object is removed. This means that some queries
// will return no results for entries with language tags that were inserted
// via an UPDATE operation.
// Extra triple `<subject> @language@<predicate> "object"@language`.
internalTriples.push_back(
IdTriple<0>{std::array{ids.at(0), specialId, ids.at(2), ids.at(3)}});
Id langtagId =
languageTagCache_.getOrCompute(langtag, [this](const std::string& tag) {
return TripleComponent{ad_utility::convertLangtagToEntityUri(tag)}
.toValueId(index_.getVocab(), localVocab_,
index_.encodedIriManager());
});

// Because we don't track the exact counts of existing objects, we just
// conservatively add these internal triples on insertion, and never remove
// them. This is inefficient, but never wrong because queries that use these
// internal triples will always join these internal triples with a regular
// index scan.
if (insertion) {
// Extra triple `"object"@language ql:langtag <@language>`.
internalTriples.push_back(IdTriple<0>{
std::array{ids.at(2), languagePredicate_, langtagId, ids.at(3)}});
}
}
// Because of the special predicates, we need to re-sort the triples.
ql::ranges::sort(internalTriples);
Expand All @@ -221,7 +236,7 @@ void DeltaTriples::insertTriples(CancellationHandle cancellationHandle,
Triples triples,
ad_utility::timer::TimeTracer& tracer) {
tracer.beginTrace("makeInternalTriples");
auto internalTriples = makeInternalTriples(triples);
auto internalTriples = makeInternalTriples(triples, true);
tracer.endTrace("makeInternalTriples");
tracer.beginTrace("externalPermutation");
modifyTriplesImpl<false, true>(cancellationHandle, std::move(triples),
Expand All @@ -240,7 +255,7 @@ void DeltaTriples::deleteTriples(CancellationHandle cancellationHandle,
Triples triples,
ad_utility::timer::TimeTracer& tracer) {
tracer.beginTrace("makeInternalTriples");
auto internalTriples = makeInternalTriples(triples);
auto internalTriples = makeInternalTriples(triples, false);
tracer.endTrace("makeInternalTriples");
tracer.beginTrace("externalPermutation");
modifyTriplesImpl<false, false>(cancellationHandle, std::move(triples),
Expand Down Expand Up @@ -417,6 +432,9 @@ DeltaTriplesCount operator-(const DeltaTriplesCount& lhs,
DeltaTriples::DeltaTriples(const Index& index)
: DeltaTriples(index.getImpl()) {}

// ____________________________________________________________________________
DeltaTriples::DeltaTriples(const IndexImpl& index) : index_{index} {}

// ____________________________________________________________________________
DeltaTriplesManager::DeltaTriplesManager(const IndexImpl& index)
: deltaTriples_{index},
Expand Down
27 changes: 24 additions & 3 deletions src/index/DeltaTriples.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "index/IndexBuilderTypes.h"
#include "index/LocatedTriples.h"
#include "index/Permutation.h"
#include "util/LruCache.h"
#include "util/Synchronized.h"
#include "util/TimeTracer.h"

Expand Down Expand Up @@ -128,6 +129,23 @@ class DeltaTriples {
// See the documentation of `setPersist()` below.
std::optional<std::string> filenameForPersisting_;

// Store the id of the `ql:langtag` predicate to avoid repeated disk lookups.
// This is initialized on first use.
Id languagePredicate_ = Id::makeUndefined();

// Store commonly used language tags of the form `<@lang>` to avoid repeated
// disk lookups.
static constexpr size_t languageTagCacheSize_ = 1000;
ad_utility::util::LRUCache<std::string, Id> languageTagCache_{
languageTagCacheSize_};

// Cache commonly used predicates and their IRI representation between calls
// of `makeInternalTriples`. For example in wikidata `wdt:P31`, or `wdt:P279`
// are frequently used, so we try to avoid an expensive lookup from disk.
static constexpr size_t predicateCacheSize_ = 1000;
ad_utility::util::LRUCache<Id::T, ad_utility::triple_component::Iri>
predicateCache_{predicateCacheSize_};

// Assert that the Permutation Enum values have the expected int values.
// This is used to store and lookup items that exist for permutation in an
// array.
Expand Down Expand Up @@ -167,7 +185,7 @@ class DeltaTriples {
public:
// Construct for given index.
explicit DeltaTriples(const Index& index);
explicit DeltaTriples(const IndexImpl& index) : index_{index} {}
explicit DeltaTriples(const IndexImpl& index);

// Disable accidental copying.
DeltaTriples(const DeltaTriples&) = delete;
Expand Down Expand Up @@ -213,8 +231,11 @@ class DeltaTriples {
// bunch of triples to be inserted into the internal permutation to make
// things like efficient language filters work. This currently performs a
// lookup from disk to check the language tag, but in the future this may be
// implemented more efficiently.
Triples makeInternalTriples(const Triples& triples);
// implemented more efficiently. If `insertion` is false, this indicates that
// the triples are meant for deletion. In that case no triples are returned
// that may be unsafe to delete. In particular this refers to triples of the
// form `<object> ql:langtag <@language>`.
Triples makeInternalTriples(const Triples& triples, bool insertion);

// Insert triples.
void insertTriples(CancellationHandle cancellationHandle, Triples triples,
Expand Down
19 changes: 8 additions & 11 deletions src/rdfTypes/GeometryInfoHelpersImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ using ParsedWkt =
MultiPoint<CoordType>, MultiLine<CoordType>,
MultiPolygon<CoordType>, Collection<CoordType>>;
using ParseResult = std::pair<WKTType, std::optional<ParsedWkt>>;
using DAnyGeometry = util::geo::AnyGeometry<CoordType>;
using DAnyGeometry = AnyGeometry<CoordType>;

template <typename T>
CPP_concept WktSingleGeometryType =
Expand Down Expand Up @@ -245,19 +245,16 @@ inline std::optional<std::string_view> wktTypeToIri(uint8_t type) {

// Reverse projection applied by `sj::WKTParser`: convert coordinates from web
// mercator int32 to normal lat-long double coordinates.
inline util::geo::DPoint projectInt32WebMercToDoubleLatLng(
const util::geo::I32Point& p) {
return util::geo::webMercToLatLng<double>(
static_cast<double>(p.getX()) / PREC,
static_cast<double>(p.getY()) / PREC);
};
inline DPoint projectInt32WebMercToDoubleLatLng(const I32Point& p) {
return webMercToLatLng<double>(static_cast<double>(p.getX()) / PREC,
static_cast<double>(p.getY()) / PREC);
}

// Same as above, but for a bounding box.
inline util::geo::DBox projectInt32WebMercToDoubleLatLng(
const util::geo::I32Box& box) {
inline DBox projectInt32WebMercToDoubleLatLng(const I32Box& box) {
return {projectInt32WebMercToDoubleLatLng(box.getLowerLeft()),
projectInt32WebMercToDoubleLatLng(box.getUpperRight())};
};
}

// Counts the number of geometries in a geometry collection.
inline uint32_t countChildGeometries(const ParsedWkt& geom) {
Expand Down Expand Up @@ -657,7 +654,7 @@ struct MetricDistanceVisitor {
// Delegate the actual distance computation to `pb_util`.
CPP_template(typename T, typename U)(requires IsPairOfUtilGeoms<T, U>) double
operator()(const T& a, const U& b) const {
return util::geo::webMercMeterDist<T, U>(a, b);
return webMercMeterDist<T, U>(a, b);
}

// Handle optional geometries that may be contained in a `ParseResult`.
Expand Down
9 changes: 5 additions & 4 deletions src/util/LruCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ class LRUCache {
// found. Otherwise, compute the value using `computeFunction` and store it in
// the cache. If the cache is already at maximum capacity, evict the least
// recently used element.
CPP_template(typename Func)(
CPP_template(typename Key, typename Func)(
requires ad_utility::InvocableWithConvertibleReturnType<
Func, V, const K&>) const V& getOrCompute(const K& key,
Func, V, const K&>) const V& getOrCompute(Key&& key,
Func computeFunction) {
auto it = cache_.find(key);
if (it != cache_.end()) {
Expand All @@ -56,9 +56,10 @@ class LRUCache {
lruKey = key;
} else {
// Push new element if not full
keys_.push_front(key);
keys_.push_front(K{key});
}
auto result = cache_.try_emplace(key, computeFunction(key), keys_.begin());
auto result = cache_.try_emplace(
AD_FWD(key), computeFunction(keys_.front()), keys_.begin());
AD_CORRECTNESS_CHECK(result.second);
return result.first->second.first;
}
Expand Down
15 changes: 13 additions & 2 deletions test/DeltaTriplesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,10 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) {
"<a> <other> \"def\"@de", "<a> <other> \"def\"@es"}));
auto a = iri("<a>");
auto b = iri("<b>");
auto lp = iri(LANGUAGE_PREDICATE);
auto de = TripleComponent{ad_utility::convertLangtagToEntityUri("de")};
auto en = TripleComponent{ad_utility::convertLangtagToEntityUri("en")};
auto es = TripleComponent{ad_utility::convertLangtagToEntityUri("es")};
EXPECT_THAT(deltaTriples,
TriplesAre({{a, b, TripleComponent{1}},
{a, b, lit("\"abc\"")},
Expand All @@ -453,7 +457,11 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) {
{a, iri("<other>"), lit("\"def\"@de")},
{a, iri("<other>"), lit("\"def\"@es")}},
{},
{{a, iri("@de@<b>"), lit("\"abc\"@de")},
{{lit("\"abc\"@de"), lp, de},
{lit("\"abc\"@en"), lp, en},
{lit("\"def\"@de"), lp, de},
{lit("\"def\"@es"), lp, es},
{a, iri("@de@<b>"), lit("\"abc\"@de")},
{a, iri("@en@<b>"), lit("\"abc\"@en")},
{a, iri("@de@<other>"), lit("\"def\"@de")},
{a, iri("@es@<other>"), lit("\"def\"@es")}},
Expand All @@ -477,7 +485,10 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) {
{a, b, iri("<abc>")},
{a, iri("<other>"), lit("\"def\"@de")},
{a, iri("<other>"), lit("\"def\"@es")}},
{},
{{lit("\"abc\"@de"), lp, de},
{lit("\"abc\"@en"), lp, en},
{lit("\"def\"@de"), lp, de},
{lit("\"def\"@es"), lp, es}},
{{a, iri("@de@<b>"), lit("\"abc\"@de")},
{a, iri("@en@<b>"), lit("\"abc\"@en")},
{a, iri("@de@<other>"), lit("\"def\"@de")},
Expand Down
14 changes: 7 additions & 7 deletions test/ExecuteUpdateTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,14 @@ TEST(ExecuteUpdate, executeUpdate) {
expectExecuteUpdate("DROP SILENT ALL", NumTriples(0, 8, 8, 0, 1));
expectExecuteUpdate("ADD <x> TO <x>", NumTriples(0, 0, 0));
expectExecuteUpdate("ADD SILENT <x> TO DEFAULT", NumTriples(0, 0, 0));
expectExecuteUpdate("ADD DEFAULT TO <x>", NumTriples(8, 0, 8, 1, 0));
expectExecuteUpdate("ADD DEFAULT TO <x>", NumTriples(8, 0, 8, 2, 0));
expectExecuteUpdate("ADD SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0));
expectExecuteUpdate("MOVE SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0));
expectExecuteUpdate("MOVE GRAPH <x> TO <x>", NumTriples(0, 0, 0));
expectExecuteUpdate("MOVE <x> TO DEFAULT", NumTriples(0, 8, 8, 0, 1));
expectExecuteUpdate("MOVE DEFAULT TO GRAPH <x>",
NumTriples(8, 8, 16, 1, 1));
expectExecuteUpdate("COPY DEFAULT TO <x>", NumTriples(8, 0, 8, 1, 0));
NumTriples(8, 8, 16, 2, 1));
expectExecuteUpdate("COPY DEFAULT TO <x>", NumTriples(8, 0, 8, 2, 0));
expectExecuteUpdate("COPY DEFAULT TO DEFAULT", NumTriples(0, 0, 0));
expectExecuteUpdate("COPY <x> TO DEFAULT", NumTriples(0, 8, 8, 0, 1));
expectExecuteUpdate("CREATE SILENT GRAPH <x>", NumTriples(0, 0, 0));
Expand Down Expand Up @@ -198,17 +198,17 @@ TEST(ExecuteUpdate, executeUpdate) {
expectExecuteUpdate("DROP SILENT ALL", NumTriples(0, 7, 7, 0, 3));
expectExecuteUpdate("ADD <q> TO <q>", NumTriples(0, 0, 0));
expectExecuteUpdate("ADD <a> TO <q>", NumTriples(0, 0, 0));
expectExecuteUpdate("ADD SILENT <q> TO DEFAULT", NumTriples(3, 0, 3, 2, 0));
expectExecuteUpdate("ADD SILENT <q> TO DEFAULT", NumTriples(3, 0, 3, 4, 0));
expectExecuteUpdate("ADD DEFAULT TO <q>", NumTriples(1, 0, 1));
expectExecuteUpdate("ADD SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0));
expectExecuteUpdate("MOVE SILENT DEFAULT TO DEFAULT", NumTriples(0, 0, 0));
expectExecuteUpdate("MOVE GRAPH <q> TO <t>", NumTriples(3, 3, 6, 2, 2));
expectExecuteUpdate("MOVE <q> TO DEFAULT", NumTriples(3, 4, 7, 2, 2));
expectExecuteUpdate("MOVE GRAPH <q> TO <t>", NumTriples(3, 3, 6, 4, 2));
expectExecuteUpdate("MOVE <q> TO DEFAULT", NumTriples(3, 4, 7, 4, 2));
expectExecuteUpdate("MOVE DEFAULT TO GRAPH <t>", NumTriples(1, 1, 2));
expectExecuteUpdate("MOVE DEFAULT TO GRAPH <q>", NumTriples(1, 4, 5, 0, 2));
expectExecuteUpdate("COPY DEFAULT TO <q>", NumTriples(1, 3, 4, 0, 2));
expectExecuteUpdate("COPY DEFAULT TO DEFAULT", NumTriples(0, 0, 0));
expectExecuteUpdate("COPY <q> TO DEFAULT", NumTriples(3, 1, 4, 2, 0));
expectExecuteUpdate("COPY <q> TO DEFAULT", NumTriples(3, 1, 4, 4, 0));
expectExecuteUpdate("CREATE SILENT GRAPH <x>", NumTriples(0, 0, 0));
expectExecuteUpdate("CREATE GRAPH <y>", NumTriples(0, 0, 0));
}
Expand Down
2 changes: 1 addition & 1 deletion test/IndexTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ TEST(IndexImpl, recomputeStatistics) {
auto newStats = indexImpl.recomputeStatistics(
index.deltaTriplesManager().getCurrentLocatedTriplesSharedState());
EXPECT_NE(newStats, indexImpl.configurationJson_);
EXPECT_EQ(newStats["num-triples"], NNAI(5, 6));
EXPECT_EQ(newStats["num-triples"], NNAI(5, 7));
EXPECT_EQ(newStats["num-predicates"], NNAI(2, 4));
if (loadAllPermutations) {
EXPECT_EQ(newStats["num-subjects"], NNAI(4, 0));
Expand Down
Loading
Loading