From e3516b18a68f6c397bb083ddd69bfe20a26e7526 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Fri, 15 Nov 2024 20:55:25 +0100 Subject: [PATCH 1/5] Fix backwards-compatibility warning for `--parse-paralel` So far, for a single input stream, the warnings also shows when `--parse-parallel` or `-p` are explicity specified when calling `IndexBuilderMain`, which is the recommended behavior. This is now fixed and the warnin is only shown when it should be. --- src/index/IndexBuilderMain.cpp | 4 +++- src/index/IndexImpl.cpp | 8 ++++---- src/index/InputFileSpecification.h | 4 ++++ test/IndexTest.cpp | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp index 2e4173d305..1b500c9dde 100644 --- a/src/index/IndexBuilderMain.cpp +++ b/src/index/IndexBuilderMain.cpp @@ -305,12 +305,14 @@ int main(int argc, char** argv) { } bool parseInParallel = getParameterValue(i, parseParallel, false); + bool parseInParallelSetExplicitly = i < parseParallel.size(); auto& filename = inputFile.at(i); if (filename == "-") { filename = "/dev/stdin"; } fileSpecs.emplace_back(filename, getFiletype(type, filename), - std::move(defaultGraph), parseInParallel); + std::move(defaultGraph), parseInParallel, + parseInParallelSetExplicitly); } return fileSpecs; }; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 1e69d9676b..feb1bedfd1 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -312,12 +312,12 @@ void IndexImpl::updateInputFileSpecificationsAndLog( " via the command-line option --parse-parallel or -p"}; } } - - if (spec.size() == 1 && !parallelParsingSpecifiedViaJson.has_value()) { + if (spec.size() == 1 && !parallelParsingSpecifiedViaJson.has_value() && + spec.at(0).parseInParallelSetExplicitly_ == false) { LOG(WARN) << "Implicitly using the parallel parser for a single input file " "for reasons of backward compatibility; this is deprecated, " - "please use the command-line option --parse-parallel or -p " - "instead" + "please explicitly use the command-line option " + "--parse-parallel or -p instead" << std::endl; spec.at(0).parseInParallel_ = true; } diff --git a/src/index/InputFileSpecification.h b/src/index/InputFileSpecification.h index 9b5afde367..6cd2e55b94 100644 --- a/src/index/InputFileSpecification.h +++ b/src/index/InputFileSpecification.h @@ -27,6 +27,10 @@ struct InputFileSpecification { // Turtle files with all prefixes at the beginning and no multiline literals. bool parseInParallel_ = false; + // Remember if the value for parallel parsing was set explicitly (via the + // command line). + bool parseInParallelSetExplicitly_; + bool operator==(const InputFileSpecification&) const = default; }; } // namespace qlever diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 6ce7f6f732..1dabacfbb8 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -528,7 +528,7 @@ TEST(IndexTest, trivialGettersAndSetters) { TEST(IndexTest, updateInputFileSpecificationsAndLog) { using enum qlever::Filetype; std::vector files{ - {"singleFile.ttl", Turtle, std::nullopt, false}}; + {"singleFile.ttl", Turtle, std::nullopt, false, false}}; using namespace ::testing; { testing::internal::CaptureStdout(); From cf4b529a6e13dd574b6c167bf687e0433e1c34ae Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Fri, 15 Nov 2024 21:52:51 +0100 Subject: [PATCH 2/5] Fix compile error for some of the platforms --- test/util/IndexTestHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 79eb77b0d5..7d9a9ef5d2 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -178,7 +178,7 @@ Index makeTestIndex(const std::string& indexBasename, index.setSettingsFile(inputFilename + ".settings.json"); index.loadAllPermutations() = loadAllPermutations; qlever::InputFileSpecification spec{inputFilename, qlever::Filetype::Turtle, - std::nullopt}; + std::nullopt, false, false}; index.createFromFiles({spec}); if (createTextIndex) { index.addTextFromContextFile("", true); From 4b4a3d14024b8a300f245fb8aedcebd8b6af60bc Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Fri, 15 Nov 2024 23:10:03 +0100 Subject: [PATCH 3/5] Try to make Clang and CodeCov happy --- src/index/IndexImpl.cpp | 6 ++++-- test/GroupByTest.cpp | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index feb1bedfd1..d9fbf2b064 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -312,8 +312,10 @@ void IndexImpl::updateInputFileSpecificationsAndLog( " via the command-line option --parse-parallel or -p"}; } } - if (spec.size() == 1 && !parallelParsingSpecifiedViaJson.has_value() && - spec.at(0).parseInParallelSetExplicitly_ == false) { + bool setParallelParsingImplicitly = + spec.size() == 1 && !parallelParsingSpecifiedViaJson.has_value() && + !spec.at(0).parseInParallelSetExplicitly_; + if (setParallelParsingImplicitly) { LOG(WARN) << "Implicitly using the parallel parser for a single input file " "for reasons of backward compatibility; this is deprecated, " "please explicitly use the command-line option " diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index a2c28a1b5e..63467eb01f 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -73,8 +73,8 @@ class GroupByTest : public ::testing::Test { _index.setKbName("group_by_test"); _index.setTextName("group_by_test"); _index.setOnDiskBase("group_ty_test"); - _index.createFromFiles( - {{"group_by_test.nt", qlever::Filetype::Turtle, std::nullopt}}); + _index.createFromFiles({{"group_by_test.nt", qlever::Filetype::Turtle, + std::nullopt, false, false}}); _index.addTextFromContextFile("group_by_test.words", false); _index.buildDocsDB("group_by_test.documents"); From 359aba75b46b88b5bbb7e7824a6533d53593398d Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Sat, 16 Nov 2024 10:39:32 +0100 Subject: [PATCH 4/5] Refactor and improve the whole code, let's see --- src/index/IndexImpl.cpp | 53 +++++++++--------- src/index/InputFileSpecification.h | 10 ++-- test/GroupByTest.cpp | 4 +- test/IndexTest.cpp | 86 +++++++++++++++++++++--------- test/util/IndexTestHelpers.cpp | 2 +- 5 files changed, 97 insertions(+), 58 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index d9fbf2b064..93b2fc09ea 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -291,38 +291,43 @@ std::pair IndexImpl::createInternalPSOandPOS( void IndexImpl::updateInputFileSpecificationsAndLog( std::vector& spec, std::optional parallelParsingSpecifiedViaJson) { - if (spec.size() == 1) { - LOG(INFO) << "Processing triples from " << spec.at(0).filename_ << " ..." - << std::endl; - } else { - LOG(INFO) << "Processing triples from " << spec.size() - << " input streams ..." << std::endl; - } - if (parallelParsingSpecifiedViaJson.value_or(false) == true) { + std::string pleaseUseParallelParsingOption = + "please use the command-line option --parse-parallel or -p"; + // Parallel parsing specified in the `settings.json` file. This is deprecated + // for a single input stream and forbidden for multiple input streams. + if (parallelParsingSpecifiedViaJson.has_value()) { if (spec.size() == 1) { - LOG(WARN) << "Parallel parsing set to `true` in the `.settings.json` " - "file; this is deprecated, please use the command-line " - " option --parse-parallel or -p instead" - << std::endl; - spec.at(0).parseInParallel_ = true; + LOG(WARN) << "Parallel parsing set in the `.settings.json` file; this is " + "deprecated, " + << pleaseUseParallelParsingOption << std::endl; + spec.at(0).parseInParallel_ = parallelParsingSpecifiedViaJson.value(); } else { - throw std::runtime_error{ - "For more than one input file, the parallel parsing must not be " - "specified via the `.settings.json` file, but has to be specified " - " via the command-line option --parse-parallel or -p"}; + throw std::runtime_error{absl::StrCat( + "Parallel parsing set in the `.settings.json` file; this is " + "forbidden for multiple input streams, ", + pleaseUseParallelParsingOption)}; } } - bool setParallelParsingImplicitly = - spec.size() == 1 && !parallelParsingSpecifiedViaJson.has_value() && - !spec.at(0).parseInParallelSetExplicitly_; - if (setParallelParsingImplicitly) { + // For a single input stream, if parallel parsing is not specified explicitly + // on the command line, we set if implicitly for backward compatibility. + if (!parallelParsingSpecifiedViaJson.has_value() && spec.size() == 1 && + !spec.at(0).parseInParallelSetExplicitly_) { LOG(WARN) << "Implicitly using the parallel parser for a single input file " "for reasons of backward compatibility; this is deprecated, " - "please explicitly use the command-line option " - "--parse-parallel or -p instead" - << std::endl; + << pleaseUseParallelParsingOption << std::endl; spec.at(0).parseInParallel_ = true; } + // For a single input stream, show the name and whether we parse in parallel. + // For multiple input streams, only show the number of streams. + if (spec.size() == 1) { + LOG(INFO) << "Parsing triples from single input stream " + << spec.at(0).filename_ << " (parallel = " + << (spec.at(0).parseInParallel_ ? "true" : "false") << ") ..." + << std::endl; + } else { + LOG(INFO) << "Processing triples from " << spec.size() + << " input streams ..." << std::endl; + } } // _____________________________________________________________________________ diff --git a/src/index/InputFileSpecification.h b/src/index/InputFileSpecification.h index 6cd2e55b94..efc8c204ce 100644 --- a/src/index/InputFileSpecification.h +++ b/src/index/InputFileSpecification.h @@ -1,7 +1,7 @@ -// Copyright 2024, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Kalmbach(joka921) -// +// Copyright 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Author: Johannes Kalmbach + #pragma once #include @@ -29,7 +29,7 @@ struct InputFileSpecification { // Remember if the value for parallel parsing was set explicitly (via the // command line). - bool parseInParallelSetExplicitly_; + bool parseInParallelSetExplicitly_ = false; bool operator==(const InputFileSpecification&) const = default; }; diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index 63467eb01f..a2c28a1b5e 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -73,8 +73,8 @@ class GroupByTest : public ::testing::Test { _index.setKbName("group_by_test"); _index.setTextName("group_by_test"); _index.setOnDiskBase("group_ty_test"); - _index.createFromFiles({{"group_by_test.nt", qlever::Filetype::Turtle, - std::nullopt, false, false}}); + _index.createFromFiles( + {{"group_by_test.nt", qlever::Filetype::Turtle, std::nullopt}}); _index.addTextFromContextFile("group_by_test.words", false); _index.buildDocsDB("group_by_test.documents"); diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 1dabacfbb8..76e81e2916 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -1,6 +1,8 @@ -// Copyright 2015, University of Freiburg, +// Copyright 2015 - 2024, University of Freiburg, // Chair of Algorithms and Data Structures. -// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) +// Authors: Björn Buchhold [2015 - 2017] +// Johannes Kalmbach +// Hannah Bast #include #include @@ -527,50 +529,82 @@ TEST(IndexTest, trivialGettersAndSetters) { TEST(IndexTest, updateInputFileSpecificationsAndLog) { using enum qlever::Filetype; - std::vector files{ - {"singleFile.ttl", Turtle, std::nullopt, false, false}}; + std::vector singleFileSpec = { + {"singleFile.ttl", Turtle, std::nullopt}}; + std::vector twoFilesSpec = { + {"firstFile.ttl", Turtle, std::nullopt}, + {"secondFile.ttl", Turtle, std::nullopt}}; using namespace ::testing; + + // Parallel parsing not specified anywhere. For a single input stream, we then + // default to `true` for reasons of backwards compatibility, but this is + // deprecated. For multiple input streams, we default to `false` and this is + // normal behavior. + { + singleFileSpec.at(0).parseInParallelSetExplicitly_ = false; + testing::internal::CaptureStdout(); + IndexImpl::updateInputFileSpecificationsAndLog(singleFileSpec, + std::nullopt); + EXPECT_THAT(testing::internal::GetCapturedStdout(), + AllOf(HasSubstr("singleFile.ttl"), HasSubstr("deprecated"))); + EXPECT_TRUE(singleFileSpec.at(0).parseInParallel_); + } { + twoFilesSpec.at(0).parseInParallelSetExplicitly_ = false; + twoFilesSpec.at(1).parseInParallelSetExplicitly_ = false; testing::internal::CaptureStdout(); - IndexImpl::updateInputFileSpecificationsAndLog(files, false); + IndexImpl::updateInputFileSpecificationsAndLog(twoFilesSpec, std::nullopt); EXPECT_THAT( testing::internal::GetCapturedStdout(), - AllOf(HasSubstr("from singleFile.ttl"), Not(HasSubstr("parallel")))); - EXPECT_FALSE(files.at(0).parseInParallel_); + AllOf(HasSubstr("from 2 input streams"), Not(HasSubstr("deprecated")))); + EXPECT_FALSE(twoFilesSpec.at(0).parseInParallel_); + EXPECT_FALSE(twoFilesSpec.at(1).parseInParallel_); } + // Parallel parsing specified on the command line and not in the + // `settings.json`. This is normal behavior (no deprecation warning). { + singleFileSpec.at(0).parseInParallel_ = true; + singleFileSpec.at(0).parseInParallelSetExplicitly_ = true; testing::internal::CaptureStdout(); - IndexImpl::updateInputFileSpecificationsAndLog(files, true); - EXPECT_THAT(testing::internal::GetCapturedStdout(), - AllOf(HasSubstr("from singleFile.ttl"), - HasSubstr("settings.json"), HasSubstr("deprecated"))); - EXPECT_TRUE(files.at(0).parseInParallel_); + IndexImpl::updateInputFileSpecificationsAndLog(singleFileSpec, + std::nullopt); + EXPECT_THAT( + testing::internal::GetCapturedStdout(), + AllOf(HasSubstr("singleFile.ttl"), Not(HasSubstr("deprecated")))); + EXPECT_TRUE(singleFileSpec.at(0).parseInParallel_); } { + twoFilesSpec.at(0).parseInParallel_ = true; + twoFilesSpec.at(1).parseInParallel_ = false; + twoFilesSpec.at(0).parseInParallelSetExplicitly_ = true; + twoFilesSpec.at(1).parseInParallelSetExplicitly_ = true; testing::internal::CaptureStdout(); - IndexImpl::updateInputFileSpecificationsAndLog(files, std::nullopt); - EXPECT_THAT(testing::internal::GetCapturedStdout(), - AllOf(HasSubstr("from singleFile.ttl"), - HasSubstr("single input"), HasSubstr("deprecated"))); - EXPECT_TRUE(files.at(0).parseInParallel_); + IndexImpl::updateInputFileSpecificationsAndLog(twoFilesSpec, std::nullopt); + EXPECT_THAT( + testing::internal::GetCapturedStdout(), + AllOf(HasSubstr("from 2 input streams"), Not(HasSubstr("deprecated")))); + EXPECT_TRUE(twoFilesSpec.at(0).parseInParallel_); + EXPECT_FALSE(twoFilesSpec.at(1).parseInParallel_); } + // Parallel parsing not specified on the command line, but explicitly set in + // the `settings.json` file. This is deprecated for a single input + // stream and forbidden for multiple input streams. { - files.emplace_back("secondFile.ttl", Turtle, std::nullopt, false); - auto filesCopy = files; + singleFileSpec.at(0).parseInParallelSetExplicitly_ = false; testing::internal::CaptureStdout(); - IndexImpl::updateInputFileSpecificationsAndLog(files, false); + IndexImpl::updateInputFileSpecificationsAndLog(singleFileSpec, true); EXPECT_THAT(testing::internal::GetCapturedStdout(), - AllOf(HasSubstr("from 2 input streams"), - Not(HasSubstr("is deprecated")))); - EXPECT_EQ(files, filesCopy); + AllOf(HasSubstr("singleFile.ttl"), HasSubstr("deprecated"))); + EXPECT_TRUE(singleFileSpec.at(0).parseInParallel_); } - { + twoFilesSpec.at(0).parseInParallelSetExplicitly_ = false; + twoFilesSpec.at(1).parseInParallelSetExplicitly_ = false; AD_EXPECT_THROW_WITH_MESSAGE( - IndexImpl::updateInputFileSpecificationsAndLog(files, true), - HasSubstr("but has to be specified")); + IndexImpl::updateInputFileSpecificationsAndLog(twoFilesSpec, true), + AllOf(Not(HasSubstr("from 2 input streams")), HasSubstr("forbidden"))); } } diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 7d9a9ef5d2..79eb77b0d5 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -178,7 +178,7 @@ Index makeTestIndex(const std::string& indexBasename, index.setSettingsFile(inputFilename + ".settings.json"); index.loadAllPermutations() = loadAllPermutations; qlever::InputFileSpecification spec{inputFilename, qlever::Filetype::Turtle, - std::nullopt, false, false}; + std::nullopt}; index.createFromFiles({spec}); if (createTextIndex) { index.addTextFromContextFile("", true); From b81d670fa94bd512f23c4dadd8fdc5d1a7a2aea0 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Sat, 16 Nov 2024 13:53:23 +0100 Subject: [PATCH 5/5] Try to bring patch coverage to 100% --- test/IndexTest.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 76e81e2916..5bb641b1ef 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -563,8 +563,8 @@ TEST(IndexTest, updateInputFileSpecificationsAndLog) { // Parallel parsing specified on the command line and not in the // `settings.json`. This is normal behavior (no deprecation warning). - { - singleFileSpec.at(0).parseInParallel_ = true; + for (auto parallelParsing : {true, false}) { + singleFileSpec.at(0).parseInParallel_ = parallelParsing; singleFileSpec.at(0).parseInParallelSetExplicitly_ = true; testing::internal::CaptureStdout(); IndexImpl::updateInputFileSpecificationsAndLog(singleFileSpec, @@ -572,7 +572,7 @@ TEST(IndexTest, updateInputFileSpecificationsAndLog) { EXPECT_THAT( testing::internal::GetCapturedStdout(), AllOf(HasSubstr("singleFile.ttl"), Not(HasSubstr("deprecated")))); - EXPECT_TRUE(singleFileSpec.at(0).parseInParallel_); + EXPECT_EQ(singleFileSpec.at(0).parseInParallel_, parallelParsing); } { twoFilesSpec.at(0).parseInParallel_ = true;