diff --git a/pygribjump/src/pygribjump/pygribjump.py b/pygribjump/src/pygribjump/pygribjump.py index e52e7a0..b01ef16 100644 --- a/pygribjump/src/pygribjump/pygribjump.py +++ b/pygribjump/src/pygribjump/pygribjump.py @@ -258,7 +258,7 @@ class ExtractionRequest: The ranges to extract. """ def __init__(self, req, ranges, gridHash=None): - reqstr = "retrieve,"+dic_to_request(req) + reqstr = dic_to_request(req) rangestr = list_to_rangestr(ranges) request = ffi.new('gribjump_extraction_request_t**') c_reqstr = ffi.new("char[]", reqstr.encode()) diff --git a/src/gribjump/Engine.cc b/src/gribjump/Engine.cc index c33d82d..d9a6c1c 100644 --- a/src/gribjump/Engine.cc +++ b/src/gribjump/Engine.cc @@ -11,8 +11,10 @@ /// @author Christopher Bradley #include "eckit/log/Plural.h" +#include "eckit/utils/StringTools.h" #include "metkit/mars/MarsExpension.h" +#include "metkit/mars/MarsParser.h" #include "gribjump/Engine.h" #include "gribjump/ExtractionItem.h" @@ -26,95 +28,8 @@ namespace gribjump { //---------------------------------------------------------------------------------------------------------------------- // Stringify requests and keys alphabetically -namespace -{ -std::string requestToStr(const metkit::mars::MarsRequest& request) { - std::stringstream ss; - std::string separator = ""; - std::vector keys = request.params(); - std::sort(keys.begin(), keys.end()); - for(const auto& key : keys) { - ss << separator << key << "=" << request[key]; - separator = ","; - } - return ss.str(); -} - -//---------------------------------------------------------------------------------------------------------------------- - - -class CollectFlattenedRequests : public metkit::mars::FlattenCallback { -public: - CollectFlattenedRequests(std::vector& flattenedRequests) : flattenedRequests_(flattenedRequests) {} - - virtual void operator()(const metkit::mars::MarsRequest& req) { - flattenedRequests_.push_back(req); - } - - std::vector& flattenedRequests_; -}; - -std::vector flattenRequest(const metkit::mars::MarsRequest& request) { - - metkit::mars::MarsExpension expansion(false); - metkit::mars::DummyContext ctx; - std::vector flattenedRequests; - - CollectFlattenedRequests cb(flattenedRequests); - expansion.flatten(ctx, request, cb); - - LOG_DEBUG_LIB(LibGribJump) << "Base request: " << request << std::endl; - - for (const auto& req : flattenedRequests) { - LOG_DEBUG_LIB(LibGribJump) << " Flattened request: " << req << std::endl; - } - - return flattenedRequests; -} - -// Stringify requests, and flatten if necessary - -typedef std::map> flattenedKeys_t; - -flattenedKeys_t buildFlatKeys(const ExtractionRequests& requests, bool flatten) { - - flattenedKeys_t keymap; - - for (const auto& req : requests) { - const metkit::mars::MarsRequest& baseRequest = req.request(); - keymap[baseRequest] = std::vector(); - - // Assume baseRequest has cardinality >= 1 and may need to be flattened - if (flatten) { - std::vector flat = flattenRequest(baseRequest); - for (const auto& r : flat) { - keymap[baseRequest].push_back(requestToStr(r)); - } - } - - // Assume baseRequest has cardinality 1 - else { - keymap[baseRequest].push_back(requestToStr(baseRequest)); - } - - eckit::Log::debug() << "Flattened keys for request " << baseRequest << ": " << keymap[baseRequest] << std::endl; - } - - return keymap; -} - -metkit::mars::MarsRequest unionRequest(const MarsRequests& requests) { - - /// @todo: we should do some check not to merge on keys like class and stream - metkit::mars::MarsRequest unionRequest = requests.front(); - for(size_t i = 1; i < requests.size(); ++i) { - unionRequest.merge(requests[i]); - } - - eckit::Log::info() << "Gribjump: Union request is " << unionRequest << std::endl; - - return unionRequest; -} +namespace { +// ---------------------------------------------------------------------------------------------------------------------- bool isRemote(eckit::URI uri) { return uri.scheme() == "fdb"; @@ -127,43 +42,73 @@ Engine::Engine() {} Engine::~Engine() {} -ExItemMap Engine::buildKeyToExtractionItem(const ExtractionRequests& requests, bool flatten){ - ExItemMap keyToExtractionItem; - - flattenedKeys_t flatKeys = buildFlatKeys(requests, flatten); // Map from base request to {flattened keys} - - LOG_DEBUG_LIB(LibGribJump) << "Built flat keys" << std::endl; +metkit::mars::MarsRequest Engine::buildRequestMap(ExtractionRequests& requests, ExItemMap& keyToExtractionItem ){ + // Split strings into one unified map + // We also canonicalise the requests such that their keys are in alphabetical order + /// @todo: Note that it is not in general possible to arbitrary requests into a single request. In future, we should look into + /// merging into the minimum number of requests. + + std::map> keyValues; + for (auto& r : requests) { + const std::string& s = r.requestString(); + std::vector kvs = eckit::StringTools::split(",", s); /// @todo might be faster to use tokenizer directly. + for (auto& kv : kvs) { + std::vector kv_s = eckit::StringTools::split("=", kv); + if (kv_s.size() != 2) continue; // ignore verb + keyValues[kv_s[0]].insert(kv_s[1]); + } - // Create the 1-to-1 map - for (size_t i = 0; i < requests.size(); i++) { - const metkit::mars::MarsRequest& basereq = requests[i].request(); - const std::vector keys = flatKeys[basereq]; - for (const auto& key : keys) { - ASSERT(keyToExtractionItem.find(key) == keyToExtractionItem.end()); /// @todo support duplicated requests? - auto extractionItem = std::make_unique(basereq, requests[i].ranges()); - extractionItem->gridHash(requests[i].gridHash()); - keyToExtractionItem.emplace(key, std::move(extractionItem)); // 1-to-1-map + // Canonicalise string by sorting keys + std::sort(kvs.begin(), kvs.end()); + std::string canonicalised = ""; + for (auto& kv : kvs) { + canonicalised += kv; + if (kv != kvs.back()) { + canonicalised += ","; + } } + ASSERT(keyToExtractionItem.find(canonicalised) == keyToExtractionItem.end()); // no repeats + r.requestString(canonicalised); + auto extractionItem = std::make_unique(canonicalised, r.ranges()); + extractionItem->gridHash(r.gridHash()); + keyToExtractionItem.emplace(canonicalised, std::move(extractionItem)); // 1-to-1-map } - return keyToExtractionItem; -} - -filemap_t Engine::buildFileMap(const ExtractionRequests& requests, ExItemMap& keyToExtractionItem) { - // Map files to ExtractionItem - eckit::Timer timer("Gribjump Engine: Building file map"); - - std::vector marsrequests; - for (const auto& req : requests) { - marsrequests.push_back(req.request()); + // Construct the union request + + std::string result = "retrieve,"; + size_t i = 0; + for (auto& [key, values] : keyValues) { + result += key + "="; + if (values.size() == 1) { + result += *values.begin(); + } else { + size_t j = 0; + for (auto& value : values) { + result += value; + if (j != values.size() - 1) { + result += "/"; + } + j++; + } + } + if (i != keyValues.size() - 1) { + result += ","; + } + i++; } - const metkit::mars::MarsRequest req = unionRequest(marsrequests); - MetricsManager::instance().set("union_request", req.asString()); - timer.reset("Gribjump Engine: Flattened requests and constructed union request"); + std::istringstream istream(result); + metkit::mars::MarsParser parser(istream); + std::vector unionRequests = parser.parse(); + ASSERT(unionRequests.size() == 1); - filemap_t filemap = FDBLister::instance().fileMap(req, keyToExtractionItem); + return unionRequests[0]; +} +filemap_t Engine::buildFileMap(const metkit::mars::MarsRequest& unionrequest, ExItemMap& keyToExtractionItem) { + // Map files to ExtractionItem + filemap_t filemap = FDBLister::instance().fileMap(unionrequest, keyToExtractionItem); return filemap; } @@ -174,10 +119,11 @@ void Engine::forwardRemoteExtraction(filemap_t& filemap) { const std::map& servermap_str = LibGribJump::instance().config().serverMap(); ASSERT(!servermap_str.empty()); - for (auto& [fdb, gj] : servermap_str) { - LOG_DEBUG_LIB(LibGribJump) << "Servermap: " << fdb << " -> " << gj << std::endl; + if (LibGribJump::instance().debug()) { + for (auto& [fdb, gj] : servermap_str) { + LOG_DEBUG_LIB(LibGribJump) << "Servermap: " << fdb << " -> " << gj << std::endl; + } } - std::unordered_map servermap; for (auto& [fdb, gj] : servermap_str) { eckit::net::Endpoint fdbEndpoint(fdb); @@ -247,11 +193,14 @@ void Engine::scheduleTasks(filemap_t& filemap){ taskGroup_.waitForTasks(); } -ResultsMap Engine::extract(const ExtractionRequests& requests, bool flatten) { +ResultsMap Engine::extract(ExtractionRequests& requests) { eckit::Timer timer("Engine::extract"); - ExItemMap keyToExtractionItem = buildKeyToExtractionItem(requests, flatten); // Owns the ExtractionItems - filemap_t filemap = buildFileMap(requests, keyToExtractionItem); + + ExItemMap keyToExtractionItem; + metkit::mars::MarsRequest unionreq = buildRequestMap(requests, keyToExtractionItem); + + filemap_t filemap = buildFileMap(unionreq, keyToExtractionItem); MetricsManager::instance().set("elapsed_build_filemap", timer.elapsed()); timer.reset("Gribjump Engine: Built file map"); diff --git a/src/gribjump/Engine.h b/src/gribjump/Engine.h index 57f5760..5302e96 100644 --- a/src/gribjump/Engine.h +++ b/src/gribjump/Engine.h @@ -28,7 +28,7 @@ class Engine { Engine(); ~Engine(); - ResultsMap extract(const ExtractionRequests& requests, bool flattenRequests = false); + ResultsMap extract(ExtractionRequests& requests); // byfiles: scan entire file, not just fields matching request size_t scan(const MarsRequests& requests, bool byfiles = false); @@ -43,10 +43,10 @@ class Engine { private: - filemap_t buildFileMap(const ExtractionRequests& requests, ExItemMap& keyToExtractionItem); - ExItemMap buildKeyToExtractionItem(const ExtractionRequests& requests, bool flatten); + filemap_t buildFileMap(const metkit::mars::MarsRequest& unionrequest, ExItemMap& keyToExtractionItem); ResultsMap collectResults(ExItemMap& keyToExtractionItem); void forwardRemoteExtraction(filemap_t& filemap); + metkit::mars::MarsRequest buildRequestMap(ExtractionRequests& requests, ExItemMap& keyToExtractionItem ); private: diff --git a/src/gribjump/ExtractionData.cc b/src/gribjump/ExtractionData.cc index 2dc249e..2f576f0 100644 --- a/src/gribjump/ExtractionData.cc +++ b/src/gribjump/ExtractionData.cc @@ -35,8 +35,6 @@ std::vector decodeVector(eckit::Stream& s) { return std::vector(data, data + size); } -// todo: encodeVectorVector ? - } // namespace ExtractionResult::ExtractionResult() {} @@ -120,15 +118,16 @@ eckit::Stream& operator<<(eckit::Stream& s, const ExtractionResult& o) { //--------------------------------------------------------------------------------------------------------------------- -ExtractionRequest::ExtractionRequest(const metkit::mars::MarsRequest& request, const std::vector& ranges, std::string gridHash): +ExtractionRequest::ExtractionRequest(const std::string& request, const std::vector& ranges, std::string gridHash): ranges_(ranges), request_(request), gridHash_(gridHash) {} + ExtractionRequest::ExtractionRequest() {} ExtractionRequest::ExtractionRequest(eckit::Stream& s) { - request_ = metkit::mars::MarsRequest(s); + s >> request_; s >> gridHash_; size_t numRanges; s >> numRanges; @@ -139,30 +138,6 @@ ExtractionRequest::ExtractionRequest(eckit::Stream& s) { } } -std::vector ExtractionRequest::split(const std::string& key) const { - - std::vector reqs = request_.split(key); - - std::vector requests; - requests.reserve(reqs.size()); - for (auto& r : reqs) { - requests.push_back(ExtractionRequest(r, ranges_)); - } - return requests; -} - -std::vector ExtractionRequest::split(const std::vector& keys) const { - - std::vector reqs = request_.split(keys); - - std::vector requests; - requests.reserve(reqs.size()); - for (auto& r : reqs) { - requests.push_back(ExtractionRequest(r, ranges_)); - } - return requests; -} - eckit::Stream& operator<<(eckit::Stream& s, const ExtractionRequest& o) { o.encode(s); return s; diff --git a/src/gribjump/ExtractionData.h b/src/gribjump/ExtractionData.h index 9e2ab98..83e49e0 100644 --- a/src/gribjump/ExtractionData.h +++ b/src/gribjump/ExtractionData.h @@ -24,9 +24,6 @@ namespace gribjump { //---------------------------------------------------------------------------------------------------------------------- - -/// @todo This class is now redundant thanks to ExtractionItem. - class ExtractionResult { public: // methods @@ -77,13 +74,12 @@ class ExtractionRequest { public: // methods ExtractionRequest(); - ExtractionRequest(const metkit::mars::MarsRequest&, const std::vector&, std::string gridHash=""); + ExtractionRequest(const std::string&, const std::vector&, std::string gridHash=""); explicit ExtractionRequest(eckit::Stream& s); - std::vector split(const std::vector& keys) const; - std::vector split(const std::string& key) const; const std::vector& ranges() const {return ranges_;} - const metkit::mars::MarsRequest& request() const {return request_;} + const std::string& requestString() const {return request_;} + void requestString(const std::string& s) {request_ = s;} const std::string& gridHash() const {return gridHash_;} private: // methods @@ -94,7 +90,7 @@ class ExtractionRequest { private: // members std::vector ranges_; - metkit::mars::MarsRequest request_; + std::string request_; std::string gridHash_; }; diff --git a/src/gribjump/ExtractionItem.h b/src/gribjump/ExtractionItem.h index 607b2fd..f90b892 100644 --- a/src/gribjump/ExtractionItem.h +++ b/src/gribjump/ExtractionItem.h @@ -26,7 +26,7 @@ class ExtractionItem : public eckit::NonCopyable { public: - ExtractionItem(const metkit::mars::MarsRequest& baseRequest, const Ranges& ranges): + ExtractionItem(const std::string& baseRequest, const Ranges& ranges): request_(baseRequest), ranges_(ranges) { /// @note We could reserve the values and mask here based on the ranges /// @note We're not always going to have mars requests (e.g. file name, tree, ...) More generic object? @@ -41,7 +41,7 @@ class ExtractionItem : public eckit::NonCopyable { ExValues& values() { return values_; } const ExMask& mask() const { return mask_; } const Ranges& intervals() const { return ranges_; } - const metkit::mars::MarsRequest& request() const { return request_; } + const std::string& request() const { return request_; } /// @note alternatively we could store the offset directly instead of the uri. eckit::Offset offset() const { @@ -68,7 +68,7 @@ class ExtractionItem : public eckit::NonCopyable { void debug_print() const { std::cout << "ExtractionItem: {" << std::endl; - std::cout << " MarsRequest: " << request_ << std::endl; + std::cout << " RequestString: " << request_ << std::endl; std::cout << " Ranges: " << std::endl; for (auto& r : ranges_) { std::cout << " {" << r.first << ", " << r.second << "}" << std::endl; @@ -99,7 +99,7 @@ class ExtractionItem : public eckit::NonCopyable { private: - const metkit::mars::MarsRequest request_; + const std::string request_; const Ranges ranges_; // Set on Listing diff --git a/src/gribjump/GribJump.cc b/src/gribjump/GribJump.cc index d40c8c2..0a3f1c5 100644 --- a/src/gribjump/GribJump.cc +++ b/src/gribjump/GribJump.cc @@ -50,7 +50,7 @@ size_t GribJump::scan(const std::vector requests, boo } -std::vector>> GribJump::extract(const std::vector& requests, const LogContext& ctx) { +std::vector>> GribJump::extract(std::vector& requests, const LogContext& ctx) { ContextManager::instance().set(ctx); if (requests.empty()) { diff --git a/src/gribjump/GribJump.h b/src/gribjump/GribJump.h index ae29e7e..c9a3e43 100644 --- a/src/gribjump/GribJump.h +++ b/src/gribjump/GribJump.h @@ -46,7 +46,7 @@ class GribJump { size_t scan(const std::vector& paths, const LogContext& ctx=LogContext("none")); size_t scan(std::vector requests, bool byfiles = false, const LogContext& ctx=LogContext("none")); - std::vector>> extract(const std::vector& requests, const LogContext& ctx=LogContext("none")); + std::vector>> extract(std::vector& requests, const LogContext& ctx=LogContext("none")); std::vector> extract(const eckit::PathName& path, const std::vector& offsets, const std::vector>& ranges, const LogContext& ctx=LogContext("none")); std::map> axes(const std::string& request, int level=3, const LogContext& ctx=LogContext("none")); diff --git a/src/gribjump/GribJumpBase.h b/src/gribjump/GribJumpBase.h index 190355f..6961376 100644 --- a/src/gribjump/GribJumpBase.h +++ b/src/gribjump/GribJumpBase.h @@ -33,7 +33,7 @@ namespace fdb5 { namespace gribjump { -using ResultsMap = std::map>>; +using ResultsMap = std::map>>; class GribJumpBase : public eckit::NonCopyable { public: @@ -45,9 +45,9 @@ class GribJumpBase : public eckit::NonCopyable { size_t virtual scan(const std::vector& paths) = 0; - virtual size_t scan(const std::vector requests, bool byfiles) = 0; + virtual size_t scan(const std::vector& requests, bool byfiles) = 0; - virtual std::vector>> extract(std::vector) = 0; + virtual std::vector>> extract(std::vector&) = 0; virtual std::vector> extract(const eckit::PathName& path, const std::vector& offsets, const std::vector>& ranges) = 0; virtual std::map> axes(const std::string& request, int level) = 0; diff --git a/src/gribjump/GribJumpException.h b/src/gribjump/GribJumpException.h index c696a48..f24453c 100644 --- a/src/gribjump/GribJumpException.h +++ b/src/gribjump/GribJumpException.h @@ -40,4 +40,15 @@ class DataNotFoundException : public GribJumpException { GribJumpException("DataNotFound. " + msg, here) {} }; +class JumpInfoExtractionDisabled : public GribJumpException { +public: + + JumpInfoExtractionDisabled(const std::string& msg) : + GribJumpException("Lazy JumpInfo extraction has been disabled. " + msg) {} + + JumpInfoExtractionDisabled(const std::string& msg, const eckit::CodeLocation& here) : + GribJumpException("Lazy JumpInfo extraction has been disabled. " + msg, here) {} +}; + + } // namespace gribjump diff --git a/src/gribjump/Lister.cc b/src/gribjump/Lister.cc index 0a5853b..8b644eb 100644 --- a/src/gribjump/Lister.cc +++ b/src/gribjump/Lister.cc @@ -81,10 +81,11 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons fdb5::FDBToolRequest fdbreq(unionRequest); auto listIter = fdb_.list(fdbreq, true); + size_t fdb_count = 0; size_t count = 0; - fdb5::ListElement elem; while (listIter.next(elem)) { + fdb_count++; std::string key = fdbkeyToStr(elem.combinedKey()); @@ -93,7 +94,6 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons // Set the URI in the ExtractionItem eckit::URI uri = elem.location().fullUri(); - ExtractionItem* extractionItem = reqToExtractionItem.at(key).get(); extractionItem->URI(uri); @@ -112,27 +112,28 @@ filemap_t FDBLister::fileMap(const metkit::mars::MarsRequest& unionRequest, cons count++; } - LOG_DEBUG_LIB(LibGribJump) << "Found " << count << " fields in " << filemap.size() << " files" << std::endl; - + LOG_DEBUG_LIB(LibGribJump) << "FDB found " << fdb_count << " fields. Matched " << count << " fields in " << filemap.size() << " files" << std::endl; if (count != reqToExtractionItem.size()) { - eckit::Log::warning() << "Warning: Number of fields found (" << count << ") does not match number of keys in extractionItem map (" << reqToExtractionItem.size() << ")" << std::endl; + eckit::Log::warning() << "Warning: Number of fields matched (" << count << ") does not match number of keys in extractionItem map (" << reqToExtractionItem.size() << ")" << std::endl; if (!allowMissing_) { std::stringstream ss; - ss << "Found " << count << " fields but " << reqToExtractionItem.size() << " were requested." << std::endl; + ss << "Matched " << count << " fields but " << reqToExtractionItem.size() << " were requested." << std::endl; + ss << "Union request: " << unionRequest << std::endl; throw DataNotFoundException(ss.str()); } } - // print the file map - LOG_DEBUG_LIB(LibGribJump) << "File map: " << std::endl; - for (const auto& file : filemap) { - LOG_DEBUG_LIB(LibGribJump) << " file=" << file.first << ", Offsets=["; - for (const auto& extractionItem : file.second) { - LOG_DEBUG_LIB(LibGribJump) << extractionItem->offset() << ", "; + if (LibGribJump::instance().debug()) { + LOG_DEBUG_LIB(LibGribJump) << "File map: " << std::endl; + for (const auto& file : filemap) { + LOG_DEBUG_LIB(LibGribJump) << " file=" << file.first << ", Offsets=["; + for (const auto& extractionItem : file.second) { + LOG_DEBUG_LIB(LibGribJump) << extractionItem->offset() << ", "; + } + LOG_DEBUG_LIB(LibGribJump) << "]" << std::endl; } - LOG_DEBUG_LIB(LibGribJump) << "]" << std::endl; } - + return filemap; } diff --git a/src/gribjump/LocalGribJump.cc b/src/gribjump/LocalGribJump.cc index 1386ce9..5365078 100644 --- a/src/gribjump/LocalGribJump.cc +++ b/src/gribjump/LocalGribJump.cc @@ -53,7 +53,7 @@ size_t LocalGribJump::scan(const std::vector& paths) { return engine.scan(paths); } -size_t LocalGribJump::scan(const std::vector requests, bool byfiles) { +size_t LocalGribJump::scan(const std::vector& requests, bool byfiles) { Engine engine; return engine.scan(requests, byfiles); } @@ -84,20 +84,19 @@ std::vector> LocalGribJump::extract(const eckit: } /// @todo, change API, remove extraction request -std::vector>> LocalGribJump::extract(ExtractionRequests requests) { +std::vector>> LocalGribJump::extract(ExtractionRequests& requests) { - bool flatten = true; Engine engine; - ResultsMap results = engine.extract(requests, flatten); + ResultsMap results = engine.extract(requests); engine.raiseErrors(); std::vector>> extractionResults; for (auto& req : requests) { - auto it = results.find(req.request()); + auto it = results.find(req.requestString()); + ASSERT(it != results.end()); std::vector> res; for (auto& item : it->second) { - // std::unique_ptr r(new ExtractionResult(item->values(), item->mask())); res.push_back(std::make_unique(item->values(), item->mask())); } @@ -107,7 +106,7 @@ std::vector>> LocalGribJump::extra return extractionResults; } -ResultsMap LocalGribJump::extract(const std::vector& requests, const std::vector>& ranges, bool flatten) { +ResultsMap LocalGribJump::extract(const std::vector& requests, const std::vector>& ranges) { Engine engine; ExtractionRequests extractionRequests; @@ -115,7 +114,7 @@ ResultsMap LocalGribJump::extract(const std::vector& requests, cons extractionRequests.push_back(ExtractionRequest(requests[i], ranges[i])); } - ResultsMap results = engine.extract(extractionRequests, flatten); + ResultsMap results = engine.extract(extractionRequests); engine.raiseErrors(); return results; } diff --git a/src/gribjump/LocalGribJump.h b/src/gribjump/LocalGribJump.h index dbaa238..f0fdf17 100644 --- a/src/gribjump/LocalGribJump.h +++ b/src/gribjump/LocalGribJump.h @@ -32,14 +32,14 @@ class LocalGribJump : public GribJumpBase { /// @param path full path to grib file size_t scan(const std::vector& paths) override; - size_t scan(const std::vector requests, bool byfiles) override; + size_t scan(const std::vector& requests, bool byfiles) override; // new API! - ResultsMap extract(const std::vector& requests, const std::vector>& ranges, bool flatten); + ResultsMap extract(const std::vector& requests, const std::vector>& ranges); // old API std::vector> extract(const eckit::PathName& path, const std::vector& offsets, const std::vector>& ranges) override; - std::vector>> extract(std::vector) override; + std::vector>> extract(std::vector&) override; std::map> axes(const std::string& request, int level) override; diff --git a/src/gribjump/Task.cc b/src/gribjump/Task.cc index cb22c48..2ca85d1 100644 --- a/src/gribjump/Task.cc +++ b/src/gribjump/Task.cc @@ -186,7 +186,7 @@ void FileExtractionTask::extract() { throw eckit::BadValue("Grid hash was not specified in request but is required. (Extraction item " + std::to_string(i) + " in file " + fname_ + ")"); } if (!ignoreGrid_ && (expectedHash != info.md5GridSection())) { - throw eckit::BadValue("Grid hash mismatch for extraction item " + std::to_string(i) + " in file " + fname_ + ". Expected: " + expectedHash + ", got: " + info.md5GridSection()); + throw eckit::BadValue("Grid hash mismatch for extraction item " + std::to_string(i) + " in file " + fname_ + ". Request specified: " + expectedHash + ", JumpInfo contains: " + info.md5GridSection()); } std::unique_ptr jumper(JumperFactory::instance().build(info)); // todo, dont build a new jumper for each info. diff --git a/src/gribjump/gribjump_c.cc b/src/gribjump/gribjump_c.cc index 0b323a5..9aa99bf 100644 --- a/src/gribjump/gribjump_c.cc +++ b/src/gribjump/gribjump_c.cc @@ -132,29 +132,20 @@ int gribjump_delete_handle(gribjump_handle_t* handle) { int gribjump_new_request(gribjump_extraction_request_t** request, const char* reqstr, const char* rangesstr, const char* gridhash) { return wrapApiFunction([=] { - - // reqstr is a string representation of a metkit::mars::MarsRequest + // reqstr is a request string, we *ASSUME* that it resembles a valid mars request for a SINGLE field. // rangesstr is a comma-separated list of ranges, e.g. "0-10,20-30" - // NB: Treat the requests as raw requests. - std::istringstream iss(reqstr); - metkit::mars::MarsParser parser(iss); - std::vector requests = parser.parse(); - ASSERT(requests.size() == 1); - metkit::mars::MarsRequest mreq(requests[0]); - // Parse the ranges string std::vector ranges = eckit::StringTools::split(",", rangesstr); std::vector rangevec; for (const auto& range : ranges) { - std::vector kv = eckit::StringTools::split("-", range); + std::vector kv = eckit::StringTools::split("-", range); // this is silly, we should just pass the values as integers ASSERT(kv.size() == 2); rangevec.push_back(std::make_pair(std::stoi(kv[0]), std::stoi(kv[1]))); } std::string gridhash_str = gridhash ? std::string(gridhash) : ""; - *request = new gribjump_extraction_request_t(mreq, rangevec, gridhash_str); - + *request = new gribjump_extraction_request_t(reqstr, rangevec, gridhash_str); }); } @@ -227,7 +218,8 @@ int gribjump_delete_result(gribjump_extraction_result_t* result) { int extract_single(gribjump_handle_t* handle, gribjump_extraction_request_t* request, gribjump_extraction_result_t*** results_array, unsigned long* nfields) { return wrapApiFunction([=] { ExtractionRequest req = *request; - std::vector>> resultsv = handle->extract(std::vector{req}); + std::vector vec = {req}; + std::vector>> resultsv = handle->extract(vec); ASSERT(resultsv.size() == 1); std::vector> results = std::move(resultsv[0]); diff --git a/src/gribjump/info/InfoCache.cc b/src/gribjump/info/InfoCache.cc index f433899..8eb7a94 100644 --- a/src/gribjump/info/InfoCache.cc +++ b/src/gribjump/info/InfoCache.cc @@ -24,6 +24,7 @@ #include "gribjump/LibGribJump.h" #include "gribjump/info/InfoFactory.h" #include "gribjump/info/InfoExtractor.h" +#include "gribjump/GribJumpException.h" namespace gribjump { @@ -41,10 +42,11 @@ InfoCache::~InfoCache() { InfoCache::InfoCache(): cacheDir_(eckit::PathName()), - cache_(eckit::Resource("gribjumpCacheSize", LibGribJump::instance().config().getInt("cache.size", 64))) { + cache_(eckit::Resource("gribjumpCacheSize", LibGribJump::instance().config().getInt("cache.size", 64))), + lazy_(eckit::Resource("gribjumpLazyInfo", LibGribJump::instance().config().getBool("cache.lazy", true))) { const Config& config = LibGribJump::instance().config(); - + bool enabled = config.getBool("cache.enabled", true); if (!enabled) { persistentCache_ = false; @@ -120,6 +122,9 @@ std::shared_ptr InfoCache::get(const eckit::PathName& path, const ecki } // Extract explicitly + if (!lazy_) { + throw JumpInfoExtractionDisabled("No JumpInfo found for path " + path + " at offset " + std::to_string(offset)); + } InfoExtractor extractor; std::shared_ptr info = extractor.extract(path, offset); @@ -143,7 +148,11 @@ std::vector> InfoCache::get(const eckit::PathName& pat } if (!missingOffsets.empty()) { - + if (!lazy_) { + std::stringstream ss; + ss << "Missing JumpInfo for " << eckit::Plural(missingOffsets.size(), "offset") << " in " << path; + throw JumpInfoExtractionDisabled(ss.str()); + } std::sort(missingOffsets.begin(), missingOffsets.end()); InfoExtractor extractor; diff --git a/src/gribjump/info/InfoCache.h b/src/gribjump/info/InfoCache.h index 4c7eae3..b8de63e 100644 --- a/src/gribjump/info/InfoCache.h +++ b/src/gribjump/info/InfoCache.h @@ -80,6 +80,8 @@ class InfoCache { bool persistentCache_ = true; + bool lazy_; //< if true, cache.get may construct JumpInfo on the fly + bool shadowCache_ = false; //< if true, cache files are persisted next to the original data files (e.g. in FDB) // This takes precedence over cacheDir_. }; diff --git a/src/gribjump/remote/GribJumpUser.cc b/src/gribjump/remote/GribJumpUser.cc index 0d3ba02..308293e 100644 --- a/src/gribjump/remote/GribJumpUser.cc +++ b/src/gribjump/remote/GribJumpUser.cc @@ -61,7 +61,7 @@ void GribJumpUser::handle_client(eckit::Stream& s, eckit::Timer& timer) { s >> version; if (version != remoteProtocolVersion) { - throw eckit::SeriousBug("Gribjump remote-protocol mismatch: expected version " + std::to_string(protocolVersion_) + " but got " + std::to_string(version)); + throw eckit::SeriousBug("Gribjump remote-protocol mismatch: Serverside version: " + std::to_string(protocolVersion_) + ", Clientside version: " + std::to_string(version)); } LogContext ctx(s); diff --git a/src/gribjump/remote/RemoteGribJump.cc b/src/gribjump/remote/RemoteGribJump.cc index 1872e68..15c036b 100644 --- a/src/gribjump/remote/RemoteGribJump.cc +++ b/src/gribjump/remote/RemoteGribJump.cc @@ -41,7 +41,7 @@ void RemoteGribJump::sendHeader(eckit::net::InstantTCPStream& stream, RequestTyp stream << static_cast(type); } -size_t RemoteGribJump::scan(const std::vector requests, bool byfiles) { +size_t RemoteGribJump::scan(const std::vector& requests, bool byfiles) { eckit::Timer timer("RemoteGribJump::scan()"); // connect to server @@ -78,7 +78,7 @@ size_t RemoteGribJump::scan(const std::vector request return count; } -std::vector>> RemoteGribJump::extract(std::vector requests) { +std::vector>> RemoteGribJump::extract(std::vector& requests) { eckit::Timer timer("RemoteGribJump::extract()"); std::vector>> result; @@ -120,7 +120,9 @@ std::vector> RemoteGribJump::extract(const eckit NOTIMP; } +// Forward extraction request to another server void RemoteGribJump::extract(filemap_t& filemap){ + eckit::Timer timer("RemoteGribJump::extract()"); ///@todo we could probably do the connection logic in the ctor @@ -143,9 +145,8 @@ void RemoteGribJump::extract(filemap_t& filemap){ size_t nItems = extractionItems.size(); stream << nItems; for (auto& item : extractionItems) { - // ExtractionRequest req(item->request(), item->intervals()); - metkit::mars::MarsRequest r(""); // no need to send mars request when we have uri - ExtractionRequest req(r, item->intervals(), item->gridHash()); + // We have URI, no need to send a request string + ExtractionRequest req("", item->intervals(), item->gridHash()); stream << req; stream << item->URI(); } diff --git a/src/gribjump/remote/RemoteGribJump.h b/src/gribjump/remote/RemoteGribJump.h index e32d066..eb6e54c 100644 --- a/src/gribjump/remote/RemoteGribJump.h +++ b/src/gribjump/remote/RemoteGribJump.h @@ -24,7 +24,7 @@ enum class RequestType : uint16_t { SCAN, FORWARD_EXTRACT }; -constexpr static uint16_t remoteProtocolVersion = 0; +constexpr static uint16_t remoteProtocolVersion = 1; class RemoteGribJump : public GribJumpBase { @@ -36,9 +36,9 @@ class RemoteGribJump : public GribJumpBase { size_t scan(const std::vector& path) override { NOTIMP; } - size_t scan(const std::vector requests, bool byfiles) override; + size_t scan(const std::vector& requests, bool byfiles) override; - std::vector>> extract(std::vector polyRequest) override; + std::vector>> extract(std::vector& polyRequest) override; std::vector> extract(const eckit::PathName& path, const std::vector& offsets, const std::vector>& ranges) override; void extract(filemap_t& filemap); diff --git a/src/gribjump/remote/Request.cc b/src/gribjump/remote/Request.cc index 57d3cab..93f6f0d 100644 --- a/src/gribjump/remote/Request.cc +++ b/src/gribjump/remote/Request.cc @@ -88,8 +88,6 @@ ExtractRequest::ExtractRequest(eckit::Stream& stream) : Request(stream) { requests_.push_back(req); } - flatten_ = false; // xxx hard coded for now - MetricsManager::instance().set("count_requests", nRequests); } @@ -98,7 +96,7 @@ ExtractRequest::~ExtractRequest() { void ExtractRequest::execute() { - results_ = engine_.extract(requests_, flatten_); + results_ = engine_.extract(requests_); if (LibGribJump::instance().debug()) { for (auto& pair : results_) { @@ -120,7 +118,7 @@ void ExtractRequest::replyToClient() { for (size_t i = 0; i < nRequests; i++) { LOG_DEBUG_LIB(LibGribJump) << "Sending result " << i << " to client" << std::endl; - auto it = results_.find(requests_[i].request()); + auto it = results_.find(requests_[i].requestString()); ASSERT(it != results_.end()); std::vector>& items = it->second; // ExtractionItems items = it->second; diff --git a/src/gribjump/remote/Request.h b/src/gribjump/remote/Request.h index 7fe014a..463b972 100644 --- a/src/gribjump/remote/Request.h +++ b/src/gribjump/remote/Request.h @@ -87,7 +87,6 @@ class ExtractRequest : public Request { private: std::vector requests_; - bool flatten_; ResultsMap results_; diff --git a/src/gribjump/tools/ToolUtils.cc b/src/gribjump/tools/ToolUtils.cc index 3a1e008..d18d975 100644 --- a/src/gribjump/tools/ToolUtils.cc +++ b/src/gribjump/tools/ToolUtils.cc @@ -13,7 +13,8 @@ #include "eckit/utils/StringTools.h" #include "eckit/filesystem/PathName.h" - +#include "metkit/mars/MarsExpension.h" +#include "gribjump/LibGribJump.h" #include "gribjump/tools/ToolUtils.h" namespace gribjump { @@ -49,4 +50,35 @@ std::vector> parseRangesFile(eckit::PathName fname) { return allRanges; } +class CollectFlattenedRequests : public metkit::mars::FlattenCallback { +public: + CollectFlattenedRequests(std::vector& flattenedRequests) : flattenedRequests_(flattenedRequests) {} + + virtual void operator()(const metkit::mars::MarsRequest& req) { + flattenedRequests_.push_back(req); + } + + std::vector& flattenedRequests_; +}; + +std::vector flattenRequest(const metkit::mars::MarsRequest& request) { + + metkit::mars::MarsExpension expansion(false); + metkit::mars::DummyContext ctx; + std::vector flattenedRequests; + + CollectFlattenedRequests cb(flattenedRequests); + expansion.flatten(ctx, request, cb); + + LOG_DEBUG_LIB(LibGribJump) << "Base request: " << request << std::endl; + + if (LibGribJump::instance().debug()) { + for (const auto& req : flattenedRequests) { + LOG_DEBUG_LIB(LibGribJump) << " Flattened request: " << req << std::endl; + } + } + + return flattenedRequests; +} + } // namespace gribjump diff --git a/src/gribjump/tools/ToolUtils.h b/src/gribjump/tools/ToolUtils.h index c700ec9..c74f641 100644 --- a/src/gribjump/tools/ToolUtils.h +++ b/src/gribjump/tools/ToolUtils.h @@ -12,12 +12,14 @@ #pragma once #include "eckit/filesystem/PathName.h" +#include "metkit/mars/MarsRequest.h" #include "gribjump/ExtractionData.h" namespace gribjump { std::vector> parseRangesFile(eckit::PathName fname); +std::vector flattenRequest(const metkit::mars::MarsRequest& request); } // namespace gribjump diff --git a/src/tools/gribjump-extract.cc b/src/tools/gribjump-extract.cc index 752ca3c..91e3762 100644 --- a/src/tools/gribjump-extract.cc +++ b/src/tools/gribjump-extract.cc @@ -49,7 +49,7 @@ void GribJumpExtract::usage(const std::string &tool) const { void GribJumpExtract::execute(const eckit::option::CmdArgs &args) { // Testing tool for extract / directJump functionality - + using MarsRequests = metkit::mars::MarsRequest; const bool raw = args.getBool("raw", false); const bool printout = args.getBool("print", true); @@ -84,8 +84,17 @@ void GribJumpExtract::execute(const eckit::option::CmdArgs &args) { std::vector polyRequest; for (size_t i = 0; i < requests.size(); i++) { - ExtractionRequest exrequest(requests[i], allRanges[i]); - polyRequest.push_back(exrequest); + // Flatten and remove verb + std::vector flattenedRequests = flattenRequest(requests[i]); + for (auto& req : flattenedRequests) { + std::string s = req.asString(); + // remove "retrieve," from the beginning, if it exists + if (s.find("retrieve,") == 0) { + s = s.substr(9); + } + ExtractionRequest exrequest(s, allRanges[i]); + polyRequest.push_back(exrequest); + } } // Grid hash diff --git a/src/tools/gribjump-validate.cc b/src/tools/gribjump-validate.cc index 00397fe..90bea56 100644 --- a/src/tools/gribjump-validate.cc +++ b/src/tools/gribjump-validate.cc @@ -85,7 +85,7 @@ void CompareEccodes::execute(const eckit::option::CmdArgs &args) { std::vector polyRequest; for (size_t i = 0; i < requests.size(); i++) { - ExtractionRequest exrequest(requests[i], allRanges[i]); + ExtractionRequest exrequest(requests[i].asString(), allRanges[i]); polyRequest.push_back(exrequest); } diff --git a/tests/remote/test_remote.cc b/tests/remote/test_remote.cc index de868b8..84915cf 100644 --- a/tests/remote/test_remote.cc +++ b/tests/remote/test_remote.cc @@ -48,10 +48,16 @@ static eckit::PathName metricsFile = "test_metrics"; CASE( "Remote protocol: extract" ) { // --- Extract - std::vector requests = { - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc")[0].request(), - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc")[0].request(), + // std::vector requests = { + // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc")[0].request(), + // fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc")[0].request(), + // }; + + std::vector requests = { + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc", }; + std::vector> allIntervals = { {std::make_pair(0, 5), std::make_pair(20, 30)}, diff --git a/tests/test_api.cc b/tests/test_api.cc index 8bd2346..d6f6bbf 100644 --- a/tests/test_api.cc +++ b/tests/test_api.cc @@ -37,7 +37,7 @@ namespace test { constexpr double MISSING = std::numeric_limits::quiet_NaN(); void compareValues(const std::vector>>>& expectedValues, const std::vector>>& output) { - EXPECT(expectedValues.size() == output.size()); + EXPECT_EQUAL(expectedValues.size(), output.size()); for (size_t i = 0; i < expectedValues.size(); i++) { // each mars request EXPECT_EQUAL(expectedValues[i].size(), output[i].size()); for (size_t j = 0; j < expectedValues[i].size(); j++) { // each field @@ -98,19 +98,11 @@ CASE( "test_gribjump_api_extract" ) { // Test 1: Extract 3 fields. Each field has a different set of ranges - std::vector requests; - { - std::istringstream s( - "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc\n" - "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc\n" - "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc\n" - ); - metkit::mars::MarsParser parser(s); - auto parsedRequests = parser.parse(); - metkit::mars::MarsExpension expand(/* inherit */ false); - requests = expand.expand(parsedRequests); - } - + std::vector requests = { + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc" + }; std::vector> allIntervals = { { @@ -141,14 +133,16 @@ CASE( "test_gribjump_api_extract" ) { // Eccodes expected values std::vector>>> expectedValues; for (auto req : polyRequest1) { - expectedValues.push_back(eccodesExtract(req.request(), req.ranges())); + metkit::mars::MarsRequest marsreq = fdb5::FDBToolRequest::requestsFromString(req.requestString())[0].request(); + expectedValues.push_back(eccodesExtract(marsreq, req.ranges())); } compareValues(expectedValues, output1); // -------------------------------------------------------------------------------------------- +#if 0 // NO LONGER SUPPORTED // Test 2: Extract same fields as Test 1, but in a single step=2/1/3 request. One set of ranges for all fields. - + std::vector marsrequests; { std::istringstream s( "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2/1/3,stream=oper,time=1200,type=fc\n" @@ -156,8 +150,12 @@ CASE( "test_gribjump_api_extract" ) { metkit::mars::MarsParser parser(s); auto parsedRequests = parser.parse(); metkit::mars::MarsExpension expand(/* inherit */ false); - requests = expand.expand(parsedRequests); + marsrequests = expand.expand(parsedRequests); } + + requests = { + "retrieve,class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2/1/3,stream=oper,time=1200,type=fc" + }; std::vector ranges = allIntervals[0]; PolyRequest polyRequest2; @@ -168,25 +166,29 @@ CASE( "test_gribjump_api_extract" ) { EXPECT(output2[0].size() == 3); expectedValues.clear(); - expectedValues.push_back(eccodesExtract(requests[0], ranges)); + expectedValues.push_back(eccodesExtract(marsrequests[0], ranges)); compareValues(expectedValues, output2); - +#endif // -------------------------------------------------------------------------------------------- + std::vector ranges = allIntervals[0]; - // Test 2.b: Extract but with an md5 hash - EXPECT_THROWS_AS(gj.extract({ExtractionRequest(requests[0], ranges)}), eckit::SeriousBug); // missing hash - EXPECT_THROWS_AS(gj.extract({ExtractionRequest(requests[0], ranges, "wronghash")}), eckit::SeriousBug); // incorrect hash + // Test 1.b: Extract but with an md5 hash + std::vector vec = {ExtractionRequest(requests[0], ranges)}; + EXPECT_THROWS_AS(gj.extract(vec), eckit::SeriousBug); // missing hash + vec = {ExtractionRequest(requests[0], ranges, "wronghash")}; + EXPECT_THROWS_AS(gj.extract(vec), eckit::SeriousBug); // incorrect hash // correct hash - std::vector>> output2c = gj.extract({ExtractionRequest(requests[0], ranges, gridHash)}); + vec = {ExtractionRequest(requests[0], ranges, gridHash)}; + std::vector>> output2c = gj.extract(vec); EXPECT_EQUAL(output2c[0][0]->total_values(), 15); - // -------------------------------------------------------------------------------------------- + // // -------------------------------------------------------------------------------------------- // Test 3: Extract function using path and offsets, which skips engine and related tasks/checks. std::vector uris; - fdb5::FDBToolRequest fdbreq(requests[0]); + fdb5::FDBToolRequest fdbreq = fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2/1/3,stream=oper,time=1200,type=fc")[0]; auto listIter = fdb.list(fdbreq, false); fdb5::ListElement elem; while (listIter.next(elem)) { @@ -223,6 +225,9 @@ CASE( "test_gribjump_api_extract" ) { } // Expect output to be the same as output2[0] + expectedValues.clear(); + expectedValues.push_back(eccodesExtract(fdbreq.request(), ranges)); + std::vector>> output3v; output3v.push_back(std::move(output3)); // i.e. == {output3} compareValues(expectedValues, output3v); diff --git a/tests/test_engine.cc b/tests/test_engine.cc index 18bbc32..bdcf7a1 100644 --- a/tests/test_engine.cc +++ b/tests/test_engine.cc @@ -96,17 +96,16 @@ CASE ("Engine: pre-test setup") { CASE ("Engine: Basic extraction") { - - // --- Setup + // // --- Setup eckit::testing::SetEnv fdbconfig("FDB5_CONFIG", fdbConfig(tmpdir).c_str()); eckit::testing::SetEnv allowmissing("GRIBJUMP_ALLOW_MISSING", "0"); // We have deliberately missing data in the request. // --- Extract (test 1) - std::vector requests = { - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc")[0].request(), - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc")[0].request(), - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc")[0].request(), - fdb5::FDBToolRequest::requestsFromString("class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1000,stream=oper,time=1200,type=fc")[0].request() // Deliberately missing data + std::vector requests = { + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=2,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=3,stream=oper,time=1200,type=fc", + "class=rd,date=20230508,domain=g,expver=xxxx,levtype=sfc,param=151130,step=1000,stream=oper,time=1200,type=fc" // Deliberately missing data }; std::vector> allIntervals = { @@ -122,12 +121,12 @@ CASE ("Engine: Basic extraction") { exRequests.push_back(ExtractionRequest(requests[i], allIntervals[i], gridHash)); } // We expect a throw due to missing data - EXPECT_THROWS_AS(engine.extract(exRequests, false), DataNotFoundException); + EXPECT_THROWS_AS(engine.extract(exRequests), DataNotFoundException); // drop the final request exRequests.pop_back(); - ResultsMap results = engine.extract(exRequests, false); + ResultsMap results = engine.extract(exRequests); EXPECT_NO_THROW(engine.raiseErrors()); // print contents of map @@ -141,9 +140,9 @@ CASE ("Engine: Basic extraction") { // Check correct values size_t count = 0; for (size_t i = 0; i < 3; i++) { - metkit::mars::MarsRequest req = requests[i]; + metkit::mars::MarsRequest req = fdb5::FDBToolRequest::requestsFromString(requests[i])[0].request(); std::vector intervals = allIntervals[i]; - auto& exs = results[req]; + auto& exs = results[requests[i]]; auto comparisonValues = eccodesExtract(req, intervals); for (size_t j = 0; j < exs.size(); j++) { for (size_t k = 0; k < comparisonValues[j].size(); k++) { @@ -163,6 +162,7 @@ CASE ("Engine: Basic extraction") { // only count the 3 intervals with data EXPECT(count == 45); +#if 0 // --- Extract (test 2) // Same request, all in one (test flattening) /// @todo, currently, the user cannot know order of the results after flattening, making this feature not very useful. @@ -214,6 +214,7 @@ CASE ("Engine: Basic extraction") { } } EXPECT(count == 45); +#endif /// @todo: request touching multiple files? /// @todo: request involving unsupported packingType? diff --git a/tests/test_gribinfo.cc b/tests/test_gribinfo.cc index ff997d8..f0479f2 100644 --- a/tests/test_gribinfo.cc +++ b/tests/test_gribinfo.cc @@ -233,9 +233,8 @@ CASE ("test_wrong_jumper") { // Testing the extract functionality using ExtractionItem // ~ i.e. internals of FileExtractionTask CASE ("test_ExtractionItem_extract") { - metkit::mars::MarsRequest request("none"); auto intervals = std::vector{{0, 10}, {3000000, 3000010}, {6599670, 6599680}}; - ExtractionItem exItem(request, intervals ); + ExtractionItem exItem("", intervals ); eckit::PathName path = "2t_O1280.grib";