From 651da6cb6e072a3020015f3817675b2e02ea4911 Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Mon, 9 Sep 2024 20:15:09 +0100 Subject: [PATCH 1/2] Add LRU cache --- src/gribjump/CMakeLists.txt | 1 + src/gribjump/info/InfoCache.cc | 71 ++++++++++++------------ src/gribjump/info/InfoCache.h | 9 ++-- src/gribjump/info/LRUCache.h | 98 ++++++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 9 ++++ tests/test_misc_units.cc | 68 +++++++++++++++++++++++ 6 files changed, 214 insertions(+), 42 deletions(-) create mode 100644 src/gribjump/info/LRUCache.h create mode 100644 tests/test_misc_units.cc diff --git a/src/gribjump/CMakeLists.txt b/src/gribjump/CMakeLists.txt index 35d6049..b7b0391 100644 --- a/src/gribjump/CMakeLists.txt +++ b/src/gribjump/CMakeLists.txt @@ -78,6 +78,7 @@ if( HAVE_GRIBJUMP_LOCAL_EXTRACT ) info/InfoAggregator.cc info/InfoCache.cc info/InfoCache.h + info/LRUCache.h info/UnsupportedInfo.h info/UnsupportedInfo.cc diff --git a/src/gribjump/info/InfoCache.cc b/src/gribjump/info/InfoCache.cc index 51f1c41..84d2d5b 100644 --- a/src/gribjump/info/InfoCache.cc +++ b/src/gribjump/info/InfoCache.cc @@ -39,7 +39,9 @@ InfoCache& InfoCache::instance() { InfoCache::~InfoCache() { } -InfoCache::InfoCache(): cacheDir_(eckit::PathName()) { +InfoCache::InfoCache(): + cacheDir_(eckit::PathName()), + cache_(eckit::Resource("gribjumpCacheSize", LibGribJump::instance().config().getInt("cache.size", 128))) { const Config& config = LibGribJump::instance().config(); @@ -78,18 +80,21 @@ eckit::PathName InfoCache::cacheFilePath(const eckit::PathName& path) const { return cacheDir_ / path.baseName() + file_ext; } -FileCache& InfoCache::getFileCache(const eckit::PathName& path, bool load) { +std::shared_ptr InfoCache::getFileCache(const eckit::PathName& path, bool load) { std::lock_guard lock(mutex_); + const filename_t& f = path.baseName(); - auto it = cache_.find(f); - if(it != cache_.end()) return *(it->second); - + + if (cache_.exists(f)) { + return cache_.get(f); + } + eckit::PathName cachePath = cacheFilePath(path); LOG_DEBUG_LIB(LibGribJump) << "New InfoCache entry for file " << f << " at " << cachePath << std::endl; - std::unique_ptr filecache(new FileCache(cachePath, load)); - cache_.insert(std::make_pair(f, std::move(filecache))); - return *cache_[f]; + auto filecache = std::make_shared(cachePath, load); + cache_.put(f, filecache); + return filecache; } std::shared_ptr InfoCache::get(const eckit::URI& uri) { @@ -102,12 +107,12 @@ std::shared_ptr InfoCache::get(const eckit::URI& uri) { std::shared_ptr InfoCache::get(const eckit::PathName& path, const eckit::Offset offset) { - FileCache& filecache = getFileCache(path); - filecache.load(); + std::shared_ptr filecache = getFileCache(path); + filecache->load(); // return it if in memory cache { - std::shared_ptr info = filecache.find(offset); + std::shared_ptr info = filecache->find(offset); if (info) return info; LOG_DEBUG_LIB(LibGribJump) << "InfoCache file " << path << " does not contain JumpInfo for field at offset " << offset << std::endl; @@ -119,20 +124,20 @@ std::shared_ptr InfoCache::get(const eckit::PathName& path, const ecki InfoExtractor extractor; std::shared_ptr info = extractor.extract(path, offset); - filecache.insert(offset, info); + filecache->insert(offset, info); return info; } std::vector> InfoCache::get(const eckit::PathName& path, const eckit::OffsetList& offsets) { - FileCache& filecache = getFileCache(path); - filecache.load(); + std::shared_ptr filecache = getFileCache(path); + filecache->load(); std::vector missingOffsets; for (const auto& offset : offsets) { - if (!filecache.find(offset)) { + if (!filecache->find(offset)) { missingOffsets.push_back(offset); } } @@ -144,14 +149,14 @@ std::vector> InfoCache::get(const eckit::PathName& pat std::vector> infos = extractor.extract(path, missingOffsets); for (size_t i = 0; i < infos.size(); i++) { - filecache.insert(missingOffsets[i], std::move(infos[i])); + filecache->insert(missingOffsets[i], std::move(infos[i])); } } std::vector> result; for (const auto& offset : offsets) { - std::shared_ptr info = filecache.find(offset); + std::shared_ptr info = filecache->find(offset); ASSERT(info); result.push_back(info); } @@ -167,17 +172,10 @@ std::vector> InfoCache::get(const eckit::PathName& pat void InfoCache::insert(const eckit::PathName& path, const eckit::Offset offset, std::shared_ptr info) { LOG_DEBUG_LIB(LibGribJump) << "GribJumpCache inserting " << path << ":" << offset << std::endl; - FileCache& filecache = getFileCache(path, false); - filecache.insert(offset, info); + std::shared_ptr filecache = getFileCache(path, false); + filecache->insert(offset, info); } - -// void InfoCache::insert(const eckit::PathName& path, std::vector> infos) { -// LOG_DEBUG_LIB(LibGribJump) << "GribJumpCache inserting " << path << "" << infos.size() << " fields" << std::endl; -// FileCache& filecache = getFileCache(path, false); -// filecache.insert(infos); -// } - void InfoCache::flush(bool append) { std::lock_guard lock(mutex_); for (auto& [filename, filecache] : cache_) { @@ -199,14 +197,14 @@ void InfoCache::scan(const eckit::PathName& fdbpath, const std::vector filecache = getFileCache(fdbpath); + filecache->load(); // Find which offsets are not already in file cache std::vector newOffsets; for (const auto& offset : offsets) { - if(!filecache.find(offset)) { + if(!filecache->find(offset)) { newOffsets.push_back(offset); } } @@ -223,34 +221,33 @@ void InfoCache::scan(const eckit::PathName& fdbpath, const std::vector> infos; // infos.reserve(uinfos.size()); // std::move(uinfos.begin(), uinfos.end(), std::back_inserter(infos)); - // filecache.insert(infos); + // filecache->insert(infos); for (size_t i = 0; i < uinfos.size(); i++) { - filecache.insert(newOffsets[i], std::move(uinfos[i])); + filecache->insert(newOffsets[i], std::move(uinfos[i])); } if (persistentCache_) { - filecache.write(); + filecache->write(); } } void InfoCache::scan(const eckit::PathName& fdbpath) { - LOG_DEBUG_LIB(LibGribJump) << "Scanning whole file " << fdbpath << std::endl; // if cache exists load so we can merge with memory cache - FileCache& filecache = getFileCache(fdbpath); - filecache.load(); + std::shared_ptr filecache = getFileCache(fdbpath); + filecache->load(); InfoExtractor extractor; std::vector>> uinfos = extractor.extract(fdbpath); /* This needs to give use the offsets too*/ for (size_t i = 0; i < uinfos.size(); i++) { - filecache.insert(uinfos[i].first, std::move(uinfos[i].second)); + filecache->insert(uinfos[i].first, std::move(uinfos[i].second)); } if (persistentCache_) { - filecache.write(); + filecache->write(); } } diff --git a/src/gribjump/info/InfoCache.h b/src/gribjump/info/InfoCache.h index 8ee7e50..4c7eae3 100644 --- a/src/gribjump/info/InfoCache.h +++ b/src/gribjump/info/InfoCache.h @@ -19,8 +19,8 @@ #include "eckit/filesystem/URI.h" #include "eckit/serialisation/FileStream.h" - #include "gribjump/info/JumpInfo.h" +#include "gribjump/info/LRUCache.h" #include "gribjump/LibGribJump.h" namespace gribjump { @@ -30,8 +30,8 @@ class InfoCache { private: // types - using filename_t = std::string; //< key is fieldlocation's path basename - using cache_t = std::map>; //< map fieldlocation's to gribinfo + using filename_t = std::string; //< key is fieldlocation's path basename + using cache_t = LRUCache>; //< map fieldlocation's to gribinfo public: @@ -61,14 +61,13 @@ class InfoCache { void print(std::ostream& s) const; - FileCache& getFileCache(const eckit::PathName& f, bool load=true); - private: // methods InfoCache(); ~InfoCache(); + std::shared_ptr getFileCache(const eckit::PathName& f, bool load=true); eckit::PathName cacheFilePath(const eckit::PathName& path) const; diff --git a/src/gribjump/info/LRUCache.h b/src/gribjump/info/LRUCache.h new file mode 100644 index 0000000..8054949 --- /dev/null +++ b/src/gribjump/info/LRUCache.h @@ -0,0 +1,98 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation nor + * does it submit to any jurisdiction. + */ + +/// @author Christopher Bradley + +#pragma once + +#include +#include +#include "eckit/exception/Exceptions.h" + +namespace gribjump { + +// Note: not thread safe, use an external lock if needed +template +class LRUCache { +public: + LRUCache(size_t capacity) : capacity_(capacity) {} + + void put(const K& key, const V& value) { + if (map_.find(key) == map_.end()) { + if (list_.size() == capacity_) { + auto last = list_.back(); + list_.pop_back(); + map_.erase(last); + } + list_.push_front(key); + } else { + list_.remove(key); + list_.push_front(key); + } + map_[key] = value; + } + + // Remember that put may receive a unique_ptr + void put(const K& key, V&& value) { + if (map_.find(key) == map_.end()) { + if (list_.size() == capacity_) { + auto last = list_.back(); + list_.pop_back(); + map_.erase(last); + } + list_.push_front(key); + } else { + list_.remove(key); + list_.push_front(key); + } + map_[key] = std::move(value); + } + + V& get(const K& key) { + if (map_.find(key) == map_.end()) { + throw eckit::BadValue("Key does not exist"); + } + list_.remove(key); + list_.push_front(key); + return map_[key]; + } + + + + bool exists(const K& key) { + return map_.find(key) != map_.end(); + } + + void print(std::ostream& s) { + for (auto& key : list_) { + s << key << " -> " << map_[key] << std::endl; + } + } + + typename std::unordered_map::const_iterator begin() const { + return map_.begin(); + } + + typename std::unordered_map::const_iterator end() const { + return map_.end(); + } + + void clear() { + list_.clear(); + map_.clear(); + } + +private: + size_t capacity_; + std::list list_; + std::unordered_map map_; +}; + +} // namespace gribjump diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7fd8cfa..6c55815 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -120,6 +120,15 @@ ecbuild_add_test( LIBS gribjump ) +ecbuild_add_test( + TARGET "gribjump_test_misc_units" + SOURCES "test_misc_units.cc" + INCLUDES "${ECKIT_INCLUDE_DIRS}" + ENVIRONMENT "${gribjump_env}" + NO_AS_NEEDED + LIBS gribjump +) + if (ENABLE_FDB_BUILD_TOOLS) add_subdirectory(tools) endif() \ No newline at end of file diff --git a/tests/test_misc_units.cc b/tests/test_misc_units.cc new file mode 100644 index 0000000..35cf804 --- /dev/null +++ b/tests/test_misc_units.cc @@ -0,0 +1,68 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include + +#include "eckit/testing/Test.h" +#include "gribjump/info/LRUCache.h" + + +#include "metkit/mars/MarsParser.h" +#include "metkit/mars/MarsExpension.h" +using namespace eckit::testing; + +namespace gribjump { +namespace test { + +// Miscellanous unit tests +//----------------------------------------------------------------------------- +CASE( "test_lru" ){ + + LRUCache cache(3); + + // Check basic functionality + cache.put("a", 1); + cache.put("b", 2); + cache.put("c", 3); + + EXPECT(cache.get("a") == 1); + EXPECT(cache.get("b") == 2); + EXPECT(cache.get("c") == 3); + + cache.put("d", 4); + + EXPECT_THROWS_AS(cache.get("a"), eckit::BadValue); + EXPECT(cache.get("d") == 4); + + // Check recency is updated with get + cache.put("x", 1); + cache.put("y", 2); + cache.put("z", 3); + + EXPECT(cache.get("z") == 3); + EXPECT(cache.get("y") == 2); + EXPECT(cache.get("x") == 1); + + // z should now be the least recently used + cache.put("w", 1); + + EXPECT_THROWS_AS(cache.get("z"), eckit::BadValue); +} +//----------------------------------------------------------------------------- + +} // namespace test +} // namespace gribjump + + +int main(int argc, char **argv) +{ + return run_tests ( argc, argv ); +} From be75c5d6265b22dea803641031e34b5a1bc53dbe Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Tue, 10 Sep 2024 15:19:16 +0100 Subject: [PATCH 2/2] Tidy up --- src/gribjump/info/InfoCache.cc | 2 +- src/gribjump/info/LRUCache.h | 25 +------------------------ 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/src/gribjump/info/InfoCache.cc b/src/gribjump/info/InfoCache.cc index 84d2d5b..f433899 100644 --- a/src/gribjump/info/InfoCache.cc +++ b/src/gribjump/info/InfoCache.cc @@ -41,7 +41,7 @@ InfoCache::~InfoCache() { InfoCache::InfoCache(): cacheDir_(eckit::PathName()), - cache_(eckit::Resource("gribjumpCacheSize", LibGribJump::instance().config().getInt("cache.size", 128))) { + cache_(eckit::Resource("gribjumpCacheSize", LibGribJump::instance().config().getInt("cache.size", 64))) { const Config& config = LibGribJump::instance().config(); diff --git a/src/gribjump/info/LRUCache.h b/src/gribjump/info/LRUCache.h index 8054949..b4d22b4 100644 --- a/src/gribjump/info/LRUCache.h +++ b/src/gribjump/info/LRUCache.h @@ -18,7 +18,7 @@ namespace gribjump { -// Note: not thread safe, use an external lock if needed +// Note: not a thread safe container, use an external lock if needed template class LRUCache { public: @@ -39,21 +39,6 @@ class LRUCache { map_[key] = value; } - // Remember that put may receive a unique_ptr - void put(const K& key, V&& value) { - if (map_.find(key) == map_.end()) { - if (list_.size() == capacity_) { - auto last = list_.back(); - list_.pop_back(); - map_.erase(last); - } - list_.push_front(key); - } else { - list_.remove(key); - list_.push_front(key); - } - map_[key] = std::move(value); - } V& get(const K& key) { if (map_.find(key) == map_.end()) { @@ -64,18 +49,10 @@ class LRUCache { return map_[key]; } - - bool exists(const K& key) { return map_.find(key) != map_.end(); } - void print(std::ostream& s) { - for (auto& key : list_) { - s << key << " -> " << map_[key] << std::endl; - } - } - typename std::unordered_map::const_iterator begin() const { return map_.begin(); }