
Commit df4bd05

WIP: Test implementing memory limitation in ConcurrentCache.

1 parent 9b9e229 · commit df4bd05

5 files changed: +169 −222 lines changed

src/concurrent_cache.h (+58 −43)

@@ -31,30 +31,6 @@
 namespace zim
 {
 
-template<typename CostEstimation>
-struct FutureToValueCostEstimation {
-  template<typename T>
-  static size_t cost(const std::shared_future<T>& future) {
-    // The future is the value in the cache.
-    // When calling getOrPut, if the key is not in the cache,
-    // we add a future and then we compute the value and set the future.
-    // But lru_cache calls us when we add the future, i.e. before we have
-    // computed the value. If we waited here (or used future.get), we would
-    // deadlock, as we need to return before setting the value.
-    // So in this case we return 0. `ConcurrentCache::getOrPut` will correctly
-    // increase the current cache size once it has an actual value.
-    // We still need to compute the size of the value if the future has one, as
-    // this is also used to decrease the cache size when the value is dropped.
-    std::future_status status = future.wait_for(std::chrono::nanoseconds::zero());
-    if (status == std::future_status::ready) {
-      return CostEstimation::cost(future.get());
-    } else {
-      return 0;
-    }
-  }
-};
-
 /**
    ConcurrentCache implements a concurrent thread-safe cache
@@ -65,15 +41,15 @@ struct FutureToValueCostEstimation {
    available.
 */
 template <typename Key, typename Value, typename CostEstimation>
-class ConcurrentCache: private lru_cache<Key, std::shared_future<Value>, FutureToValueCostEstimation<CostEstimation>>
+class ConcurrentMemLimitedCache
 {
   private: // types
-    typedef std::shared_future<Value> ValuePlaceholder;
-    typedef lru_cache<Key, ValuePlaceholder, FutureToValueCostEstimation<CostEstimation>> Impl;
+    typedef std::shared_future<std::pair<size_t, Value>> ValuePlaceholder;
+    typedef lru_cache<Key, ValuePlaceholder> Impl;
 
   public: // types
-    explicit ConcurrentCache(size_t maxCost)
-      : Impl(maxCost)
+    explicit ConcurrentMemLimitedCache(size_t maxCost)
+      : maxCost(maxCost), currentCost(0), impl_((size_t)-1)
     {}
 
     // Gets the entry corresponding to the given key. If the entry is not in the
@@ -88,63 +64,102 @@ class ConcurrentCache: private lru_cache<Key, std::shared_future<Value>, FutureT
     template<class F>
     Value getOrPut(const Key& key, F f)
     {
-      std::promise<Value> valuePromise;
+      std::promise<std::pair<size_t, Value>> valuePromise;
       std::unique_lock<std::mutex> l(lock_);
-      const auto x = Impl::getOrPut(key, valuePromise.get_future().share());
+      const auto x = impl_.getOrPut(key, valuePromise.get_future().share());
       l.unlock();
       if ( x.miss() ) {
         try {
-          valuePromise.set_value(f());
-          auto cost = CostEstimation::cost(x.value().get());
+          auto item = f();
+          auto cost = CostEstimation::cost(item);
+          valuePromise.set_value(std::make_pair(cost, item));
          // There is a small window during which the valuePromise may be dropped from
          // the lru cache after we set the value but before we increase the size of
          // the cache. In that case the size is decreased by `cost` before we increase it.
          // First of all, this should be pretty rare, as we have just put the future
          // in the cache, so it should not be the least recently used item.
          // If it happens, it is not a problem as long as current_size is bigger than `cost` (most of the time).
-          // For the really rare specific case of current cach size being lower than `cost` (if possible),
+          // For the really rare specific case of the current cache size being lower than `cost` (if possible),
          // `decreaseCost` will clamp the new size to 0.
           {
             std::unique_lock<std::mutex> l(lock_);
-            Impl::increaseCost(cost);
+            increaseCost(cost);
           }
         } catch (std::exception& e) {
           drop(key);
           throw;
         }
       }
 
-      return x.value().get();
+      return std::get<1>(x.value().get());
     }
 
-    bool drop(const Key& key)
+    void drop(const Key& key)
     {
       std::unique_lock<std::mutex> l(lock_);
-      return Impl::drop(key);
+      auto dropped = impl_.drop(key);
+      currentCost -= get_future_cost(dropped);
     }
 
     template<class F>
     void dropAll(F f) {
       std::unique_lock<std::mutex> l(lock_);
-      Impl::dropAll(f);
+      for (auto dropped : impl_.dropAll(f)) {
+        currentCost -= get_future_cost(dropped);
+      }
     }
 
     size_t getMaxCost() const {
       std::unique_lock<std::mutex> l(lock_);
-      return Impl::getMaxCost();
+      return maxCost;
     }
 
     size_t getCurrentCost() const {
       std::unique_lock<std::mutex> l(lock_);
-      return Impl::cost();
+      return currentCost;
     }
 
-    void setMaxCost(size_t newSize) {
+    void setMaxCost(size_t newMaxCost) {
       std::unique_lock<std::mutex> l(lock_);
-      return Impl::setMaxCost(newSize);
+      maxCost = newMaxCost;
+      while (currentCost > maxCost) {
+        auto dropped = impl_.dropLast();
+        currentCost -= get_future_cost(dropped);
+      }
+    }
+
+  private:
+    static size_t get_future_cost(ValuePlaceholder& future) {
+      // The future is the value in the cache.
+      // When calling getOrPut, if the key is not in the cache,
+      // we add a future and then we compute the value and set the future.
+      // But lru_cache calls us when we add the future, i.e. before we have
+      // computed the value. If we waited here (or used future.get), we would
+      // deadlock, as we need to return before setting the value.
+      // So in this case we return 0. `ConcurrentMemLimitedCache::getOrPut` will
+      // correctly increase the current cache size once it has an actual value.
+      // We still need to compute the size of the value if the future has one, as
+      // this is also used to decrease the cache size when the value is dropped.
+      std::future_status status = future.wait_for(std::chrono::nanoseconds::zero());
+      if (status == std::future_status::ready) {
+        return std::get<0>(future.get());
+      } else {
+        return 0;
+      }
+    }
+
+    void increaseCost(size_t newCost) {
+      currentCost += newCost;
+      while (currentCost > maxCost && impl_.size() > 1) {
+        auto dropped = impl_.dropLast();
+        currentCost -= std::get<0>(dropped.get());
+      }
     }
 
   private: // data
+    size_t maxCost;
+    size_t currentCost;
+    mutable Impl impl_;
     mutable std::mutex lock_;
 };
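For a sense of how the reworked class behaves, here is a minimal usage sketch. It is hypothetical: `ByteCost`, the integer keys, and the string values are invented for illustration; only the constructor, `getOrPut`, `getCurrentCost`, and `setMaxCost` come from the diff above.

#include <cstddef>
#include <string>
#include "concurrent_cache.h"  // assumed include path for the header in this diff

// Hypothetical cost estimator: one unit per byte of the cached string.
struct ByteCost {
  static size_t cost(const std::string& s) { return s.size(); }
};

int main() {
  // Budget of 1024 cost units; the inner lru_cache is created unbounded
  // ((size_t)-1) and the wrapper enforces the budget itself.
  zim::ConcurrentMemLimitedCache<int, std::string, ByteCost> cache(1024);

  // Miss: f() runs once, its cost is stored in the future next to the value,
  // and increaseCost() evicts LRU entries if the budget is exceeded.
  auto v = cache.getOrPut(42, [] { return std::string(100, 'x'); });

  // Hit: f() is not called; the value half of the cached pair is returned.
  v = cache.getOrPut(42, [] { return std::string("unused"); });

  // Shrinking the budget evicts least-recently-used entries until it fits.
  cache.setMaxCost(50);
  return cache.getCurrentCost() <= 50 ? 0 : 1;
}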

src/dirent_accessor.h (+4 −4)

@@ -55,9 +55,9 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor
     std::shared_ptr<const Dirent> getDirent(entry_index_t idx) const;
     entry_index_t getDirentCount() const { return m_direntCount; }
 
-    size_t getMaxCacheSize() const { return m_direntCache.getMaxCost(); }
-    size_t getCurrentCacheSize() const { return m_direntCache.cost(); }
-    void setMaxCacheSize(size_t nbDirents) const { m_direntCache.setMaxCost(nbDirents); }
+    size_t getMaxCacheSize() const { return m_direntCache.getMaxSize(); }
+    size_t getCurrentCacheSize() const { return m_direntCache.size(); }
+    void setMaxCacheSize(size_t nbDirents) const { m_direntCache.setMaxSize(nbDirents); }
 
   private: // functions
     std::shared_ptr<const Dirent> readDirent(offset_t) const;
@@ -67,7 +67,7 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor
     std::unique_ptr<const Reader> mp_pathPtrReader;
     entry_index_t m_direntCount;
 
-    mutable lru_cache<entry_index_type, std::shared_ptr<const Dirent>, UnitCostEstimation> m_direntCache;
+    mutable lru_cache<entry_index_type, std::shared_ptr<const Dirent>> m_direntCache;
     mutable std::mutex m_direntCacheLock;
 
     mutable std::vector<char> m_bufferDirentZone;
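The dirent cache switch above works because every dirent had unit cost under the old three-parameter lru_cache, so its cost bookkeeping was just an entry count. A sketch of what the dropped `UnitCostEstimation` presumably looks like (its exact definition is not part of this diff):

#include <cstddef>

// Presumed shape of the estimator the old code passed to lru_cache:
// every entry costs 1, so the max cost equals the max number of entries.
struct UnitCostEstimation {
  template<typename T>
  static size_t cost(const T&) { return 1; }
};

With every entry costing 1, getMaxCost()/cost()/setMaxCost() were entry counters in disguise, which is why the diff can replace them with getMaxSize()/size()/setMaxSize() without changing the cache's behavior.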

src/fileimpl.h (+1 −1)

@@ -45,7 +45,7 @@ namespace zim
 {
   class FileImpl;
   typedef std::shared_ptr<const Cluster> ClusterHandle;
-  typedef ConcurrentCache<std::tuple<FileImpl*, cluster_index_type>, ClusterHandle, ClusterMemorySize> ClusterCache;
+  typedef ConcurrentMemLimitedCache<std::tuple<FileImpl*, cluster_index_type>, ClusterHandle, ClusterMemorySize> ClusterCache;
   ClusterCache& getClusterCache();
 
   class FileImpl
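Since only the class name changes in this typedef, call sites keep working unchanged. As a rough, hypothetical illustration of how the renamed ClusterCache is consumed (readCluster() is a stand-in loader, not a function from this diff):

// Hypothetical lookup through the renamed cache: on a miss the lambda loads
// the cluster, and its memory footprint is charged via ClusterMemorySize.
ClusterHandle getCluster(FileImpl* file, cluster_index_type idx) {
  return getClusterCache().getOrPut(std::make_tuple(file, idx),
                                    [=]() { return readCluster(file, idx); });
}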
