
Commit df4bd05

WIP: Test implementing memory limitation in ConcurrentCache.

1 parent 9b9e229 · commit df4bd05

5 files changed: +169 −222 lines changed

src/concurrent_cache.h (+58 −43)

@@ -31,30 +31,6 @@
 namespace zim
 {
 
-template<typename CostEstimation>
-struct FutureToValueCostEstimation {
-  template<typename T>
-  static size_t cost(const std::shared_future<T>& future) {
-    // The future is the value in the cache.
-    // When calling getOrPut, if the key is not in the cache,
-    // we add a future and then we compute the value and set the future.
-    // But lru_cache calls us when we add the future, i.e. before we have
-    // computed the value. If we waited here (or used future.get), we would
-    // deadlock, as we need to return before setting the value.
-    // So in this case we return 0. `ConcurrentCache::getOrPut` will correctly
-    // increase the current cache size once it has an actual value.
-    // We still need to compute the size of the value if the future has one, as
-    // this is also used to decrease the cache size when the value is dropped.
-    std::future_status status = future.wait_for(std::chrono::nanoseconds::zero());
-    if (status == std::future_status::ready) {
-      return CostEstimation::cost(future.get());
-    } else {
-      return 0;
-    }
-  }
-};
-
 /**
    ConcurrentCache implements a concurrent thread-safe cache
@@ -65,15 +41,15 @@ struct FutureToValueCostEstimation {
    available.
 */
 template <typename Key, typename Value, typename CostEstimation>
-class ConcurrentCache: private lru_cache<Key, std::shared_future<Value>, FutureToValueCostEstimation<CostEstimation>>
+class ConcurrentMemLimitedCache
 {
   private: // types
-    typedef std::shared_future<Value> ValuePlaceholder;
-    typedef lru_cache<Key, ValuePlaceholder, FutureToValueCostEstimation<CostEstimation>> Impl;
+    typedef std::shared_future<std::pair<size_t, Value>> ValuePlaceholder;
+    typedef lru_cache<Key, ValuePlaceholder> Impl;
 
   public: // types
-    explicit ConcurrentCache(size_t maxCost)
-      : Impl(maxCost)
+    explicit ConcurrentMemLimitedCache(size_t maxCost)
+      : maxCost(maxCost), currentCost(0), impl_((size_t)-1)
     {}
 
     // Gets the entry corresponding to the given key. If the entry is not in the
@@ -88,63 +64,102 @@ class ConcurrentCache: private lru_cache<Key, std::shared_future<Value>, FutureT
     template<class F>
     Value getOrPut(const Key& key, F f)
     {
-      std::promise<Value> valuePromise;
+      std::promise<std::pair<size_t, Value>> valuePromise;
       std::unique_lock<std::mutex> l(lock_);
-      const auto x = Impl::getOrPut(key, valuePromise.get_future().share());
+      const auto x = impl_.getOrPut(key, valuePromise.get_future().share());
       l.unlock();
       if ( x.miss() ) {
         try {
-          valuePromise.set_value(f());
-          auto cost = CostEstimation::cost(x.value().get());
+          auto item = f();
+          auto cost = CostEstimation::cost(item);
+          valuePromise.set_value(std::make_pair(cost, item));
          // There is a small window during which the valuePromise may be dropped from
          // the lru cache after we set the value but before we increase the size of
          // the cache. In that case the size is decreased by `cost` before we increase it.
          // First of all, this should be pretty rare, as we have just put the future
          // in the cache, so it should not be the least recently used item.
          // If it happens, it is not a problem as long as current_size is bigger than `cost` (most of the time).
-          // For the really rare specific case of current cach size being lower than `cost` (if possible),
+          // For the really rare specific case of the current cache size being lower than `cost` (if possible),
          // `decreaseCost` will clamp the new size to 0.
           {
             std::unique_lock<std::mutex> l(lock_);
-            Impl::increaseCost(cost);
+            increaseCost(cost);
           }
         } catch (std::exception& e) {
           drop(key);
           throw;
         }
       }
 
-      return x.value().get();
+      return std::get<1>(x.value().get());
     }
 
-    bool drop(const Key& key)
+    void drop(const Key& key)
     {
       std::unique_lock<std::mutex> l(lock_);
-      return Impl::drop(key);
+      auto dropped = impl_.drop(key);
+      currentCost -= get_future_cost(dropped);
     }
 
     template<class F>
     void dropAll(F f) {
       std::unique_lock<std::mutex> l(lock_);
-      Impl::dropAll(f);
+      for (auto dropped : impl_.dropAll(f)) {
+        currentCost -= get_future_cost(dropped);
+      }
     }
 
     size_t getMaxCost() const {
       std::unique_lock<std::mutex> l(lock_);
-      return Impl::getMaxCost();
+      return maxCost;
     }
 
     size_t getCurrentCost() const {
       std::unique_lock<std::mutex> l(lock_);
-      return Impl::cost();
+      return currentCost;
     }
 
-    void setMaxCost(size_t newSize) {
+    void setMaxCost(size_t newMaxCost) {
       std::unique_lock<std::mutex> l(lock_);
-      return Impl::setMaxCost(newSize);
+      maxCost = newMaxCost;
+      while (currentCost > maxCost) {
+        auto dropped = impl_.dropLast();
+        currentCost -= get_future_cost(dropped);
+      }
+    }
+
+  private:
+    static size_t get_future_cost(ValuePlaceholder& future) {
+      // The future is the value in the cache.
+      // When calling getOrPut, if the key is not in the cache,
+      // we add a future and then we compute the value and set the future.
+      // But lru_cache calls us when we add the future, i.e. before we have
+      // computed the value. If we waited here (or used future.get), we would
+      // deadlock, as we need to return before setting the value.
+      // So in this case we return 0. `ConcurrentMemLimitedCache::getOrPut` will
+      // correctly increase the current cache size once it has an actual value.
+      // We still need to compute the size of the value if the future has one, as
+      // this is also used to decrease the cache size when the value is dropped.
+      std::future_status status = future.wait_for(std::chrono::nanoseconds::zero());
+      if (status == std::future_status::ready) {
+        return std::get<0>(future.get());
+      } else {
+        return 0;
+      }
+    }
+
+    void increaseCost(size_t newCost) {
+      currentCost += newCost;
+      while (currentCost > maxCost && impl_.size() > 1) {
+        auto dropped = impl_.dropLast();
+        currentCost -= std::get<0>(dropped.get());
+      }
     }
 
   private: // data
+    size_t maxCost;
+    size_t currentCost;
+    mutable Impl impl_;
     mutable std::mutex lock_;
 };
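For a sense of how the reworked class behaves, here is a minimal usage sketch. It is hypothetical: `ByteCost`, the integer keys, and the string values are invented for illustration; only the constructor, `getOrPut`, `getCurrentCost`, and `setMaxCost` come from the diff above.

#include <cstddef>
#include <string>
#include "concurrent_cache.h"  // assumed include path for the header in this diff

// Hypothetical cost estimator: one unit per byte of the cached string.
struct ByteCost {
  static size_t cost(const std::string& s) { return s.size(); }
};

int main() {
  // Budget of 1024 cost units; the inner lru_cache is created unbounded
  // ((size_t)-1) and the wrapper enforces the budget itself.
  zim::ConcurrentMemLimitedCache<int, std::string, ByteCost> cache(1024);

  // Miss: f() runs once, its cost is stored in the future next to the value,
  // and increaseCost() evicts LRU entries if the budget is exceeded.
  auto v = cache.getOrPut(42, [] { return std::string(100, 'x'); });

  // Hit: f() is not called; the value half of the cached pair is returned.
  v = cache.getOrPut(42, [] { return std::string("unused"); });

  // Shrinking the budget evicts least-recently-used entries until it fits.
  cache.setMaxCost(50);
  return cache.getCurrentCost() <= 50 ? 0 : 1;
}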

src/dirent_accessor.h (+4 −4)

@@ -55,9 +55,9 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor
     std::shared_ptr<const Dirent> getDirent(entry_index_t idx) const;
     entry_index_t getDirentCount() const { return m_direntCount; }
 
-    size_t getMaxCacheSize() const { return m_direntCache.getMaxCost(); }
-    size_t getCurrentCacheSize() const { return m_direntCache.cost(); }
-    void setMaxCacheSize(size_t nbDirents) const { m_direntCache.setMaxCost(nbDirents); }
+    size_t getMaxCacheSize() const { return m_direntCache.getMaxSize(); }
+    size_t getCurrentCacheSize() const { return m_direntCache.size(); }
+    void setMaxCacheSize(size_t nbDirents) const { m_direntCache.setMaxSize(nbDirents); }
 
   private: // functions
     std::shared_ptr<const Dirent> readDirent(offset_t) const;
@@ -67,7 +67,7 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor
     std::unique_ptr<const Reader> mp_pathPtrReader;
     entry_index_t m_direntCount;
 
-    mutable lru_cache<entry_index_type, std::shared_ptr<const Dirent>, UnitCostEstimation> m_direntCache;
+    mutable lru_cache<entry_index_type, std::shared_ptr<const Dirent>> m_direntCache;
     mutable std::mutex m_direntCacheLock;
 
     mutable std::vector<char> m_bufferDirentZone;
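The dirent cache switch above works because every dirent had unit cost under the old three-parameter lru_cache, so its cost bookkeeping was just an entry count. A sketch of what the dropped `UnitCostEstimation` presumably looks like (its exact definition is not part of this diff):

#include <cstddef>

// Presumed shape of the estimator the old code passed to lru_cache:
// every entry costs 1, so the max cost equals the max number of entries.
struct UnitCostEstimation {
  template<typename T>
  static size_t cost(const T&) { return 1; }
};

With every entry costing 1, getMaxCost()/cost()/setMaxCost() were entry counters in disguise, which is why the diff can replace them with getMaxSize()/size()/setMaxSize() without changing the cache's behavior.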

src/fileimpl.h (+1 −1)

@@ -45,7 +45,7 @@ namespace zim
 {
   class FileImpl;
   typedef std::shared_ptr<const Cluster> ClusterHandle;
-  typedef ConcurrentCache<std::tuple<FileImpl*, cluster_index_type>, ClusterHandle, ClusterMemorySize> ClusterCache;
+  typedef ConcurrentMemLimitedCache<std::tuple<FileImpl*, cluster_index_type>, ClusterHandle, ClusterMemorySize> ClusterCache;
   ClusterCache& getClusterCache();
 
   class FileImpl
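Since only the class name changes in this typedef, call sites keep working unchanged. As a rough, hypothetical illustration of how the renamed ClusterCache is consumed (readCluster() is a stand-in loader, not a function from this diff):

// Hypothetical lookup through the renamed cache: on a miss the lambda loads
// the cluster, and its memory footprint is charged via ClusterMemorySize.
ClusterHandle getCluster(FileImpl* file, cluster_index_type idx) {
  return getClusterCache().getOrPut(std::make_tuple(file, idx),
                                    [=]() { return readCluster(file, idx); });
}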
