-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathechttp_sorted.c
424 lines (399 loc) · 15.4 KB
/
echttp_sorted.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
/* echttp - Embedded HTTP server.
*
* Copyright 2019, Pascal Martin
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
* ------------------------------------------------------------------------
*
* DESCRIPTION
*
* This is a small toolkit to create "live" sorted lists
*
* This module provides a way to maintain a sorted list while adding
* and removing elements. Iterator functions are used to walk the list
* in ascending or descending order.
*
* The sort key is a 64 bit unsigned integer. This module is specifically
* tuned for sorting by timestamps.
*
* This module only contains an opaque reference to the actual data, and
* never access this data: it only uses the sorting keys. It is the
* application's responsibility to handle data storage.
*
* The sort key does not need to be unique: this module maintains the insert
* order when the keys are identical (because of the tuning for chronological
* order). However, a reference must remain unique in the list: the same
* reference should not be added twice.
*
* SYNOPSYS
*
* echttp_sorted_list echttp_sorted_new (void);
*
* Create a new empty sorted list.
*
* void echttp_sorted_add (echttp_sorted_list b,
* unsigned long long key, void *data);
*
* Add a new item to the list, keeping the list sorted.
*
* void echttp_sorted_remove (echttp_sorted_list b,
* unsigned long long key, void *data);
*
* Remove the specified item from the list, keeping the list sorted.
* Return 1 when the end of the list was reached, 0 when the walk was
* stopped before the end.
*
* int echttp_sorted_descending (echttp_sorted_list b,
* echttp_sorted_action *action);
*
* An iterator that walks the sorted list in descending order.
* The walk stops at the end of the list, or else when action returns 0.
* Return 1 when the end of the list was reached, 0 when the walk was
* stopped before the end.
*
* int echttp_sorted_descending_from (echttp_sorted_list b,
* unsigned long long key,
* echttp_sorted_action *action);
*
* An iterator that walks the sorted list in descending order, starting
* with the provided key value. This means that action will only be called
* for items associated with keys less or equal than the provided value.
* The goal is to save the overhead that comes with walking the whole list
* everytime there is a request for older items.
* The walk stops at the end of the list, or else when action returns 0.
* Return 1 when the end of the list was reached, 0 when the walk was
* stopped before the end.
*
* int echttp_sorted_ascending (echttp_sorted_list b,
* echttp_sorted_action *action);
*
* An iterator that walks the sorted list in ascending order.
* The walk stops at the end of the list, or else when action returns 0.
* Return 1 when the end of the list was reached, 0 when the walk was
* stopped before the end.
*
* int echttp_sorted_ascending_from (echttp_sorted_list b,
* unsigned long long key,
* echttp_sorted_action *action);
*
* An iterator that walks the sorted list in ascending order, starting
* with the provided key value. This means that action will only be called
* for items associated with keys greater or equal than the provided value.
* The goal is to save the overhead that comes with walking the whole list
* everytime there is a request for newer items.
* The walk stops at the end of the list, or else when action returns 0.
* Return 1 when the end of the list was reached, 0 when the walk was
* stopped before the end.
*
* void echttp_sorted_audit (echttp_sorted_list b, int *buckets, int *items);
*
* Calculate the number of buckets and items currently allocated.
* This function is intended for unit testing purpose.
*
* IMPLEMENTATION
*
* A list is represented as a tree of buckets, 8 levels deep. Each level
* represents one byte from the key: the first (top) level represents the
* most significant bytes while the last (bottom level represents the least
* significant bytes. Buckets at the last level points to collision lists
* instead of buckets: all the items that share the same key are listed
* in the order they were added.
*
* What is expected to happen when using live timestamps is the creation of
* a small number of low depth buckets, since all the timestamps share the
* same most significant bytes values, followed by a "fan" of mostly filled
* high depth buckets. Imagine a palm tree.. As time goes by, the older
* entries are removed and more recent new entries are created, keeping
* the overall palm tree shape.
*
* Lesson learned: the event indexed using this module are relatively spread,
* about 250 events within 12 hours, or about 20 events per hour. This leads
* to very sparse buckets. Thus two optimizations:
* - avoid walking all 256 slots in each bucket by remembering the lowest
* and highest slots used. Even if imperfect (ignoring removals), this
* should save a lot of empty slots checks since most slots were never
* used.
* - Skip the two lower levels (only use 6 levels), and sort the resulting
* collision lists (TBD).
*
* LIMITATIONS
*
* This implementation would not work well with randomized keys: it could
* cause the creation of a lot of sparce buckets, wasting memory. However
* this fits very well with timestamps, as "live" timestamps tend to be
* aggregated around the current day/month/year.
*
* There is no optimization intended to reduce the depth of the tree of
* buckets at that time. Such an optimization would be to skip depth levels
* that have the same value for all items. Such an optimization could make
* add/remove operations faster on average when using live timestamps. It
* was decided to not do it because the saving in term of buckets would be
* small (6 buckets or less) anyway, but this would significantly increase
* the implementation's complexity.
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include "echttp_sorted.h"
struct echttp_sorted_leaf {
struct echttp_sorted_leaf *prev;
struct echttp_sorted_leaf *next;
void *data;
};
struct echttp_sorted_bucket {
short depth;
short low;
short high;
union {
struct echttp_sorted_bucket *sub; // if depth < 7
struct echttp_sorted_leaf *leaf; // if depth == 7
} index[256];
};
static struct echttp_sorted_leaf *echttp_sorted_new_leaf (void *data) {
struct echttp_sorted_leaf *leaf = malloc (sizeof(struct echttp_sorted_leaf));
leaf->prev = 0;
leaf->next = 0;
leaf->data =data;
return leaf;
}
static struct echttp_sorted_bucket *echttp_sorted_new_bucket (int depth) {
struct echttp_sorted_bucket *bucket = malloc (sizeof(struct echttp_sorted_bucket));
bucket->depth = depth;
bucket->low = 255; // Inverted low and high means empty
bucket->high = 0;
int i;
for (i = 255; i >= 0; --i) bucket->index[i].sub = 0;
return bucket;
}
static void echttp_sorted_add_leaf (struct echttp_sorted_leaf *l, void *data) {
struct echttp_sorted_leaf *leaf = echttp_sorted_new_leaf (data);
leaf->prev = l->next;
if (l->next) {
l->next->next = leaf; // At the end of an existing list.
} else {
l->prev = leaf; // The list was empty: this is the 1st element.
}
l->next = leaf; // This is the new end of the list.
}
echttp_sorted_list echttp_sorted_new (void) {
return echttp_sorted_new_bucket (0);
}
void echttp_sorted_add (echttp_sorted_list b, unsigned long long key, void *data) {
if (b->depth >= 7) {
int hash = key & 0xff;
if (!b->index[hash].leaf) {
b->index[hash].leaf = echttp_sorted_new_leaf(0);
if (b->low > hash) b->low = hash;
if (b->high < hash) b->high = hash;
}
echttp_sorted_add_leaf (b->index[hash].leaf, data);
} else {
int hash = (key >> (8*(7-b->depth))) & 0xff;
if (!b->index[hash].sub) {
b->index[hash].sub = echttp_sorted_new_bucket (b->depth+1);
if (b->low > hash) b->low = hash;
if (b->high < hash) b->high = hash;
}
echttp_sorted_add (b->index[hash].sub, key, data);
}
}
static int echttp_sorted_bucket_empty (struct echttp_sorted_bucket *b) {
int i;
for (i = b->high; i >= b->low; --i) if (b->index[i].sub) return 0;
return 1;
}
void echttp_sorted_remove (echttp_sorted_list b, unsigned long long key, void *data) {
int i;
int hash;
struct echttp_sorted_bucket *stack[8];
struct echttp_sorted_bucket *bucket = b;
for (i = 7; i > 0; --i) {
stack[i] = bucket;
if (!bucket) return; // Nothing to remove if not present.
hash = (key >> (8*i)) & 0xff;
bucket = bucket->index[hash].sub;
}
if (!bucket) return;
// We have now reached the deepest bucket, which points to leaves.
hash = key & 0xff;
struct echttp_sorted_leaf *leaf = bucket->index[hash].leaf;
if (!leaf) return; // Nothing to remove from an empty list.
struct echttp_sorted_leaf *cursor;
for (cursor = leaf->prev; cursor; cursor = cursor->next) {
if (cursor->data == data) break;
}
if (!cursor) return; // Not found, no change.
// Remove from that collision list.
if (cursor->prev) {
cursor->prev->next = cursor->next;
} else {
leaf->prev = cursor->next;
}
if (cursor->next) {
cursor->next->prev = cursor->prev;
} else {
leaf->next = cursor->prev;
}
free (cursor);
if (leaf->prev || leaf->next) return; // List still not empty.
// The leave list is now empty: remove it.
free (leaf);
bucket->index[hash].leaf = 0;
// If the bucket is empty, delete it. Propagate through the stack, except
// for the top element (which is the list itself).
//
for (i = 1; i <= 7; ++i) {
if (!echttp_sorted_bucket_empty(bucket)) return;
free(bucket);
bucket = stack[i];
hash = (key >> (8*i)) & 0xff;
bucket->index[hash].sub = 0;
}
}
static int echttp_sorted_descend_leaf (const struct echttp_sorted_leaf *l,
echttp_sorted_action *action) {
struct echttp_sorted_leaf *cursor;
for (cursor = l->next; cursor; cursor = cursor->prev) {
if (!action(cursor->data)) return 0;
}
return 1;
}
int echttp_sorted_descending (const echttp_sorted_list b,
echttp_sorted_action *action) {
if (!b) return 1;
int i;
if (b->depth == 7) {
for (i = b->high; i >= b->low; --i) {
struct echttp_sorted_leaf *leaf = b->index[i].leaf;
if (leaf) {
if (!echttp_sorted_descend_leaf(leaf, action)) return 0;
}
}
} else {
for (i = b->high; i >= b->low; --i) {
if (!echttp_sorted_descending (b->index[i].sub, action)) return 0;
}
}
return 1;
}
int echttp_sorted_descending_from (const echttp_sorted_list b,
const unsigned long long key,
echttp_sorted_action *action) {
if (!b) return 1;
int i;
if (b->depth == 7) {
for (i = key & 0xff; i >= 0; --i) {
struct echttp_sorted_leaf *leaf = b->index[i].leaf;
if (leaf) {
if (!echttp_sorted_descend_leaf(leaf, action)) return 0;
}
}
} else {
// Walk only the relevant part of the first sub-bucket, but walk
// all subsequent buckets in their entirety because they match
// lower hash (i.e. lower key) values.
//
int hash = (key >> (8*(7-b->depth))) & 0xff;
if (!echttp_sorted_descending_from (b->index[hash].sub, key, action))
return 0;
for (i = hash-1; i >= b->low; --i) {
if (!echttp_sorted_descending (b->index[i].sub, action)) return 0;
}
}
return 1;
}
static int echttp_sorted_ascend_leaf (const struct echttp_sorted_leaf *l,
echttp_sorted_action *action) {
struct echttp_sorted_leaf *cursor;
for (cursor = l->prev; cursor; cursor = cursor->next) {
if (!action(cursor->data)) return 0;;
}
return 1;
}
int echttp_sorted_ascending (const echttp_sorted_list b,
echttp_sorted_action *action) {
if (!b) return 1;
int i;
if (b->depth == 7) {
for (i = b->low; i <= b->high; ++i) {
struct echttp_sorted_leaf *leaf = b->index[i].leaf;
if (leaf) {
if (!echttp_sorted_ascend_leaf(leaf, action)) return 0;
}
}
} else {
for (i = b->low; i <= b->high; ++i) {
if (!echttp_sorted_ascending (b->index[i].sub, action)) return 0;
}
}
return 1;
}
int echttp_sorted_ascending_from (const echttp_sorted_list b,
const unsigned long long key,
echttp_sorted_action *action) {
if (!b) return 1;
int i;
if (b->depth == 7) {
for (i = key & 0xff; i <= b->high; ++i) {
struct echttp_sorted_leaf *leaf = b->index[i].leaf;
if (leaf) {
if (!echttp_sorted_ascend_leaf(leaf, action)) return 0;
}
}
} else {
// Walk only the relevant part of the first sub-bucket, but walk
// all subsequent buckets in their entirety because they match
// greater hash (i.e. greater key) values.
//
int hash = (key >> (8*(7-b->depth))) & 0xff;
if (!echttp_sorted_ascending_from (b->index[hash].sub, key, action))
return 0;
for (i = hash+1; i <= b->high; ++i) {
if (!echttp_sorted_ascending (b->index[i].sub, action)) return 0;
}
}
return 1;
}
// This function is for unit testing and troubleshooting. We are more
// concerned about robustness than performances.
//
void echttp_sorted_audit (const echttp_sorted_list b, int *buckets, int *items) {
*buckets = 0;
*items = 0;
if (!b) return;
int i;
*buckets = 1; // This bucket, at least.
if (b->depth == 7) {
for (i = 255; i >= 0; --i) {
struct echttp_sorted_leaf *leaf = b->index[i].leaf;
if (!leaf) continue;
struct echttp_sorted_leaf *cursor;
for (cursor = leaf->next; cursor; cursor = cursor->prev) {
*items += 1;
}
}
} else {
for (i = 255; i >= 0; --i) {
int subbuckets;
int subitems;
echttp_sorted_audit (b->index[i].sub, &subbuckets, &subitems);
*buckets += subbuckets;
*items += subitems;
}
}
}