From 828917729d091c24ab8096b02ee28af2547373ee Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 11 Nov 2024 18:19:17 +0100 Subject: [PATCH 1/4] Refactor unpacking of recursive types Extracted from: https://github.com/msgpack/msgpack-ruby/pull/370 As initially implemented, we allocate a new stack whenever we encounter a child stack. This means acquiring a whole 4kiB chunk from the arena, which generally is overkill. Instead we can introduce a new type of stack item, that behave just like a Hash or Array. --- ext/msgpack/unpacker.c | 20 ++++++-------------- ext/msgpack/unpacker.h | 2 +- ext/msgpack/unpacker_class.c | 9 ++++----- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/ext/msgpack/unpacker.c b/ext/msgpack/unpacker.c index 32423e9c..443ba401 100644 --- a/ext/msgpack/unpacker.c +++ b/ext/msgpack/unpacker.c @@ -108,25 +108,19 @@ static inline void _msgpack_unpacker_free_stack(msgpack_unpacker_stack_t* stack) void _msgpack_unpacker_destroy(msgpack_unpacker_t* uk) { - msgpack_unpacker_stack_t *stack; - while ((stack = uk->stack)) { - uk->stack = stack->parent; - _msgpack_unpacker_free_stack(stack); - } - + _msgpack_unpacker_free_stack(uk->stack); msgpack_buffer_destroy(UNPACKER_BUFFER_(uk)); } void msgpack_unpacker_mark_stack(msgpack_unpacker_stack_t* stack) { - while (stack) { + if (stack) { msgpack_unpacker_stack_entry_t* s = stack->data; msgpack_unpacker_stack_entry_t* send = stack->data + stack->depth; for(; s < send; s++) { rb_gc_mark(s->object); rb_gc_mark(s->key); } - stack = stack->parent; } } @@ -343,14 +337,10 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, int raw_type) reset_head_byte(uk); uk->reading_raw_remaining = 0; - msgpack_unpacker_stack_t* child_stack = _msgpack_unpacker_new_stack(); - child_stack->parent = uk->stack; - uk->stack = child_stack; - + _msgpack_unpacker_stack_push(uk, STACK_TYPE_RECURSIVE, 1, Qnil); int raised; obj = protected_proc_call(proc, 1, &uk->self, &raised); - uk->stack = child_stack->parent; - _msgpack_unpacker_free_stack(child_stack); + msgpack_unpacker_stack_pop(uk); if (raised) { uk->last_object = rb_errinfo(); @@ -783,6 +773,8 @@ int msgpack_unpacker_read(msgpack_unpacker_t* uk, size_t target_stack_depth) } top->type = STACK_TYPE_MAP_KEY; break; + case STACK_TYPE_RECURSIVE: + return PRIMITIVE_OBJECT_COMPLETE; } size_t count = --top->count; diff --git a/ext/msgpack/unpacker.h b/ext/msgpack/unpacker.h index 6925b108..a6889ab0 100644 --- a/ext/msgpack/unpacker.h +++ b/ext/msgpack/unpacker.h @@ -31,6 +31,7 @@ enum stack_type_t { STACK_TYPE_ARRAY, STACK_TYPE_MAP_KEY, STACK_TYPE_MAP_VALUE, + STACK_TYPE_RECURSIVE, }; typedef struct { @@ -44,7 +45,6 @@ struct msgpack_unpacker_stack_t { size_t depth; size_t capacity; msgpack_unpacker_stack_entry_t *data; - msgpack_unpacker_stack_t *parent; }; struct msgpack_unpacker_t { diff --git a/ext/msgpack/unpacker_class.c b/ext/msgpack/unpacker_class.c index 06dd3dfa..19758cac 100644 --- a/ext/msgpack/unpacker_class.c +++ b/ext/msgpack/unpacker_class.c @@ -58,17 +58,16 @@ static void Unpacker_mark(void *ptr) static size_t Unpacker_memsize(const void *ptr) { + const msgpack_unpacker_t* uk = ptr; + size_t total_size = sizeof(msgpack_unpacker_t); - const msgpack_unpacker_t* uk = ptr; if (uk->ext_registry) { total_size += sizeof(msgpack_unpacker_ext_registry_t) / (uk->ext_registry->borrow_count + 1); } - msgpack_unpacker_stack_t *stack = uk->stack; - while (stack) { - total_size += (stack->depth + 1) * sizeof(msgpack_unpacker_stack_t); - stack = stack->parent; + if (uk->stack) { + total_size += (uk->stack->depth + 1) * sizeof(msgpack_unpacker_stack_t); } return total_size + msgpack_buffer_memsize(&uk->buffer); From 3d287931b32af27f3c59c2a813ca62dabc37a415 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 11 Nov 2024 18:35:02 +0100 Subject: [PATCH 2/4] Embed unpacker_stack inside unpacker Now that there's no longer a concept of child stacks, we can embed the struct and save a malloc/free pair. --- ext/msgpack/unpacker.c | 34 ++++++++++++++++------------------ ext/msgpack/unpacker.h | 2 +- ext/msgpack/unpacker_class.c | 6 +++--- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/ext/msgpack/unpacker.c b/ext/msgpack/unpacker.c index 443ba401..cd8e70d9 100644 --- a/ext/msgpack/unpacker.c +++ b/ext/msgpack/unpacker.c @@ -79,12 +79,9 @@ void msgpack_unpacker_static_destroy(void) #define HEAD_BYTE_REQUIRED 0xc1 -static inline msgpack_unpacker_stack_t* _msgpack_unpacker_new_stack(void) { - msgpack_unpacker_stack_t *stack = ZALLOC(msgpack_unpacker_stack_t); +static inline void _msgpack_unpacker_stack_init(msgpack_unpacker_stack_t *stack) { stack->capacity = MSGPACK_UNPACKER_STACK_CAPACITY; stack->data = msgpack_rmem_alloc(&s_stack_rmem); - /*memset(uk->stack, 0, MSGPACK_UNPACKER_STACK_CAPACITY);*/ - return stack; } void _msgpack_unpacker_init(msgpack_unpacker_t* uk) @@ -96,25 +93,26 @@ void _msgpack_unpacker_init(msgpack_unpacker_t* uk) uk->last_object = Qnil; uk->reading_raw = Qnil; - uk->stack = _msgpack_unpacker_new_stack(); + _msgpack_unpacker_stack_init(&uk->stack); } static inline void _msgpack_unpacker_free_stack(msgpack_unpacker_stack_t* stack) { if (!msgpack_rmem_free(&s_stack_rmem, stack->data)) { rb_bug("Failed to free an rmem pointer, memory leak?"); } - xfree(stack); + stack->data = NULL; + stack->depth = 0; } void _msgpack_unpacker_destroy(msgpack_unpacker_t* uk) { - _msgpack_unpacker_free_stack(uk->stack); + _msgpack_unpacker_free_stack(&uk->stack); msgpack_buffer_destroy(UNPACKER_BUFFER_(uk)); } void msgpack_unpacker_mark_stack(msgpack_unpacker_stack_t* stack) { - if (stack) { + if (stack->data) { msgpack_unpacker_stack_entry_t* s = stack->data; msgpack_unpacker_stack_entry_t* send = stack->data + stack->depth; for(; s < send; s++) { @@ -128,7 +126,7 @@ void msgpack_unpacker_mark(msgpack_unpacker_t* uk) { rb_gc_mark(uk->last_object); rb_gc_mark(uk->reading_raw); - msgpack_unpacker_mark_stack(uk->stack); + msgpack_unpacker_mark_stack(&uk->stack); /* See MessagePack_Buffer_wrap */ /* msgpack_buffer_mark(UNPACKER_BUFFER_(uk)); */ rb_gc_mark(uk->buffer_ref); @@ -141,8 +139,8 @@ void _msgpack_unpacker_reset(msgpack_unpacker_t* uk) uk->head_byte = HEAD_BYTE_REQUIRED; - /*memset(uk->stack, 0, sizeof(msgpack_unpacker_t) * uk->stack->depth);*/ - uk->stack->depth = 0; + /*memset(uk->stack, 0, sizeof(msgpack_unpacker_t) * uk->stack.depth);*/ + uk->stack.depth = 0; uk->last_object = Qnil; uk->reading_raw = Qnil; uk->reading_raw_remaining = 0; @@ -226,35 +224,35 @@ static inline int object_complete_ext(msgpack_unpacker_t* uk, int ext_type, VALU /* stack funcs */ static inline msgpack_unpacker_stack_entry_t* _msgpack_unpacker_stack_entry_top(msgpack_unpacker_t* uk) { - return &uk->stack->data[uk->stack->depth-1]; + return &uk->stack.data[uk->stack.depth-1]; } static inline int _msgpack_unpacker_stack_push(msgpack_unpacker_t* uk, enum stack_type_t type, size_t count, VALUE object) { reset_head_byte(uk); - if(uk->stack->capacity - uk->stack->depth <= 0) { + if(uk->stack.capacity - uk->stack.depth <= 0) { return PRIMITIVE_STACK_TOO_DEEP; } - msgpack_unpacker_stack_entry_t* next = &uk->stack->data[uk->stack->depth]; + msgpack_unpacker_stack_entry_t* next = &uk->stack.data[uk->stack.depth]; next->count = count; next->type = type; next->object = object; next->key = Qnil; - uk->stack->depth++; + uk->stack.depth++; return PRIMITIVE_CONTAINER_START; } static inline VALUE msgpack_unpacker_stack_pop(msgpack_unpacker_t* uk) { - return --uk->stack->depth; + return --uk->stack.depth; } static inline bool msgpack_unpacker_stack_is_empty(msgpack_unpacker_t* uk) { - return uk->stack->depth == 0; + return uk->stack.depth == 0; } #ifdef USE_CASE_RANGE @@ -282,7 +280,7 @@ static inline bool msgpack_unpacker_stack_is_empty(msgpack_unpacker_t* uk) static inline bool is_reading_map_key(msgpack_unpacker_t* uk) { - if(uk->stack->depth > 0) { + if(uk->stack.depth > 0) { msgpack_unpacker_stack_entry_t* top = _msgpack_unpacker_stack_entry_top(uk); if(top->type == STACK_TYPE_MAP_KEY) { return true; diff --git a/ext/msgpack/unpacker.h b/ext/msgpack/unpacker.h index a6889ab0..be997cba 100644 --- a/ext/msgpack/unpacker.h +++ b/ext/msgpack/unpacker.h @@ -49,7 +49,7 @@ struct msgpack_unpacker_stack_t { struct msgpack_unpacker_t { msgpack_buffer_t buffer; - msgpack_unpacker_stack_t *stack; + msgpack_unpacker_stack_t stack; unsigned int head_byte; VALUE self; diff --git a/ext/msgpack/unpacker_class.c b/ext/msgpack/unpacker_class.c index 19758cac..99ed7bbe 100644 --- a/ext/msgpack/unpacker_class.c +++ b/ext/msgpack/unpacker_class.c @@ -66,8 +66,8 @@ static size_t Unpacker_memsize(const void *ptr) total_size += sizeof(msgpack_unpacker_ext_registry_t) / (uk->ext_registry->borrow_count + 1); } - if (uk->stack) { - total_size += (uk->stack->depth + 1) * sizeof(msgpack_unpacker_stack_t); + if (uk->stack.data) { + total_size += (uk->stack.depth + 1) * sizeof(msgpack_unpacker_stack_t); } return total_size + msgpack_buffer_memsize(&uk->buffer); @@ -161,7 +161,7 @@ static VALUE Unpacker_allow_unknown_ext_p(VALUE self) NORETURN(static void raise_unpacker_error(msgpack_unpacker_t *uk, int r)) { - uk->stack->depth = 0; + uk->stack.depth = 0; switch(r) { case PRIMITIVE_EOF: rb_raise(rb_eEOFError, "end of buffer reached"); From 3778272f0403d4472d3e8294c45b40bf0cc35cab Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 11 Nov 2024 18:40:05 +0100 Subject: [PATCH 3/4] Reorder msgpack_unpacker_t Reduce its size from 264 to 256B. --- ext/msgpack/unpacker.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ext/msgpack/unpacker.h b/ext/msgpack/unpacker.h index be997cba..400e6610 100644 --- a/ext/msgpack/unpacker.h +++ b/ext/msgpack/unpacker.h @@ -50,25 +50,26 @@ struct msgpack_unpacker_stack_t { struct msgpack_unpacker_t { msgpack_buffer_t buffer; msgpack_unpacker_stack_t stack; - unsigned int head_byte; VALUE self; VALUE last_object; VALUE reading_raw; size_t reading_raw_remaining; - int reading_raw_type; VALUE buffer_ref; msgpack_unpacker_ext_registry_t *ext_registry; + int reading_raw_type; + unsigned int head_byte; + /* options */ + int symbol_ext_type; bool symbolize_keys; bool freeze; bool allow_unknown_ext; bool optimized_symbol_ext_type; - int symbol_ext_type; }; #define UNPACKER_BUFFER_(uk) (&(uk)->buffer) From b265f3cd5aa5577accf04417f33244387db3782b Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 11 Nov 2024 19:56:10 +0100 Subject: [PATCH 4/4] Fix msgpack_unpacker_stack_pop return type --- ext/msgpack/unpacker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/msgpack/unpacker.c b/ext/msgpack/unpacker.c index cd8e70d9..758afec3 100644 --- a/ext/msgpack/unpacker.c +++ b/ext/msgpack/unpacker.c @@ -245,7 +245,7 @@ static inline int _msgpack_unpacker_stack_push(msgpack_unpacker_t* uk, enum stac return PRIMITIVE_CONTAINER_START; } -static inline VALUE msgpack_unpacker_stack_pop(msgpack_unpacker_t* uk) +static inline size_t msgpack_unpacker_stack_pop(msgpack_unpacker_t* uk) { return --uk->stack.depth; }