Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor unpacking of recursive types #372

Merged
merged 4 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 21 additions & 31 deletions ext/msgpack/unpacker.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,9 @@ void msgpack_unpacker_static_destroy(void)

#define HEAD_BYTE_REQUIRED 0xc1

static inline msgpack_unpacker_stack_t* _msgpack_unpacker_new_stack(void) {
msgpack_unpacker_stack_t *stack = ZALLOC(msgpack_unpacker_stack_t);
static inline void _msgpack_unpacker_stack_init(msgpack_unpacker_stack_t *stack) {
stack->capacity = MSGPACK_UNPACKER_STACK_CAPACITY;
stack->data = msgpack_rmem_alloc(&s_stack_rmem);
/*memset(uk->stack, 0, MSGPACK_UNPACKER_STACK_CAPACITY);*/
return stack;
}

void _msgpack_unpacker_init(msgpack_unpacker_t* uk)
Expand All @@ -96,45 +93,40 @@ void _msgpack_unpacker_init(msgpack_unpacker_t* uk)
uk->last_object = Qnil;
uk->reading_raw = Qnil;

uk->stack = _msgpack_unpacker_new_stack();
_msgpack_unpacker_stack_init(&uk->stack);
}

static inline void _msgpack_unpacker_free_stack(msgpack_unpacker_stack_t* stack) {
if (!msgpack_rmem_free(&s_stack_rmem, stack->data)) {
rb_bug("Failed to free an rmem pointer, memory leak?");
}
xfree(stack);
stack->data = NULL;
stack->depth = 0;
}

void _msgpack_unpacker_destroy(msgpack_unpacker_t* uk)
{
msgpack_unpacker_stack_t *stack;
while ((stack = uk->stack)) {
uk->stack = stack->parent;
_msgpack_unpacker_free_stack(stack);
}

_msgpack_unpacker_free_stack(&uk->stack);
msgpack_buffer_destroy(UNPACKER_BUFFER_(uk));
}

void msgpack_unpacker_mark_stack(msgpack_unpacker_stack_t* stack)
{
while (stack) {
if (stack->data) {
msgpack_unpacker_stack_entry_t* s = stack->data;
msgpack_unpacker_stack_entry_t* send = stack->data + stack->depth;
for(; s < send; s++) {
rb_gc_mark(s->object);
rb_gc_mark(s->key);
}
stack = stack->parent;
}
}

void msgpack_unpacker_mark(msgpack_unpacker_t* uk)
{
rb_gc_mark(uk->last_object);
rb_gc_mark(uk->reading_raw);
msgpack_unpacker_mark_stack(uk->stack);
msgpack_unpacker_mark_stack(&uk->stack);
/* See MessagePack_Buffer_wrap */
/* msgpack_buffer_mark(UNPACKER_BUFFER_(uk)); */
rb_gc_mark(uk->buffer_ref);
Expand All @@ -147,8 +139,8 @@ void _msgpack_unpacker_reset(msgpack_unpacker_t* uk)

uk->head_byte = HEAD_BYTE_REQUIRED;

/*memset(uk->stack, 0, sizeof(msgpack_unpacker_t) * uk->stack->depth);*/
uk->stack->depth = 0;
/*memset(uk->stack, 0, sizeof(msgpack_unpacker_t) * uk->stack.depth);*/
uk->stack.depth = 0;
uk->last_object = Qnil;
uk->reading_raw = Qnil;
uk->reading_raw_remaining = 0;
Expand Down Expand Up @@ -232,35 +224,35 @@ static inline int object_complete_ext(msgpack_unpacker_t* uk, int ext_type, VALU
/* stack funcs */
static inline msgpack_unpacker_stack_entry_t* _msgpack_unpacker_stack_entry_top(msgpack_unpacker_t* uk)
{
return &uk->stack->data[uk->stack->depth-1];
return &uk->stack.data[uk->stack.depth-1];
}

static inline int _msgpack_unpacker_stack_push(msgpack_unpacker_t* uk, enum stack_type_t type, size_t count, VALUE object)
{
reset_head_byte(uk);

if(uk->stack->capacity - uk->stack->depth <= 0) {
if(uk->stack.capacity - uk->stack.depth <= 0) {
return PRIMITIVE_STACK_TOO_DEEP;
}

msgpack_unpacker_stack_entry_t* next = &uk->stack->data[uk->stack->depth];
msgpack_unpacker_stack_entry_t* next = &uk->stack.data[uk->stack.depth];
next->count = count;
next->type = type;
next->object = object;
next->key = Qnil;

uk->stack->depth++;
uk->stack.depth++;
return PRIMITIVE_CONTAINER_START;
}

static inline VALUE msgpack_unpacker_stack_pop(msgpack_unpacker_t* uk)
static inline size_t msgpack_unpacker_stack_pop(msgpack_unpacker_t* uk)
{
return --uk->stack->depth;
return --uk->stack.depth;
}

static inline bool msgpack_unpacker_stack_is_empty(msgpack_unpacker_t* uk)
{
return uk->stack->depth == 0;
return uk->stack.depth == 0;
}

#ifdef USE_CASE_RANGE
Expand Down Expand Up @@ -288,7 +280,7 @@ static inline bool msgpack_unpacker_stack_is_empty(msgpack_unpacker_t* uk)

static inline bool is_reading_map_key(msgpack_unpacker_t* uk)
{
if(uk->stack->depth > 0) {
if(uk->stack.depth > 0) {
msgpack_unpacker_stack_entry_t* top = _msgpack_unpacker_stack_entry_top(uk);
if(top->type == STACK_TYPE_MAP_KEY) {
return true;
Expand Down Expand Up @@ -343,14 +335,10 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, int raw_type)
reset_head_byte(uk);
uk->reading_raw_remaining = 0;

msgpack_unpacker_stack_t* child_stack = _msgpack_unpacker_new_stack();
child_stack->parent = uk->stack;
uk->stack = child_stack;

_msgpack_unpacker_stack_push(uk, STACK_TYPE_RECURSIVE, 1, Qnil);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since a child no longer creating a new stack, does this change mean that recursive types will trigger a stack overflow much sooner than before?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly, yes. But the stack size if 128, that's quite a deep document, I don't think that's something you are likely to hit..

That said, your comment make me realize there's just no check whatsoever for the stack size :/

Would be worth adding one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nevermind, there's a check for it:

    if(uk->stack.capacity - uk->stack.depth <= 0) {
        return PRIMITIVE_STACK_TOO_DEEP;
    }

int raised;
obj = protected_proc_call(proc, 1, &uk->self, &raised);
uk->stack = child_stack->parent;
_msgpack_unpacker_free_stack(child_stack);
msgpack_unpacker_stack_pop(uk);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we assert that the pop here is actually popping a STACK_TYPE_RECURSIVE to ensure that we don't have a stack inconsistency issue?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't quite have a good setup for asserts in the gem, but yes that would be valuable to put something like that in place.


if (raised) {
uk->last_object = rb_errinfo();
Expand Down Expand Up @@ -783,6 +771,8 @@ int msgpack_unpacker_read(msgpack_unpacker_t* uk, size_t target_stack_depth)
}
top->type = STACK_TYPE_MAP_KEY;
break;
case STACK_TYPE_RECURSIVE:
return PRIMITIVE_OBJECT_COMPLETE;
}
size_t count = --top->count;

Expand Down
11 changes: 6 additions & 5 deletions ext/msgpack/unpacker.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ enum stack_type_t {
STACK_TYPE_ARRAY,
STACK_TYPE_MAP_KEY,
STACK_TYPE_MAP_VALUE,
STACK_TYPE_RECURSIVE,
};

typedef struct {
Expand All @@ -44,31 +45,31 @@ struct msgpack_unpacker_stack_t {
size_t depth;
size_t capacity;
msgpack_unpacker_stack_entry_t *data;
msgpack_unpacker_stack_t *parent;
};

struct msgpack_unpacker_t {
msgpack_buffer_t buffer;
msgpack_unpacker_stack_t *stack;
unsigned int head_byte;
msgpack_unpacker_stack_t stack;

VALUE self;
VALUE last_object;

VALUE reading_raw;
size_t reading_raw_remaining;
int reading_raw_type;

VALUE buffer_ref;

msgpack_unpacker_ext_registry_t *ext_registry;

int reading_raw_type;
unsigned int head_byte;

/* options */
int symbol_ext_type;
bool symbolize_keys;
bool freeze;
bool allow_unknown_ext;
bool optimized_symbol_ext_type;
int symbol_ext_type;
};

#define UNPACKER_BUFFER_(uk) (&(uk)->buffer)
Expand Down
11 changes: 5 additions & 6 deletions ext/msgpack/unpacker_class.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,16 @@ static void Unpacker_mark(void *ptr)

static size_t Unpacker_memsize(const void *ptr)
{
const msgpack_unpacker_t* uk = ptr;

size_t total_size = sizeof(msgpack_unpacker_t);

const msgpack_unpacker_t* uk = ptr;
if (uk->ext_registry) {
total_size += sizeof(msgpack_unpacker_ext_registry_t) / (uk->ext_registry->borrow_count + 1);
}

msgpack_unpacker_stack_t *stack = uk->stack;
while (stack) {
total_size += (stack->depth + 1) * sizeof(msgpack_unpacker_stack_t);
stack = stack->parent;
if (uk->stack.data) {
total_size += (uk->stack.depth + 1) * sizeof(msgpack_unpacker_stack_t);
}

return total_size + msgpack_buffer_memsize(&uk->buffer);
Expand Down Expand Up @@ -162,7 +161,7 @@ static VALUE Unpacker_allow_unknown_ext_p(VALUE self)

NORETURN(static void raise_unpacker_error(msgpack_unpacker_t *uk, int r))
{
uk->stack->depth = 0;
uk->stack.depth = 0;
switch(r) {
case PRIMITIVE_EOF:
rb_raise(rb_eEOFError, "end of buffer reached");
Expand Down