Skip to content

Commit

Permalink
Faster generation of Symbols, Fixnums, and Hashes
Browse files Browse the repository at this point in the history
This improves the JSON generation speed of Symbols, Fixnums, and Hashes.

Symbols are made faster by checking for T_SYMBOL and calling rb_sym2str
instead of calling Symbol#to_s. This avoids a string allocation and a
method lookup.

Fixnums are made faster by writing our own simple itoa (to a buffer).
This avoids a string allocation, method lookup, and might be a tiny bit
faster since the radix is hardcoded.

Hashes are improved by skipping the to_s call on keys that are already a
String or Symbol, and by using rb_hash_foreach instead of iterating over
the keys and looking up each value with rb_hash_aref.

In my quick test this was approximately 30% faster.
  • Loading branch information
jhawthorn committed Jun 23, 2021
1 parent ce6cbfb commit 64b3ede
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 13 deletions.
1 change: 1 addition & 0 deletions ext/yajl/api/yajl_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ extern "C" {
* NaN, as these have no representation in JSON. In these cases the
* generator will return 'yajl_gen_invalid_number' */
YAJL_API yajl_gen_status yajl_gen_double(yajl_gen hand, double number);
/** generate an integer from a C long. The value is written as a
 *  base-10 number, with a leading '-' when it is negative. */
YAJL_API yajl_gen_status yajl_gen_long(yajl_gen hand, long value);
YAJL_API yajl_gen_status yajl_gen_number(yajl_gen hand,
const char * num,
unsigned int len);
Expand Down
59 changes: 46 additions & 13 deletions ext/yajl/yajl_ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,36 @@ static void yajl_encoder_wrapper_mark(void * wrapper) {
}
}

/*
 * Produce the value that will be encoded as a hash key.
 *
 * A String is handed back untouched and a Symbol is converted with
 * rb_sym2str; only other kinds of object go through a method call
 * (rb_funcall with intern_to_s and no arguments).
 */
static VALUE yajl_key_to_string(VALUE obj) {
    const int kind = TYPE(obj);

    if (kind == T_STRING) {
        return obj;
    }
    if (kind == T_SYMBOL) {
        return rb_sym2str(obj);
    }
    return rb_funcall(obj, intern_to_s, 0);
}

/* Forward declaration: the hash iteration callback below calls back into
 * yajl_encode_part for every key and value. */
void yajl_encode_part(void * wrapper, VALUE obj, VALUE io);
/* Arguments that yajl_encode_part_hash_i forwards to yajl_encode_part.
 * A pointer to this struct is cast to VALUE so it can travel through the
 * single user-data argument of rb_hash_foreach. */
struct yajl_encode_hash_iter {
/* passed as the first (wrapper) argument of yajl_encode_part */
void *w;
/* passed as the third (io) argument of yajl_encode_part */
VALUE io;
};

/*
 * Per-entry callback for hash iteration: encodes one key/value pair.
 *
 * iter_v is a struct yajl_encode_hash_iter pointer smuggled through a
 * VALUE. The key is first turned into a string by yajl_key_to_string,
 * then key and value are encoded in that order. Always returns
 * ST_CONTINUE.
 */
static int yajl_encode_part_hash_i(VALUE key, VALUE val, VALUE iter_v) {
    struct yajl_encode_hash_iter *ctx = (struct yajl_encode_hash_iter *)iter_v;

    /* key first (it must be a string), then its value */
    yajl_encode_part(ctx->w, yajl_key_to_string(key), ctx->io);
    yajl_encode_part(ctx->w, val, ctx->io);

    return ST_CONTINUE;
}

/* Evaluate a generator call, store its result in the local variable
 * `status`, and `break` when the result is not yajl_gen_status_ok.
 * The expansion site must therefore have a `status` variable in scope and
 * sit directly inside a switch or loop for the `break` to leave. */
#define CHECK_STATUS(call) \
if ((status = (call)) != yajl_gen_status_ok) { break; }

Expand All @@ -166,7 +196,7 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
const unsigned char * buffer;
const char * cptr;
unsigned int len;
VALUE keys, entry, keyStr;
VALUE *ptr;

if (io != Qnil || w->on_progress_callback != Qnil) {
status = yajl_gen_get_buf(w->encoder, &buffer, &len);
Expand All @@ -188,24 +218,19 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
case T_HASH:
CHECK_STATUS(yajl_gen_map_open(w->encoder));

/* TODO: itterate through keys in the hash */
keys = rb_funcall(obj, intern_keys, 0);
for(idx=0; idx<RARRAY_LEN(keys); idx++) {
entry = rb_ary_entry(keys, idx);
keyStr = rb_funcall(entry, intern_to_s, 0); /* key must be a string */
/* the key */
yajl_encode_part(w, keyStr, io);
/* the value */
yajl_encode_part(w, rb_hash_aref(obj, entry), io);
}
struct yajl_encode_hash_iter iter;
iter.w = w;
iter.io = io;
rb_hash_foreach(obj, yajl_encode_part_hash_i, (VALUE)&iter);

CHECK_STATUS(yajl_gen_map_close(w->encoder));
break;
case T_ARRAY:
CHECK_STATUS(yajl_gen_array_open(w->encoder));

VALUE *ptr = RARRAY_PTR(obj);
for(idx=0; idx<RARRAY_LEN(obj); idx++) {
otherObj = rb_ary_entry(obj, idx);
yajl_encode_part(w, otherObj, io);
yajl_encode_part(w, ptr[idx], io);
}
CHECK_STATUS(yajl_gen_array_close(w->encoder));
break;
Expand All @@ -219,6 +244,8 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
CHECK_STATUS(yajl_gen_bool(w->encoder, 0));
break;
case T_FIXNUM:
CHECK_STATUS(yajl_gen_long(w->encoder, FIX2LONG(obj)));
break;
case T_FLOAT:
case T_BIGNUM:
str = rb_funcall(obj, intern_to_s, 0);
Expand All @@ -234,6 +261,12 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
len = (unsigned int)RSTRING_LEN(obj);
CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
break;
case T_SYMBOL:
str = rb_sym2str(obj);
cptr = RSTRING_PTR(str);
len = (unsigned int)RSTRING_LEN(str);
CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
break;
default:
if (rb_respond_to(obj, intern_to_json)) {
str = rb_funcall(obj, intern_to_json, 0);
Expand Down
30 changes: 30 additions & 0 deletions ext/yajl/yajl_gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,36 @@ yajl_gen_double(yajl_gen g, double number)
return yajl_gen_status_ok;
}

/*
 * Generate a JSON integer from a C long.
 *
 * The value is converted to base 10 by hand into a small stack buffer,
 * filled from the end towards the front, and then handed to the print
 * callback in a single call (sign included).
 *
 * g   - the generator handle
 * val - the integer to emit
 *
 * Returns yajl_gen_status_ok once the number has been printed. The
 * ENSURE_* macros are defined outside this function; presumably they
 * return an error status early when the generator is in a state that
 * does not accept a value here -- confirm against their definitions.
 */
yajl_gen_status
yajl_gen_long(yajl_gen g, long val)
{
    /* 32 bytes holds a sign plus the digits of a 64-bit long
     * (at most 20 characters in total). */
    char buf[32];
    char *const end = buf + sizeof buf;
    char *b = end;
    unsigned long uval;

    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;

    if (val < 0) {
        /* Negate via (val + 1) so the most negative long cannot overflow.
         * This shouldn't happen because FIXNUMs are 1 bit less than LONGs,
         * but good to be safe. */
        uval = 1 + (unsigned long)(-(val + 1));
    } else {
        uval = (unsigned long)val;
    }

    /* do/while so that zero still produces the single digit "0" */
    do {
        *--b = (char)('0' + (uval % 10));
        uval /= 10;
    } while (uval);

    if (val < 0) {
        *--b = '-';
    }

    g->print(g->ctx, b, (unsigned int)(end - b));

    APPENDED_ATOM;
    FINAL_NEWLINE;
    return yajl_gen_status_ok;
}

yajl_gen_status
yajl_gen_number(yajl_gen g, const char * s, unsigned int l)
{
Expand Down
10 changes: 10 additions & 0 deletions spec/encoding/encoding_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,16 @@ def to_s
expect(s.read).to eql("{\"foo\":\"bar\"}")
end

# Checks every power of two from 2**0 through 2**129, together with its
# immediate neighbours (+/-1 and +/-2), in both positive and negated form.
it "should encode all integers correctly" do
  (0..129).each do |shift|
    power = 1 << shift
    [power, power - 1, power - 2, power + 1, power + 2].each do |i|
      expect(Yajl::Encoder.encode(i)).to eq(i.to_s)
      expect(Yajl::Encoder.encode(-i)).to eq((-i).to_s)
    end
  end
end

it "should not encode NaN" do
expect {
Yajl::Encoder.encode(0.0/0.0)
Expand Down

0 comments on commit 64b3ede

Please sign in to comment.