From 6e1d0cd237479c6ca821b857c865df75a792adb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= Date: Thu, 11 Apr 2019 13:30:05 +0200 Subject: [PATCH] Never expand the encode buffer; emit it and start anew This greatly increases the performance of long string encodes as we won't need to copy intermediate results over and over. --- c_src/encoder.c | 159 +++++++++++---------------- src/jiffy.erl | 8 +- test/jiffy_02_literal_tests.erl | 19 ++-- test/jiffy_04_string_tests.erl | 10 +- test/jiffy_10_short_double_tests.erl | 2 +- test/jiffy_util.hrl | 2 + 6 files changed, 86 insertions(+), 114 deletions(-) diff --git a/c_src/encoder.c b/c_src/encoder.c index fb30896..605a48a 100644 --- a/c_src/encoder.c +++ b/c_src/encoder.c @@ -39,12 +39,12 @@ typedef struct { int shiftcnt; int count; - size_t iolen; size_t iosize; ERL_NIF_TERM iolist; - ErlNifBinary bin; - ErlNifBinary* curr; + int partial_output; + ErlNifBinary buffer; + int have_buffer; char* p; unsigned char* u; @@ -82,19 +82,20 @@ enc_new(ErlNifEnv* env) e->shiftcnt = 0; e->count = 0; - e->iolen = 0; e->iosize = 0; - e->curr = &(e->bin); - if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) { - e->curr = NULL; + e->iolist = enif_make_list(env, 0); + + e->partial_output = 0; + + if(!enif_alloc_binary(BIN_INC_SIZE, &e->buffer)) { enif_release_resource(e); return NULL; } - memset(e->curr->data, 0, e->curr->size); + e->have_buffer = 1; - e->p = (char*) e->curr->data; - e->u = (unsigned char*) e->curr->data; + e->p = (char*)e->buffer.data; + e->u = (unsigned char*)e->buffer.data; e->i = 0; return e; @@ -112,8 +113,8 @@ enc_destroy(ErlNifEnv* env, void* obj) { Encoder* e = (Encoder*) obj; - if(e->curr != NULL) { - enif_release_binary(e->curr); + if(e->have_buffer) { + enif_release_binary(&e->buffer); } } @@ -130,56 +131,55 @@ enc_obj_error(Encoder* e, const char* msg, ERL_NIF_TERM obj) return make_obj_error(e->atoms, e->env, msg, obj); } -static inline int -enc_ensure(Encoder* e, size_t req) +int +enc_flush(Encoder* e) { - size_t need = e->curr->size; - while(req >= (need - e->i)) need <<= 1; + ERL_NIF_TERM bin; - if(need != e->curr->size) { - if(!enif_realloc_binary(e->curr, need)) { - return 0; - } - e->p = (char*) e->curr->data; - e->u = (unsigned char*) e->curr->data; + if(e->i == 0) { + return 1; } - return 1; -} - -int -enc_result(Encoder* e, ERL_NIF_TERM* value) -{ - if(e->i != e->curr->size) { - if(!enif_realloc_binary(e->curr, e->i)) { + if(e->i < e->buffer.size) { + if(!enif_realloc_binary(&e->buffer, e->i)) { return 0; } } - *value = enif_make_binary(e->env, e->curr); - e->curr = NULL; + bin = enif_make_binary(e->env, &e->buffer); + e->have_buffer = 0; + + e->iolist = enif_make_list_cell(e->env, bin, e->iolist); + e->iosize += e->i; + return 1; } -int -enc_done(Encoder* e, ERL_NIF_TERM* value) +static inline int +enc_ensure(Encoder* e, size_t req) { - ERL_NIF_TERM last; + size_t new_size = BIN_INC_SIZE; + + if(req < (e->buffer.size - e->i)) { + return 1; + } - if(e->iolen == 0) { - return enc_result(e, value); + if(!enc_flush(e)) { + return 0; } - if(e->i > 0 ) { - if(!enc_result(e, &last)) { - return 0; - } + for(new_size = BIN_INC_SIZE; new_size < req; new_size <<= 1); - e->iolist = enif_make_list_cell(e->env, last, e->iolist); - e->iolen++; + if(!enif_alloc_binary(new_size, &e->buffer)) { + return 0; } - *value = e->iolist; + e->have_buffer = 1; + + e->p = (char*)e->buffer.data; + e->u = (unsigned char*)e->buffer.data; + e->i = 0; + return 1; } @@ -266,50 +266,6 @@ termstack_destroy(TermStack *stack) } } -static inline int -enc_unknown(Encoder* e, ERL_NIF_TERM value) -{ - ErlNifBinary* bin = e->curr; - ERL_NIF_TERM curr; - - if(e->i > 0) { - if(!enc_result(e, &curr)) { - return 0; - } - - e->iolist = enif_make_list_cell(e->env, curr, e->iolist); - e->iolen++; - } - - e->iolist = enif_make_list_cell(e->env, value, e->iolist); - e->iolen++; - - // Track the total number of bytes produced before - // splitting our IO buffer. We add 16 to this value - // as a rough estimate of the number of bytes that - // a bignum might produce when encoded. - e->iosize += e->i + 16; - - // Reinitialize our binary for the next buffer if we - // used any data in the buffer. If we haven't used any - // bytes in the buffer then we can safely reuse it - // for anything following the unknown value. - if(e->i > 0) { - e->curr = bin; - if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) { - return 0; - } - - memset(e->curr->data, 0, e->curr->size); - - e->p = (char*) e->curr->data; - e->u = (unsigned char*) e->curr->data; - e->i = 0; - } - - return 1; -} - static inline int enc_literal(Encoder* e, const char* literal, size_t len) { @@ -615,7 +571,7 @@ enc_double(Encoder* e, double val) start = &(e->p[e->i]); - if(!double_to_shortest(start, e->curr->size, &len, val)) { + if(!double_to_shortest(start, e->buffer.size, &len, val)) { return 0; } @@ -1026,23 +982,36 @@ encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) termstack_push(&stack, curr); termstack_push(&stack, e->atoms->ref_array); termstack_push(&stack, item); - } else { - if(!enc_unknown(e, curr)) { + } else if(enif_is_number(env, curr)) { + /* This is a bignum and we need to handle it up in Erlang code as + * the NIF API doesn't support them yet. + * + * Flush our current output and mark ourselves as needing a fixup + * after we return. */ + if(!enc_flush(e)) { ret = enc_error(e, "internal_error"); goto done; } + + e->iolist = enif_make_list_cell(e->env, curr, e->iolist); + e->partial_output = 1; + } else { + ret = enc_obj_error(e, "invalid_ejson", curr); + goto done; } } - if(!enc_done(e, &item)) { + if(!enc_flush(e)) { ret = enc_error(e, "internal_error"); goto done; } - if(e->iolen == 0) { - ret = item; + assert(enif_is_list(env, e->iolist)); + + if(e->partial_output) { + ret = enif_make_tuple2(env, e->atoms->atom_partial, e->iolist); } else { - ret = enif_make_tuple2(env, e->atoms->atom_partial, item); + ret = e->iolist; } done: diff --git a/src/jiffy.erl b/src/jiffy.erl index 2ed3620..e7be219 100644 --- a/src/jiffy.erl +++ b/src/jiffy.erl @@ -95,10 +95,10 @@ encode(Data, Options) -> encode(FixedData, Options -- [force_utf8]); {error, _} = Error -> throw(Error); - {partial, IOData} -> - finish_encode(IOData, []); - IOData -> - IOData + {partial, IOList} -> + finish_encode(IOList, []); + Rev_IOVec when is_list(Rev_IOVec) -> + lists:reverse(Rev_IOVec) end. diff --git a/test/jiffy_02_literal_tests.erl b/test/jiffy_02_literal_tests.erl index 2699199..5a6a17f 100644 --- a/test/jiffy_02_literal_tests.erl +++ b/test/jiffy_02_literal_tests.erl @@ -4,32 +4,33 @@ -module(jiffy_02_literal_tests). -include_lib("eunit/include/eunit.hrl"). +-include("jiffy_util.hrl"). true_test_() -> {"true", [ - {"Decode", ?_assertEqual(true, jiffy:decode(<<"true">>))}, - {"Encode", ?_assertEqual(<<"true">>, jiffy:encode(true))} + {"Decode", ?_assertEqual(true, dec(<<"true">>))}, + {"Encode", ?_assertEqual(<<"true">>, enc(true))} ]}. false_test_() -> {"false", [ - {"Decode", ?_assertEqual(false, jiffy:decode(<<"false">>))}, - {"Encode", ?_assertEqual(<<"false">>, jiffy:encode(false))} + {"Decode", ?_assertEqual(false, dec(<<"false">>))}, + {"Encode", ?_assertEqual(<<"false">>, enc(false))} ]}. null_test_() -> {"null", [ - {"Decode", ?_assertEqual(null, jiffy:decode(<<"null">>))}, - {"Encode", ?_assertEqual(<<"null">>, jiffy:encode(null))} + {"Decode", ?_assertEqual(null, dec(<<"null">>))}, + {"Encode", ?_assertEqual(<<"null">>, enc(null))} ]}. nil_test_() -> {"null", [ - {"Decode", ?_assertEqual(nil, jiffy:decode(<<"null">>, [use_nil]))}, - {"Encode", ?_assertEqual(<<"null">>, jiffy:encode(nil, [use_nil]))} + {"Decode", ?_assertEqual(nil, dec(<<"null">>, [use_nil]))}, + {"Encode", ?_assertEqual(<<"null">>, enc(nil, [use_nil]))} ]}. null_term_test_() -> @@ -41,4 +42,4 @@ null_term_test_() -> {whatever, [{null_term, undefined}, {null_term, whatever}]} ], {"null_term", - [?_assertEqual(R, jiffy:decode(<<"null">>, O)) || {R, O} <- T]}. + [?_assertEqual(R, dec(<<"null">>, O)) || {R, O} <- T]}. diff --git a/test/jiffy_04_string_tests.erl b/test/jiffy_04_string_tests.erl index 8c78a1f..8dc3238 100644 --- a/test/jiffy_04_string_tests.erl +++ b/test/jiffy_04_string_tests.erl @@ -59,15 +59,15 @@ gen(utf8, {Case, Fixed}) -> Case2 = <<34, Case/binary, 34>>, Fixed2 = <<34, Fixed/binary, 34>>, {msg("UTF-8: ~s", [hex(Case)]), [ - ?_assertThrow({error, {invalid_string, _}}, jiffy:encode(Case)), - ?_assertEqual(Fixed2, jiffy:encode(Case, [force_utf8])), - ?_assertThrow({error, {_, invalid_string}}, jiffy:decode(Case2)) + ?_assertThrow({error, {invalid_string, _}}, enc(Case)), + ?_assertEqual(Fixed2, enc(Case, [force_utf8])), + ?_assertThrow({error, {_, invalid_string}}, dec(Case2)) ]}; gen(bad_utf8_key, {J, E}) -> {msg("Bad UTF-8 key: - ~p", [size(term_to_binary(J))]), [ - ?_assertThrow({error, {invalid_object_member_key, _}}, jiffy:encode(J)), - ?_assertEqual(E, jiffy:decode(jiffy:encode(J, [force_utf8]))) + ?_assertThrow({error, {invalid_object_member_key, _}}, enc(J)), + ?_assertEqual(E, dec(enc(J, [force_utf8]))) ]}; gen(escaped_slashes, {J, E}) -> diff --git a/test/jiffy_10_short_double_tests.erl b/test/jiffy_10_short_double_tests.erl index 0b099c1..08692a6 100644 --- a/test/jiffy_10_short_double_tests.erl +++ b/test/jiffy_10_short_double_tests.erl @@ -23,7 +23,7 @@ run(Fd, Acc) -> V1 = re:replace(iolist_to_binary(Data), <<"\.\n">>, <<"">>), V2 = iolist_to_binary(V1), V3 = <<34, V2/binary, 34>>, - R = jiffy:encode(jiffy:decode(V3)), + R = enc(dec(V3)), case R == V3 of true -> run(Fd, Acc); false -> run(Fd, Acc + 1) diff --git a/test/jiffy_util.hrl b/test/jiffy_util.hrl index 4b714d9..15e6351 100644 --- a/test/jiffy_util.hrl +++ b/test/jiffy_util.hrl @@ -17,6 +17,8 @@ hex(Bin) when is_binary(Bin) -> dec(V) -> jiffy:decode(V). +dec(V, Opts) -> + jiffy:decode(V, Opts). enc(V) ->