Selaa lähdekoodia

Never expand the encode buffer; emit and restart

This greatly increases the performance of long string encodes as
we won't need to copy intermediate results over and over.
pull/184/head
John Högberg 6 vuotta sitten
committed by Paul J. Davis
vanhempi
commit
a1196ba754
8 muutettua tiedostoa jossa 106 lisäystä ja 112 poistoa
  1. +69
    -89
      c_src/encoder.c
  2. +8
    -4
      src/jiffy.erl
  3. +10
    -9
      test/jiffy_02_literal_tests.erl
  4. +1
    -0
      test/jiffy_03_number_tests.erl
  5. +5
    -5
      test/jiffy_04_string_tests.erl
  6. +1
    -1
      test/jiffy_10_short_double_tests.erl
  7. +6
    -2
      test/jiffy_11_property_tests.erl
  8. +6
    -2
      test/jiffy_util.hrl

+ 69
- 89
c_src/encoder.c Näytä tiedosto

@ -39,12 +39,12 @@ typedef struct {
int shiftcnt;
int count;
size_t iolen;
size_t iosize;
ERL_NIF_TERM iolist;
ErlNifBinary bin;
ErlNifBinary* curr;
int partial_output;
ErlNifBinary buffer;
int have_buffer;
char* p;
unsigned char* u;
@ -82,19 +82,20 @@ enc_new(ErlNifEnv* env)
e->shiftcnt = 0;
e->count = 0;
e->iolen = 0;
e->iosize = 0;
e->curr = &(e->bin);
if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
e->curr = NULL;
e->iolist = enif_make_list(env, 0);
e->partial_output = 0;
if(!enif_alloc_binary(BIN_INC_SIZE, &e->buffer)) {
enif_release_resource(e);
return NULL;
}
memset(e->curr->data, 0, e->curr->size);
e->have_buffer = 1;
e->p = (char*) e->curr->data;
e->u = (unsigned char*) e->curr->data;
e->p = (char*)e->buffer.data;
e->u = (unsigned char*)e->buffer.data;
e->i = 0;
return e;
@ -112,8 +113,8 @@ enc_destroy(ErlNifEnv* env, void* obj)
{
Encoder* e = (Encoder*) obj;
if(e->curr != NULL) {
enif_release_binary(e->curr);
if(e->have_buffer) {
enif_release_binary(&e->buffer);
}
}
@ -130,100 +131,57 @@ enc_obj_error(Encoder* e, const char* msg, ERL_NIF_TERM obj)
return make_obj_error(e->atoms, e->env, msg, obj);
}
static inline int
enc_ensure(Encoder* e, size_t req)
int
enc_flush(Encoder* e)
{
size_t need = e->curr->size;
while(req >= (need - e->i)) need <<= 1;
ERL_NIF_TERM bin;
if(need != e->curr->size) {
if(!enif_realloc_binary(e->curr, need)) {
return 0;
}
e->p = (char*) e->curr->data;
e->u = (unsigned char*) e->curr->data;
if(e->i == 0) {
return 1;
}
return 1;
}
int
enc_result(Encoder* e, ERL_NIF_TERM* value)
{
if(e->i != e->curr->size) {
if(!enif_realloc_binary(e->curr, e->i)) {
if(e->i < e->buffer.size) {
if(!enif_realloc_binary(&e->buffer, e->i)) {
return 0;
}
}
*value = enif_make_binary(e->env, e->curr);
e->curr = NULL;
return 1;
}
int
enc_done(Encoder* e, ERL_NIF_TERM* value)
{
ERL_NIF_TERM last;
if(e->iolen == 0) {
return enc_result(e, value);
}
if(e->i > 0 ) {
if(!enc_result(e, &last)) {
return 0;
}
bin = enif_make_binary(e->env, &e->buffer);
e->have_buffer = 0;
e->iolist = enif_make_list_cell(e->env, last, e->iolist);
e->iolen++;
}
e->iolist = enif_make_list_cell(e->env, bin, e->iolist);
e->iosize += e->i;
*value = e->iolist;
return 1;
}
static inline int
enc_unknown(Encoder* e, ERL_NIF_TERM value)
enc_ensure(Encoder* e, size_t req)
{
ErlNifBinary* bin = e->curr;
ERL_NIF_TERM curr;
size_t new_size = BIN_INC_SIZE;
if(e->i > 0) {
if(!enc_result(e, &curr)) {
return 0;
if(e->have_buffer) {
if(req < (e->buffer.size - e->i)) {
return 1;
}
e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
e->iolen++;
}
e->iolist = enif_make_list_cell(e->env, value, e->iolist);
e->iolen++;
// Track the total number of bytes produced before
// splitting our IO buffer. We add 16 to this value
// as a rough estimate of the number of bytes that
// a bignum might produce when encoded.
e->iosize += e->i + 16;
// Reinitialize our binary for the next buffer if we
// used any data in the buffer. If we haven't used any
// bytes in the buffer then we can safely reuse it
// for anything following the unknown value.
if(e->i > 0) {
e->curr = bin;
if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
if(!enc_flush(e)) {
return 0;
}
}
memset(e->curr->data, 0, e->curr->size);
for(new_size = BIN_INC_SIZE; new_size < req; new_size <<= 1);
e->p = (char*) e->curr->data;
e->u = (unsigned char*) e->curr->data;
e->i = 0;
if(!enif_alloc_binary(new_size, &e->buffer)) {
return 0;
}
e->have_buffer = 1;
e->p = (char*)e->buffer.data;
e->u = (unsigned char*)e->buffer.data;
e->i = 0;
return 1;
}
@ -240,6 +198,23 @@ enc_literal(Encoder* e, const char* literal, size_t len)
return 1;
}
static inline int
enc_bignum(Encoder* e, ERL_NIF_TERM value) {
    /* Bignums cannot be serialized through the NIF API, so the term is
     * handed back to the Erlang side untouched.
     *
     * Any bytes buffered so far are flushed first so the raw term lands
     * in the right position of the output list. The encoder is then
     * flagged as having produced partial output that still needs a
     * fixup once we return.
     *
     * Returns 1 on success and 0 if flushing the buffer failed. */
    if(enc_flush(e)) {
        e->iolist = enif_make_list_cell(e->env, value, e->iolist);
        e->partial_output = 1;
        return 1;
    }

    return 0;
}
static inline int
enc_special_character(Encoder* e, int val) {
switch(val) {
@ -529,7 +504,7 @@ enc_double(Encoder* e, double val)
start = &(e->p[e->i]);
if(!double_to_shortest(start, e->curr->size, &len, val)) {
if(!double_to_shortest(start, e->buffer.size, &len, val)) {
return 0;
}
@ -948,23 +923,28 @@ encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
termstack_push(&stack, curr);
termstack_push(&stack, e->atoms->ref_array);
termstack_push(&stack, item);
} else {
if(!enc_unknown(e, curr)) {
} else if(enif_is_number(env, curr)) {
if(!enc_bignum(e, curr)) {
ret = enc_error(e, "internal_error");
goto done;
}
} else {
ret = enc_obj_error(e, "invalid_ejson", curr);
goto done;
}
}
if(!enc_done(e, &item)) {
if(!enc_flush(e)) {
ret = enc_error(e, "internal_error");
goto done;
}
if(e->iolen == 0) {
ret = item;
assert(enif_is_list(env, e->iolist));
if(e->partial_output) {
ret = enif_make_tuple2(env, e->atoms->atom_partial, e->iolist);
} else {
ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
ret = e->iolist;
}
done:

+ 8
- 4
src/jiffy.erl Näytä tiedosto

@ -101,8 +101,10 @@ encode(Data, Options) ->
finish_encode(IOData, []);
{iter, {Encoder, Stack, IOBuf}} ->
encode_loop(Data, Options, Encoder, Stack, IOBuf);
IOData ->
IOData
[Bin] when is_binary(Bin) ->
Bin;
RevIOData when is_list(RevIOData) ->
lists:reverse(RevIOData)
end.
@ -206,8 +208,10 @@ encode_loop(Data, Options, Encoder, Stack, IOBuf) ->
finish_encode(IOData, []);
{iter, {NewEncoder, NewStack, NewIOBuf}} ->
encode_loop(Data, Options, NewEncoder, NewStack, NewIOBuf);
IOData ->
IOData
[Bin] when is_binary(Bin) ->
Bin;
RevIOData when is_list(RevIOData) ->
lists:reverse(RevIOData)
end.

+ 10
- 9
test/jiffy_02_literal_tests.erl Näytä tiedosto

@ -4,32 +4,33 @@
-module(jiffy_02_literal_tests).
-include_lib("eunit/include/eunit.hrl").
-include("jiffy_util.hrl").
true_test_() ->
{"true", [
{"Decode", ?_assertEqual(true, jiffy:decode(<<"true">>))},
{"Encode", ?_assertEqual(<<"true">>, jiffy:encode(true))}
{"Decode", ?_assertEqual(true, dec(<<"true">>))},
{"Encode", ?_assertEqual(<<"true">>, enc(true))}
]}.
false_test_() ->
{"false", [
{"Decode", ?_assertEqual(false, jiffy:decode(<<"false">>))},
{"Encode", ?_assertEqual(<<"false">>, jiffy:encode(false))}
{"Decode", ?_assertEqual(false, dec(<<"false">>))},
{"Encode", ?_assertEqual(<<"false">>, enc(false))}
]}.
null_test_() ->
{"null", [
{"Decode", ?_assertEqual(null, jiffy:decode(<<"null">>))},
{"Encode", ?_assertEqual(<<"null">>, jiffy:encode(null))}
{"Decode", ?_assertEqual(null, dec(<<"null">>))},
{"Encode", ?_assertEqual(<<"null">>, enc(null))}
]}.
nil_test_() ->
{"null", [
{"Decode", ?_assertEqual(nil, jiffy:decode(<<"null">>, [use_nil]))},
{"Encode", ?_assertEqual(<<"null">>, jiffy:encode(nil, [use_nil]))}
{"Decode", ?_assertEqual(nil, dec(<<"null">>, [use_nil]))},
{"Encode", ?_assertEqual(<<"null">>, enc(nil, [use_nil]))}
]}.
null_term_test_() ->
@ -41,4 +42,4 @@ null_term_test_() ->
{whatever, [{null_term, undefined}, {null_term, whatever}]}
],
{"null_term",
[?_assertEqual(R, jiffy:decode(<<"null">>, O)) || {R, O} <- T]}.
[?_assertEqual(R, dec(<<"null">>, O)) || {R, O} <- T]}.

+ 1
- 0
test/jiffy_03_number_tests.erl Näytä tiedosto

@ -48,6 +48,7 @@ cases(ok) ->
{<<"1">>, 1},
{<<"12">>, 12},
{<<"-3">>, -3},
{<<"{\"key\":9223372036854775808}">>,{[{<<"key">>,1 bsl 63}]}},
{<<"1234567890123456789012345">>, 1234567890123456789012345},
{<<"1310050760199">>, 1310050760199},
{

+ 5
- 5
test/jiffy_04_string_tests.erl Näytä tiedosto

@ -65,15 +65,15 @@ gen(utf8, {Case, Fixed}) ->
Case2 = <<34, Case/binary, 34>>,
Fixed2 = <<34, Fixed/binary, 34>>,
{msg("UTF-8: ~s", [hex(Case)]), [
?_assertError({invalid_string, _}, jiffy:encode(Case)),
?_assertEqual(Fixed2, jiffy:encode(Case, [force_utf8])),
?_assertError({_, invalid_string}, jiffy:decode(Case2))
?_assertError({invalid_string, _}, enc(Case)),
?_assertEqual(Fixed2, enc(Case, [force_utf8])),
?_assertError({_, invalid_string}, dec(Case2))
]};
gen(bad_utf8_key, {J, E}) ->
{msg("Bad UTF-8 key: - ~p", [size(term_to_binary(J))]), [
?_assertError({invalid_object_member_key, _}, jiffy:encode(J)),
?_assertEqual(E, jiffy:decode(jiffy:encode(J, [force_utf8])))
?_assertError({invalid_object_member_key, _}, enc(J)),
?_assertEqual(E, dec(enc(J, [force_utf8])))
]};
gen(escaped_slashes, {J, E}) ->

+ 1
- 1
test/jiffy_10_short_double_tests.erl Näytä tiedosto

@ -23,7 +23,7 @@ run(Fd, Acc) ->
V1 = re:replace(iolist_to_binary(Data), <<"\.\n">>, <<"">>),
V2 = iolist_to_binary(V1),
V3 = <<34, V2/binary, 34>>,
R = jiffy:encode(jiffy:decode(V3)),
R = enc(dec(V3)),
case R == V3 of
true -> run(Fd, Acc);
false -> run(Fd, Acc + 1)

+ 6
- 2
test/jiffy_11_property_tests.erl Näytä tiedosto

@ -5,6 +5,10 @@
-ifdef(HAVE_EQC).
-compile(export_all).
-include_lib("eqc/include/eqc.hrl").
-include_lib("eunit/include/eunit.hrl").
-include("jiffy_util.hrl").
@ -41,8 +45,8 @@ prop_enc_dec() ->
prop_dec_trailer() ->
?FORALL({T1, Comb, T2}, {json(), combiner(), json()},
begin
B1 = jiffy:encode(T1),
B2 = jiffy:encode(T2),
B1 = iolist_to_binary(jiffy:encode(T1)),
B2 = iolist_to_binary(jiffy:encode(T2)),
Bin = <<B1/binary, Comb/binary, B2/binary>>,
{has_trailer, T1, Rest} = jiffy:decode(Bin, [return_trailer]),
T2 = jiffy:decode(Rest),

+ 6
- 2
test/jiffy_util.hrl Näytä tiedosto

@ -1,5 +1,5 @@
-compile(export_all).
% This file is part of Jiffy released under the MIT license.
% See the LICENSE file for more information.
msg(Fmt, Args) ->
M1 = io_lib:format(Fmt, Args),
@ -19,6 +19,10 @@ dec(V) ->
jiffy:decode(V).
%% Test helper: decode JSON with jiffy using the supplied option list.
dec(Json, Options) ->
    jiffy:decode(Json, Options).
%% Test helper: encode a term with jiffy and flatten the result into a
%% single binary, so tests can compare against a binary literal whether
%% jiffy:encode/1 returns a binary or an iolist.
enc(Term) ->
    Encoded = jiffy:encode(Term),
    iolist_to_binary(Encoded).

Ladataan…
Peruuta
Tallenna