Ver código fonte

Add `copy_strings` feature

Some users of Jiffy have experienced issues when decoding large JSON
documents. Normally Jiffy expects smallish documents and returns any
strings as sub-binaries. When dealing with large documents these
sub-binary references can keep a large amount of RAM around unless the
user goes through and applies `binary:copy/1` on every string returned
from Jiffy. This however causes a large amount of CPU usage to do
something that Jiffy could do as it builds the JSON structure.

The `copy_strings` decoder option does exactly this. Instead of
returning sub-binaries Jiffy now copies every string into a newly
allocated binary. Users report that this fixes the memory issues while
also not negatively affecting performance significantly.
pull/158/head
Paul J. Davis 7 anos atrás
pai
commit
dddb392f88
5 arquivos alterados com 53 adições e 1 exclusões
  1. +6
    -0
      README.md
  2. +10
    -1
      c_src/decoder.c
  3. +1
    -0
      c_src/jiffy.c
  4. +1
    -0
      c_src/jiffy.h
  5. +35
    -0
      test/jiffy_17_copy_strings_tests.erl

+ 6
- 0
README.md Ver arquivo

@ -52,6 +52,12 @@ The options for decode are:
will ensure that the parsed object only contains a single entry
containing the last value seen. This mirrors the parsing behavior
of virtually every other JSON parser.
* `copy_strings` - Normally when strings are decoded they are created
as sub-binaries of the input data. With some workloads this can lead
to an undesirable bloating of memory when a few small strings in JSON
keep a reference to the full JSON document alive. Setting this option
will instead allocate new binaries for each string to avoid keeping
the original JSON document around after garbage collection.
* `{bytes_per_red, N}` where N >= 0 - This controls the number of
bytes that Jiffy will process as an equivalent to a reduction. Each
20 reductions we consume 1% of our allocated time slice for the current

+ 10
- 1
c_src/decoder.c Ver arquivo

@ -54,6 +54,7 @@ typedef struct {
int return_maps;
int return_trailer;
int dedupe_keys;
int copy_strings;
ERL_NIF_TERM null_term;
char* p;
@ -84,6 +85,7 @@ dec_new(ErlNifEnv* env)
d->return_maps = 0;
d->return_trailer = 0;
d->dedupe_keys = 0;
d->copy_strings = 0;
d->null_term = d->atoms->atom_null;
d->p = NULL;
@ -291,9 +293,14 @@ dec_string(Decoder* d, ERL_NIF_TERM* value)
return 0;
parse:
if(!has_escape) {
if(!has_escape && !d->copy_strings) {
*value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1));
return 1;
} else if(!has_escape) {
ulen = d->i - 1 - st;
chrbuf = (char*) enif_make_new_binary(d->env, ulen, value),
memcpy(chrbuf, &(d->p[st]), ulen);
return 1;
}
hi = 0;
@ -684,6 +691,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
d->return_trailer = 1;
} else if(enif_compare(val, d->atoms->atom_dedupe_keys) == 0) {
d->dedupe_keys = 1;
} else if(enif_compare(val, d->atoms->atom_copy_strings) == 0) {
d->copy_strings = 1;
} else if(enif_compare(val, d->atoms->atom_use_nil) == 0) {
d->null_term = d->atoms->atom_nil;
} else if(get_null_term(env, val, &(d->null_term))) {

+ 1
- 0
c_src/jiffy.c Ver arquivo

@ -33,6 +33,7 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
st->atom_null_term = make_atom(env, "null_term");
st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes");
st->atom_dedupe_keys = make_atom(env, "dedupe_keys");
st->atom_copy_strings = make_atom(env, "copy_strings");
// Markers used in encoding
st->ref_object = make_atom(env, "$object_ref$");

+ 1
- 0
c_src/jiffy.h Ver arquivo

@ -36,6 +36,7 @@ typedef struct {
ERL_NIF_TERM atom_null_term;
ERL_NIF_TERM atom_escape_forward_slashes;
ERL_NIF_TERM atom_dedupe_keys;
ERL_NIF_TERM atom_copy_strings;
ERL_NIF_TERM ref_object;
ERL_NIF_TERM ref_array;

+ 35
- 0
test/jiffy_17_copy_strings_tests.erl Ver arquivo

@ -0,0 +1,35 @@
% This file is part of Jiffy released under the MIT license.
% See the LICENSE file for more information.
-module(jiffy_17_copy_strings_tests).
-include_lib("eunit/include/eunit.hrl").
% Walk a decoded EJSON term and report whether every binary found in it
% owns exactly the bytes it exposes.
%
% A binary passes when byte_size/1 equals binary:referenced_byte_size/1,
% i.e. it is not a small slice holding a reference to a larger
% underlying binary. Objects ({Props}) are checked on both keys and
% values, lists are checked element by element, and any other term
% (numbers, atoms, ...) trivially passes.
%
% Returns true when all binaries pass, false otherwise.
check_binaries({Props}) when is_list(Props) ->
    lists:all(
        fun({Key, Value}) ->
            check_binaries(Key) andalso check_binaries(Value)
        end,
        Props
    );
check_binaries(Values) when is_list(Values) ->
    lists:all(fun check_binaries/1, Values);
check_binaries(Bin) when is_binary(Bin) ->
    % Both sides are integers, so exact comparison is the right operator.
    byte_size(Bin) =:= binary:referenced_byte_size(Bin);
check_binaries(_Other) ->
    % Non-binary leaves cannot keep a larger binary alive.
    true.
% EUnit generator: decode a handful of small JSON documents with the
% copy_strings option and assert that every binary in each result
% passes check_binaries/1.
copy_strings_test_() ->
    DecodeOpts = [copy_strings],
    Documents = [
        <<"\"foo\"">>,
        <<"[\"bar\"]">>,
        <<"{\"foo\":\"bar\"}">>,
        <<"{\"foo\":[\"bar\"]}">>
    ],
    % Decoding happens while the generator runs; only the check itself is
    % deferred inside the ?_assert test object.
    Tests = [
        begin
            Decoded = jiffy:decode(Doc, DecodeOpts),
            ?_assert(check_binaries(Decoded))
        end
     || Doc <- Documents
    ],
    {"Test copy_strings", Tests}.

Carregando…
Cancelar
Salvar