From dc1fd5607a8d0f4f5e2975b89c9a83068b03d7c7 Mon Sep 17 00:00:00 2001 From: Jose M Perez Date: Tue, 7 Apr 2020 04:47:26 +0200 Subject: [PATCH] Add partial option to encode --- README.md | 25 +++++- c_src/decoder.c | 10 +-- c_src/encoder.c | 7 ++ c_src/jiffy.c | 13 +++- c_src/jiffy.h | 7 +- c_src/wrapper.c | 77 +++++++++++++++++++ src/jiffy.erl | 46 +++++++---- ..._tests.erl => jiffy_18_partials_tests.erl} | 58 +++++++++----- 8 files changed, 194 insertions(+), 49 deletions(-) create mode 100644 c_src/wrapper.c rename test/{jiffy_18_decode_levels_tests.erl => jiffy_18_partials_tests.erl} (57%) diff --git a/README.md b/README.md index 3606694..03d96ab 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,8 @@ The options for decode are: the decode result is still in use. * `{max_levels, N}` where N >= 0 - This controls when to stop decoding by depth, after N levels are decoded, the rest is returned as a - `{json, binary()}`. + `Resource::reference()`. Resources have some limitations; see the [Partial JSONs + section](#partial-jsons). * `{bytes_per_red, N}` where N >= 0 - This controls the number of bytes that Jiffy will process as an equivalent to a reduction. Each 20 reductions we consume 1% of our allocated time slice for the current @@ -88,6 +89,11 @@ The options for encode are: * `use_nil` - Encode's the atom `nil` as `null`. * `escape_forward_slashes` - Escapes the `/` character which can be useful when encoding URLs in some cases. +* `partial` - Instead of returning an `iodata()`, returns a + `Resource::reference()` which holds the verified raw JSON. This resource can be used + as a building block for more complex JSON documents, without the need to encode these + blocks again. Resources have some limitations; see the [Partial JSONs + section](#partial-jsons). * `{bytes_per_red, N}` - Refer to the decode options * `{bytes_per_iter, N}` - Refer to the decode options @@ -123,3 +129,20 @@ Jiffy should be in all ways an improvement over EEP0018. 
It no longer imposes limits on the nesting depth. It is capable of encoding and decoding large numbers and it does quite a bit more validation of UTF-8 in strings. +Partial JSONs +------------------------- + +`jiffy:encode/2` with option `partial` returns a `Resource::reference()`. + +`jiffy:decode/2` with option `max_levels` may place a `Resource::reference()` +in the result instead of some `json_value()`. + +These resources hold a `binary()` with the verified JSON data and can be used +directly, or as part of a larger EJSON term in `jiffy:encode/1,2`. These binaries +won't be re-encoded; instead, they will be placed directly in the result. + +However, using resources has some limitations: a resource is only valid on +the node where it was created. If a resource is serialized and deserialized, or +if it is sent to another node and back, it remains valid only if the original +resource has not been garbage-collected. + diff --git a/c_src/decoder.c b/c_src/decoder.c index f01794e..7988098 100644 --- a/c_src/decoder.c +++ b/c_src/decoder.c @@ -208,15 +208,7 @@ static int inline level_decrease(Decoder* d, ERL_NIF_TERM* value) { if (d->max_levels && d->max_levels == --d->current_depth) { // Only builds term in threshold - ERL_NIF_TERM bin; - if(!d->copy_strings) { - bin = enif_make_sub_binary(d->env, d->arg, d->level_start, (d->i - d->level_start + 1)); - } else { - unsigned ulen = d->i - d->level_start + 1; - char* chrbuf = (char*) enif_make_new_binary(d->env, ulen, &bin); - memcpy(chrbuf, &(d->p[d->level_start]), ulen); - } - *value = enif_make_tuple2(d->env, d->atoms->atom_json, bin); + *value = wrap_enif_make_sub_binary(d->env, d->arg, d->level_start, (d->i - d->level_start + 1)); return 1; } return 0; diff --git a/c_src/encoder.c b/c_src/encoder.c index 4cfb353..759d231 100644 --- a/c_src/encoder.c +++ b/c_src/encoder.c @@ -683,6 +683,8 @@ encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) continue; } else if(get_bytes_per_red(env, val, &(e->bytes_per_red))) { continue; + } else 
if(enif_is_identical(val, e->atoms->atom_partial)) { + // Ignore, handled in Erlang } else { return enif_make_badarg(env); } @@ -923,6 +925,11 @@ encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) termstack_push(&stack, curr); termstack_push(&stack, e->atoms->ref_array); termstack_push(&stack, item); + } else if(unwrap(env, curr, &item)) { + if(!enc_unknown(e, item)) { + ret = enc_error(e, "internal_error"); + goto done; + } } else { if(!enc_unknown(e, curr)) { ret = enc_error(e, "internal_error"); diff --git a/c_src/jiffy.c b/c_src/jiffy.c index 61b3b55..bb7a418 100644 --- a/c_src/jiffy.c +++ b/c_src/jiffy.c @@ -34,7 +34,6 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes"); st->atom_dedupe_keys = make_atom(env, "dedupe_keys"); st->atom_copy_strings = make_atom(env, "copy_strings"); - st->atom_json = make_atom(env, "json"); st->atom_max_levels = make_atom(env, "max_levels"); // Markers used in encoding @@ -59,6 +58,15 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) NULL ); + st->res_wrapper = enif_open_resource_type( + env, + NULL, + "wrapper", + wrapper_destroy, + ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, + NULL + ); + *priv = (void*) st; return 0; @@ -88,7 +96,8 @@ static ErlNifFunc funcs[] = {"nif_decode_init", 2, decode_init}, {"nif_decode_iter", 5, decode_iter}, {"nif_encode_init", 2, encode_init}, - {"nif_encode_iter", 3, encode_iter} + {"nif_encode_iter", 3, encode_iter}, + {"nif_wrap_binary", 1, wrap_binary} }; ERL_NIF_INIT(jiffy, funcs, &load, &reload, &upgrade, &unload); diff --git a/c_src/jiffy.h b/c_src/jiffy.h index 9743662..5902480 100644 --- a/c_src/jiffy.h +++ b/c_src/jiffy.h @@ -43,7 +43,6 @@ typedef struct { ERL_NIF_TERM atom_escape_forward_slashes; ERL_NIF_TERM atom_dedupe_keys; ERL_NIF_TERM atom_copy_strings; - ERL_NIF_TERM atom_json; ERL_NIF_TERM atom_max_levels; ERL_NIF_TERM ref_object; @@ -51,6 +50,7 @@ typedef struct { ErlNifResourceType* 
res_dec; ErlNifResourceType* res_enc; + ErlNifResourceType* res_wrapper; } jiffy_st; ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name); @@ -68,9 +68,11 @@ ERL_NIF_TERM decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); +ERL_NIF_TERM wrap_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); void dec_destroy(ErlNifEnv* env, void* obj); void enc_destroy(ErlNifEnv* env, void* obj); +void wrapper_destroy(ErlNifEnv* env, void* obj); int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, int ret_map, int dedupe_keys); @@ -86,4 +88,7 @@ int unicode_from_pair(int hi, int lo); int unicode_uescape(int c, unsigned char* buf); int double_to_shortest(unsigned char *buf, size_t size, size_t* len, double val); +ERL_NIF_TERM wrap_enif_make_sub_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, size_t pos, size_t size); +int unwrap(ErlNifEnv* env, ERL_NIF_TERM wrapper_resource, ERL_NIF_TERM* bin_term_p); + #endif // Included JIFFY_H diff --git a/c_src/wrapper.c b/c_src/wrapper.c new file mode 100644 index 0000000..13c9511 --- /dev/null +++ b/c_src/wrapper.c @@ -0,0 +1,77 @@ +// This file is part of Jiffy released under the MIT license. +// See the LICENSE file for more information. 
+ +#include "erl_nif.h" +#include "jiffy.h" + +typedef struct { + // The Wrapper is a struct intended to be used as a resource to hold a + // binary that's been validated by jiffy to be a valid JSON value + + ErlNifEnv* env; // Process independent env to hold the wrapped binary + ERL_NIF_TERM bin; +} Wrapper; + +static ERL_NIF_TERM +wrap_new(ErlNifEnv* process_env, ErlNifEnv* process_independent_env, ERL_NIF_TERM binary) +{ + jiffy_st* st = (jiffy_st*) enif_priv_data(process_env); + + Wrapper* wrapper_p = enif_alloc_resource(st->res_wrapper, sizeof(Wrapper)); + ERL_NIF_TERM wrapper_term = enif_make_resource(process_env, wrapper_p); + enif_release_resource(wrapper_p); + + wrapper_p->env = process_independent_env; + wrapper_p->bin = binary; + + return wrapper_term; +} + +ERL_NIF_TERM +wrap_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + if(argc != 1) { + return enif_make_badarg(env); + } + + ERL_NIF_TERM binary = argv[0]; + if(!enif_is_binary(env, binary)) { + return enif_make_badarg(env); + } + + ErlNifEnv* process_independent_env = enif_alloc_env(); + ERL_NIF_TERM bin_copy = enif_make_copy(process_independent_env, binary); + + return wrap_new(env, process_independent_env, bin_copy); +} + +ERL_NIF_TERM +wrap_enif_make_sub_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, size_t pos, size_t size) +{ + ErlNifEnv* process_independent_env = enif_alloc_env(); + // sub_bin must be created in the same env as the parent binary and then copied + ERL_NIF_TERM sub_bin = enif_make_sub_binary(env, bin_term, pos, size); + return wrap_new(env, process_independent_env, enif_make_copy(process_independent_env, sub_bin)); +} + +int +unwrap(ErlNifEnv* env, ERL_NIF_TERM wrapper_resource, ERL_NIF_TERM* bin_term_p) +{ + jiffy_st* st = (jiffy_st*) enif_priv_data(env); + + Wrapper* wrapper_p = NULL; + if(!enif_get_resource(env, wrapper_resource, st->res_wrapper, (void**) &wrapper_p)) { + return 0; + } + + *bin_term_p = enif_make_copy(env, wrapper_p->bin); + return 1; +} + +void 
+wrapper_destroy(ErlNifEnv* env, void* obj) +{ + Wrapper* wrapper_p = (Wrapper*) obj; + enif_free_env(wrapper_p->env); +} + diff --git a/src/jiffy.erl b/src/jiffy.erl index a5e74bd..68a6fa9 100644 --- a/src/jiffy.erl +++ b/src/jiffy.erl @@ -22,7 +22,7 @@ -type json_array() :: [json_value()]. -type json_string() :: atom() | binary(). -type json_number() :: integer() | float(). --type json_raw() :: {json, binary()}. % Only when decoding with max_levels +-type json_raw() :: reference(). % Only when decoding with max_levels or encoding with partial -ifdef(JIFFY_NO_MAPS). @@ -53,6 +53,7 @@ | force_utf8 | use_nil | escape_forward_slashes + | partial | {bytes_per_iter, non_neg_integer()} | {bytes_per_red, non_neg_integer()}. @@ -83,14 +84,15 @@ decode(Data, Opts) when is_list(Data) -> decode(iolist_to_binary(Data), Opts). --spec encode(json_value()) -> iodata(). +-spec encode(json_value()) -> iodata() | reference(). encode(Data) -> encode(Data, []). --spec encode(json_value(), encode_options()) -> iodata(). +-spec encode(json_value(), encode_options()) -> iodata() | reference(). encode(Data, Options) -> ForceUTF8 = lists:member(force_utf8, Options), + ReturnPartial = lists:member(partial, Options), case nif_encode_init(Data, Options) of {error, {invalid_string, _}} when ForceUTF8 == true -> FixedData = jiffy_utf8:fix(Data), @@ -101,13 +103,15 @@ encode(Data, Options) -> {error, Error} -> error(Error); {partial, IOData} -> - finish_encode(IOData, []); + finish_encode(IOData, [], ReturnPartial); {iter, {Encoder, Stack, IOBuf}} -> encode_loop(Data, Options, Encoder, Stack, IOBuf); - [Bin] when is_binary(Bin) -> + [Bin] when is_binary(Bin), not ReturnPartial -> Bin; + RevIOData when is_list(RevIOData), not ReturnPartial -> + lists:reverse(RevIOData); RevIOData when is_list(RevIOData) -> - lists:reverse(RevIOData) + nif_wrap_binary(iolist_to_binary(lists:reverse(RevIOData))) end. 
@@ -166,18 +170,22 @@ finish_decode_arr([V | Vals], Acc) -> finish_decode_arr(Vals, [finish_decode(V) | Acc]). -finish_encode([], Acc) -> +finish_encode([], Acc, false) -> %% No reverse! The NIF returned us %% the pieces in reverse order. Acc; -finish_encode([<<_/binary>>=B | Rest], Acc) -> - finish_encode(Rest, [B | Acc]); -finish_encode([Val | Rest], Acc) when is_integer(Val) -> +finish_encode([], Acc, true) -> + %% No reverse! The NIF returned us + %% the pieces in reverse order. + nif_wrap_binary(iolist_to_binary(Acc)); +finish_encode([<<_/binary>>=B | Rest], Acc, ReturnPartial) -> + finish_encode(Rest, [B | Acc], ReturnPartial); +finish_encode([Val | Rest], Acc, ReturnPartial) when is_integer(Val) -> Bin = list_to_binary(integer_to_list(Val)), - finish_encode(Rest, [Bin | Acc]); -finish_encode([InvalidEjson | _], _) -> + finish_encode(Rest, [Bin | Acc], ReturnPartial); +finish_encode([InvalidEjson | _], _, _) -> error({invalid_ejson, InvalidEjson}); -finish_encode(_, _) -> +finish_encode(_, _, _) -> error(invalid_ejson). 
@@ -208,6 +216,7 @@ decode_loop(Data, Decoder, Val, Objs, Curr) -> encode_loop(Data, Options, Encoder, Stack, IOBuf) -> ForceUTF8 = lists:member(force_utf8, Options), + ReturnPartial = lists:member(partial, Options), case nif_encode_iter(Encoder, Stack, IOBuf) of {error, {invalid_string, _}} when ForceUTF8 == true -> FixedData = jiffy_utf8:fix(Data), @@ -218,13 +227,15 @@ encode_loop(Data, Options, Encoder, Stack, IOBuf) -> {error, Error} -> error(Error); {partial, IOData} -> - finish_encode(IOData, []); + finish_encode(IOData, [], ReturnPartial); {iter, {NewEncoder, NewStack, NewIOBuf}} -> encode_loop(Data, Options, NewEncoder, NewStack, NewIOBuf); - [Bin] when is_binary(Bin) -> + [Bin] when is_binary(Bin), not ReturnPartial -> Bin; + RevIOData when is_list(RevIOData), not ReturnPartial -> + lists:reverse(RevIOData); RevIOData when is_list(RevIOData) -> - lists:reverse(RevIOData) + nif_wrap_binary(iolist_to_binary(lists:reverse(RevIOData))) end. @@ -242,3 +253,6 @@ nif_encode_init(_Data, _Options) -> nif_encode_iter(_Encoder, _Stack, _IoList) -> ?NOT_LOADED. + +nif_wrap_binary(_BinData) -> + ?NOT_LOADED. diff --git a/test/jiffy_18_decode_levels_tests.erl b/test/jiffy_18_partials_tests.erl similarity index 57% rename from test/jiffy_18_decode_levels_tests.erl rename to test/jiffy_18_partials_tests.erl index f009557..abf2883 100644 --- a/test/jiffy_18_decode_levels_tests.erl +++ b/test/jiffy_18_partials_tests.erl @@ -1,18 +1,12 @@ % This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. --module(jiffy_18_decode_levels_tests). +-module(jiffy_18_partials_tests). -include_lib("eunit/include/eunit.hrl"). 
decode_levels_test_() -> MaxOptMaxLevels = 4, - Cases = [ - <<"{\"foo\":\"bar\"}">>, - <<"{\"foo\":[\"bar\"]}">>, - <<"[[[[]],\"foo\"], [\"bar\", []], [\"baz\"], [[], 1]]">>, - <<"{\"foo\":{},\"bar\":{\"baz\":[1,2,3], \"foo2\":{}}}">> - ], {"Test max_levels", lists:map(fun(Json) -> [ begin @@ -20,13 +14,44 @@ decode_levels_test_() -> FullEJson = to_full_json(EJson, MaxLevels, Opts), ?_assertEqual(jiffy:decode(Json, Opts), FullEJson) end || MaxLevels <- lists:seq(1, MaxOptMaxLevels), Opts <- generate_options_groups()] - end, Cases)}. + end, jsons())}. + +encode_resources_test_() -> + {"Test encode resources", lists:map(fun(Json) -> + [ + begin + EJsonWithResources = jiffy:decode(Json, [{max_levels, 1} | Opts]), + JsonFromResources = jiffy:encode(EJsonWithResources), + ?_assertEqual(jiffy:decode(Json, Opts), jiffy:decode(JsonFromResources, Opts)) + end || Opts <- generate_options_groups()] + end, jsons())}. + +encode_partials_test_() -> + {"Test encode partials", lists:map(fun(Json) -> + [ + begin + EJson = jiffy:decode(Json, Opts), + PartialResource = jiffy:encode(EJson, [partial]), + true = is_reference(PartialResource), + PartialIOData = jiffy:encode(PartialResource), + ?_assertEqual(EJson, jiffy:decode(PartialIOData, Opts)) + end || Opts <- generate_options_groups()] + end, jsons())}. + + +jsons() -> + [ + <<"{\"foo\":\"bar\"}">>, + <<"{\"foo\":[\"bar\"]}">>, + <<"[[[[]],\"foo\"], [\"bar\", []], [\"baz\"], [[], 1]]">>, + <<"{\"foo\":{},\"bar\":{\"baz\":[1,2,3], \"foo2\":{}}}">> + ]. -ifndef(JIFFY_NO_MAPS). -generate_options_groups() -> generate_options_groups([return_maps, copy_strings]). +generate_options_groups() -> generate_options_groups([return_maps]). -else. -generate_options_groups() -> generate_options_groups([copy_strings]). +generate_options_groups() -> generate_options_groups([]). -endif. generate_options_groups(AvailableOptions) -> @@ -41,17 +66,10 @@ to_full_json(Val, MaxDepth, DecodeOptions) -> to_full_json(Val, 0, MaxDepth, DecodeOptions). 
to_full_json(_Val, Depth, MaxDepth, _DecodeOptions) when Depth > MaxDepth -> error(too_deep); -to_full_json({json, ValueBin}, Depth, MaxDepth, DecodeOptions) -> +to_full_json(PartialResource, Depth, MaxDepth, DecodeOptions) when is_reference(PartialResource) -> MaxDepth = Depth, - true = is_binary(ValueBin), - ByteSize = byte_size(ValueBin), - case lists:member(copy_strings, DecodeOptions) of - true -> - ByteSize = binary:referenced_byte_size(ValueBin); - _ -> - true = ByteSize < binary:referenced_byte_size(ValueBin) - end, - jiffy:decode(ValueBin, DecodeOptions); + IOData = jiffy:encode(PartialResource), + jiffy:decode(IOData, DecodeOptions); to_full_json({Pairs}, Depth, MaxDepth, DecodeOptions) when is_list(Pairs) -> {[{K, to_full_json(V, Depth+1, MaxDepth, DecodeOptions)} || {K, V} <- Pairs]}; to_full_json(Vals, Depth, MaxDepth, DecodeOptions) when is_list(Vals) ->