Bladeren bron

Add partial option to encode

pull/195/head
Jose M Perez 5 jaren geleden
bovenliggende
commit
dc1fd5607a
8 gewijzigde bestanden met toevoegingen van 194 en 49 verwijderingen
  1. +24
    -1
      README.md
  2. +1
    -9
      c_src/decoder.c
  3. +7
    -0
      c_src/encoder.c
  4. +11
    -2
      c_src/jiffy.c
  5. +6
    -1
      c_src/jiffy.h
  6. +77
    -0
      c_src/wrapper.c
  7. +30
    -16
      src/jiffy.erl
  8. +38
    -20
      test/jiffy_18_partials_tests.erl

+ 24
- 1
README.md Bestand weergeven

@ -61,7 +61,8 @@ The options for decode are:
the decode result is still in use.
* `{max_levels, N}` where N >= 0 - This controls when to stop decoding
by depth, after N levels are decoded, the rest is returned as a
`{json, binary()}`.
`Resource::reference()`. Resources have some limitations, check [partial jsons
section](#partial-jsons).
* `{bytes_per_red, N}` where N >= 0 - This controls the number of
bytes that Jiffy will process as an equivalent to a reduction. Each
20 reductions we consume 1% of our allocated time slice for the current
@ -88,6 +89,11 @@ The options for encode are:
* `use_nil` - Encodes the atom `nil` as `null`.
* `escape_forward_slashes` - Escapes the `/` character which can be
useful when encoding URLs in some cases.
* `partial` - Instead of returning an `iodata()`, returns a
`Resource::reference()` which holds the verified raw JSON. This resource can be
used as a building block for more complex JSON documents, without the need to
encode these blocks again. Resources have some limitations; see the [partial jsons
section](#partial-jsons).
* `{bytes_per_red, N}` - Refer to the decode options
* `{bytes_per_iter, N}` - Refer to the decode options
@ -123,3 +129,20 @@ Jiffy should be in all ways an improvement over EEP0018. It no longer
imposes limits on the nesting depth. It is capable of encoding and
decoding large numbers and it does quite a bit more validation of UTF-8 in strings.
Partial JSONs
-------------------------
`jiffy:encode/2` with option `partial` returns a `Resource::reference()`.
`jiffy:decode/2` with option `max_levels` may place a `Resource::reference()`
in place of some `json_value()`.
These resources hold a `binary()` with the verified JSON data and can be passed
directly to `jiffy:encode/1,2`, or embedded as part of a larger EJSON term. These
binaries won't be re-encoded; instead, they will be placed directly in the result.
However, using resources has some limitations: a resource is only valid on
the node where it was created. If a resource is serialized and deserialized, or
if it is sent to another node and back, it will still be valid only if the original
resource has not been garbage collected in the meantime.

+ 1
- 9
c_src/decoder.c Bestand weergeven

@ -208,15 +208,7 @@ static int inline
level_decrease(Decoder* d, ERL_NIF_TERM* value) {
if (d->max_levels && d->max_levels == --d->current_depth) {
// Only builds term in threshold
ERL_NIF_TERM bin;
if(!d->copy_strings) {
bin = enif_make_sub_binary(d->env, d->arg, d->level_start, (d->i - d->level_start + 1));
} else {
unsigned ulen = d->i - d->level_start + 1;
char* chrbuf = (char*) enif_make_new_binary(d->env, ulen, &bin);
memcpy(chrbuf, &(d->p[d->level_start]), ulen);
}
*value = enif_make_tuple2(d->env, d->atoms->atom_json, bin);
*value = wrap_enif_make_sub_binary(d->env, d->arg, d->level_start, (d->i - d->level_start + 1));
return 1;
}
return 0;

+ 7
- 0
c_src/encoder.c Bestand weergeven

@ -683,6 +683,8 @@ encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
continue;
} else if(get_bytes_per_red(env, val, &(e->bytes_per_red))) {
continue;
} else if(enif_is_identical(val, e->atoms->atom_partial)) {
// Ignore, handled in Erlang
} else {
return enif_make_badarg(env);
}
@ -923,6 +925,11 @@ encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
termstack_push(&stack, curr);
termstack_push(&stack, e->atoms->ref_array);
termstack_push(&stack, item);
} else if(unwrap(env, curr, &item)) {
if(!enc_unknown(e, item)) {
ret = enc_error(e, "internal_error");
goto done;
}
} else {
if(!enc_unknown(e, curr)) {
ret = enc_error(e, "internal_error");

+ 11
- 2
c_src/jiffy.c Bestand weergeven

@ -34,7 +34,6 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes");
st->atom_dedupe_keys = make_atom(env, "dedupe_keys");
st->atom_copy_strings = make_atom(env, "copy_strings");
st->atom_json = make_atom(env, "json");
st->atom_max_levels = make_atom(env, "max_levels");
// Markers used in encoding
@ -59,6 +58,15 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
NULL
);
st->res_wrapper = enif_open_resource_type(
env,
NULL,
"wrapper",
wrapper_destroy,
ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER,
NULL
);
*priv = (void*) st;
return 0;
@ -88,7 +96,8 @@ static ErlNifFunc funcs[] =
{"nif_decode_init", 2, decode_init},
{"nif_decode_iter", 5, decode_iter},
{"nif_encode_init", 2, encode_init},
{"nif_encode_iter", 3, encode_iter}
{"nif_encode_iter", 3, encode_iter},
{"nif_wrap_binary", 1, wrap_binary}
};
ERL_NIF_INIT(jiffy, funcs, &load, &reload, &upgrade, &unload);

+ 6
- 1
c_src/jiffy.h Bestand weergeven

@ -43,7 +43,6 @@ typedef struct {
ERL_NIF_TERM atom_escape_forward_slashes;
ERL_NIF_TERM atom_dedupe_keys;
ERL_NIF_TERM atom_copy_strings;
ERL_NIF_TERM atom_json;
ERL_NIF_TERM atom_max_levels;
ERL_NIF_TERM ref_object;
@ -51,6 +50,7 @@ typedef struct {
ErlNifResourceType* res_dec;
ErlNifResourceType* res_enc;
ErlNifResourceType* res_wrapper;
} jiffy_st;
ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name);
@ -68,9 +68,11 @@ ERL_NIF_TERM decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM wrap_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
void dec_destroy(ErlNifEnv* env, void* obj);
void enc_destroy(ErlNifEnv* env, void* obj);
void wrapper_destroy(ErlNifEnv* env, void* obj);
int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
int ret_map, int dedupe_keys);
@ -86,4 +88,7 @@ int unicode_from_pair(int hi, int lo);
int unicode_uescape(int c, unsigned char* buf);
int double_to_shortest(unsigned char *buf, size_t size, size_t* len, double val);
ERL_NIF_TERM wrap_enif_make_sub_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, size_t pos, size_t size);
int unwrap(ErlNifEnv* env, ERL_NIF_TERM wrapper_resource, ERL_NIF_TERM* bin_term_p);
#endif // Included JIFFY_H

+ 77
- 0
c_src/wrapper.c Bestand weergeven

@ -0,0 +1,77 @@
// This file is part of Jiffy released under the MIT license.
// See the LICENSE file for more information.
#include "erl_nif.h"
#include "jiffy.h"
typedef struct {
// Payload of the "wrapper" resource: keeps hold of a binary that jiffy has
// already validated to be a JSON value, so it can later be emitted as-is
// instead of being encoded again.
ErlNifEnv* env; // Process independent env that holds the wrapped binary; freed in wrapper_destroy
ERL_NIF_TERM bin; // The wrapped binary term (callers in this file pass a copy made into `env`)
} Wrapper;
// Creates a wrapper resource and returns it as a term of `process_env`.
//
// process_env:             env of the calling NIF; the returned term lives here.
// process_independent_env: env stored in the resource (released by wrapper_destroy).
// binary:                  term stored in the resource; callers in this file
//                          pass a copy made into process_independent_env.
static ERL_NIF_TERM
wrap_new(ErlNifEnv* process_env, ErlNifEnv* process_independent_env, ERL_NIF_TERM binary)
{
    jiffy_st* st = (jiffy_st*) enif_priv_data(process_env);
    Wrapper* w = enif_alloc_resource(st->res_wrapper, sizeof(Wrapper));
    ERL_NIF_TERM term;

    // Fill in the payload before handing the resource over to the VM
    w->env = process_independent_env;
    w->bin = binary;

    term = enif_make_resource(process_env, w);
    // Give up our own reference; the returned term still refers to the resource
    enif_release_resource(w);

    return term;
}
// NIF entry point: wraps a binary into a wrapper resource.
//
// Returns badarg unless it is called with exactly one argument and that
// argument is a binary; otherwise returns the resource term.
ERL_NIF_TERM
wrap_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    ErlNifEnv* owner_env;

    if(argc != 1 || !enif_is_binary(env, argv[0])) {
        return enif_make_badarg(env);
    }

    // The binary is copied into an env of its own, which the resource keeps
    owner_env = enif_alloc_env();
    return wrap_new(env, owner_env, enif_make_copy(owner_env, argv[0]));
}
// Wraps the slice [pos, pos + size) of `bin_term` into a wrapper resource and
// returns the resource term in `env`.
ERL_NIF_TERM
wrap_enif_make_sub_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, size_t pos, size_t size)
{
    // The sub binary must be created in the same env as the parent binary...
    ERL_NIF_TERM slice = enif_make_sub_binary(env, bin_term, pos, size);

    // ...and is then copied into the env that the resource will hold on to
    ErlNifEnv* owner_env = enif_alloc_env();
    ERL_NIF_TERM owned_slice = enif_make_copy(owner_env, slice);

    return wrap_new(env, owner_env, owned_slice);
}
// Extracts the wrapped binary from a wrapper resource term.
//
// Returns 1 and stores a copy of the binary (made into `env`) in *bin_term_p
// when `wrapper_resource` is a wrapper resource; returns 0 and leaves
// *bin_term_p untouched otherwise.
int
unwrap(ErlNifEnv* env, ERL_NIF_TERM wrapper_resource, ERL_NIF_TERM* bin_term_p)
{
    jiffy_st* st = (jiffy_st*) enif_priv_data(env);
    void* res = NULL;

    if(enif_get_resource(env, wrapper_resource, st->res_wrapper, &res)) {
        *bin_term_p = enif_make_copy(env, ((Wrapper*) res)->bin);
        return 1;
    }

    return 0;
}
// Destructor of the wrapper resource type: frees the env stored in the
// Wrapper. The `env` argument supplied by the VM is not used.
void
wrapper_destroy(ErlNifEnv* env, void* obj)
{
    enif_free_env(((Wrapper*) obj)->env);
}

+ 30
- 16
src/jiffy.erl Bestand weergeven

@ -22,7 +22,7 @@
-type json_array() :: [json_value()].
-type json_string() :: atom() | binary().
-type json_number() :: integer() | float().
-type json_raw() :: {json, binary()}. % Only when decoding with max_levels
-type json_raw() :: reference(). % Only when decoding with max_levels or encoding with partial
-ifdef(JIFFY_NO_MAPS).
@ -53,6 +53,7 @@
| force_utf8
| use_nil
| escape_forward_slashes
| partial
| {bytes_per_iter, non_neg_integer()}
| {bytes_per_red, non_neg_integer()}.
@ -83,14 +84,15 @@ decode(Data, Opts) when is_list(Data) ->
decode(iolist_to_binary(Data), Opts).
-spec encode(json_value()) -> iodata().
-spec encode(json_value()) -> iodata() | reference().
encode(Data) ->
encode(Data, []).
-spec encode(json_value(), encode_options()) -> iodata().
-spec encode(json_value(), encode_options()) -> iodata() | reference().
encode(Data, Options) ->
ForceUTF8 = lists:member(force_utf8, Options),
ReturnPartial = lists:member(partial, Options),
case nif_encode_init(Data, Options) of
{error, {invalid_string, _}} when ForceUTF8 == true ->
FixedData = jiffy_utf8:fix(Data),
@ -101,13 +103,15 @@ encode(Data, Options) ->
{error, Error} ->
error(Error);
{partial, IOData} ->
finish_encode(IOData, []);
finish_encode(IOData, [], ReturnPartial);
{iter, {Encoder, Stack, IOBuf}} ->
encode_loop(Data, Options, Encoder, Stack, IOBuf);
[Bin] when is_binary(Bin) ->
[Bin] when is_binary(Bin), not ReturnPartial ->
Bin;
RevIOData when is_list(RevIOData), not ReturnPartial ->
lists:reverse(RevIOData);
RevIOData when is_list(RevIOData) ->
lists:reverse(RevIOData)
nif_wrap_binary(iolist_to_binary(lists:reverse(RevIOData)))
end.
@ -166,18 +170,22 @@ finish_decode_arr([V | Vals], Acc) ->
finish_decode_arr(Vals, [finish_decode(V) | Acc]).
finish_encode([], Acc) ->
finish_encode([], Acc, false) ->
%% No reverse! The NIF returned us
%% the pieces in reverse order.
Acc;
finish_encode([<<_/binary>>=B | Rest], Acc) ->
finish_encode(Rest, [B | Acc]);
finish_encode([Val | Rest], Acc) when is_integer(Val) ->
finish_encode([], Acc, true) ->
%% No reverse! The NIF returned us
%% the pieces in reverse order.
nif_wrap_binary(iolist_to_binary(Acc));
finish_encode([<<_/binary>>=B | Rest], Acc, ReturnPartial) ->
finish_encode(Rest, [B | Acc], ReturnPartial);
finish_encode([Val | Rest], Acc, ReturnPartial) when is_integer(Val) ->
Bin = list_to_binary(integer_to_list(Val)),
finish_encode(Rest, [Bin | Acc]);
finish_encode([InvalidEjson | _], _) ->
finish_encode(Rest, [Bin | Acc], ReturnPartial);
finish_encode([InvalidEjson | _], _, _) ->
error({invalid_ejson, InvalidEjson});
finish_encode(_, _) ->
finish_encode(_, _, _) ->
error(invalid_ejson).
@ -208,6 +216,7 @@ decode_loop(Data, Decoder, Val, Objs, Curr) ->
encode_loop(Data, Options, Encoder, Stack, IOBuf) ->
ForceUTF8 = lists:member(force_utf8, Options),
ReturnPartial = lists:member(partial, Options),
case nif_encode_iter(Encoder, Stack, IOBuf) of
{error, {invalid_string, _}} when ForceUTF8 == true ->
FixedData = jiffy_utf8:fix(Data),
@ -218,13 +227,15 @@ encode_loop(Data, Options, Encoder, Stack, IOBuf) ->
{error, Error} ->
error(Error);
{partial, IOData} ->
finish_encode(IOData, []);
finish_encode(IOData, [], ReturnPartial);
{iter, {NewEncoder, NewStack, NewIOBuf}} ->
encode_loop(Data, Options, NewEncoder, NewStack, NewIOBuf);
[Bin] when is_binary(Bin) ->
[Bin] when is_binary(Bin), not ReturnPartial ->
Bin;
RevIOData when is_list(RevIOData), not ReturnPartial ->
lists:reverse(RevIOData);
RevIOData when is_list(RevIOData) ->
lists:reverse(RevIOData)
nif_wrap_binary(iolist_to_binary(lists:reverse(RevIOData)))
end.
@ -242,3 +253,6 @@ nif_encode_init(_Data, _Options) ->
nif_encode_iter(_Encoder, _Stack, _IoList) ->
?NOT_LOADED.
%% Erlang-side stub for the NIF registered as nif_wrap_binary/1
%% (wrap_binary in the C sources).
%% NOTE(review): ?NOT_LOADED is defined outside this view; presumably it
%% raises when the NIF library has not been loaded - confirm.
nif_wrap_binary(_BinData) ->
?NOT_LOADED.

test/jiffy_18_decode_levels_tests.erl → test/jiffy_18_partials_tests.erl Bestand weergeven

@ -1,18 +1,12 @@
% This file is part of Jiffy released under the MIT license.
% See the LICENSE file for more information.
-module(jiffy_18_decode_levels_tests).
-module(jiffy_18_partials_tests).
-include_lib("eunit/include/eunit.hrl").
decode_levels_test_() ->
MaxOptMaxLevels = 4,
Cases = [
<<"{\"foo\":\"bar\"}">>,
<<"{\"foo\":[\"bar\"]}">>,
<<"[[[[]],\"foo\"], [\"bar\", []], [\"baz\"], [[], 1]]">>,
<<"{\"foo\":{},\"bar\":{\"baz\":[1,2,3], \"foo2\":{}}}">>
],
{"Test max_levels", lists:map(fun(Json) ->
[
begin
@ -20,13 +14,44 @@ decode_levels_test_() ->
FullEJson = to_full_json(EJson, MaxLevels, Opts),
?_assertEqual(jiffy:decode(Json, Opts), FullEJson)
end || MaxLevels <- lists:seq(1, MaxOptMaxLevels), Opts <- generate_options_groups()]
end, Cases)}.
end, jsons())}.
%% For every fixture and every option group: decode with {max_levels, 1}
%% (which may leave resources in the result), encode that result again and
%% check that it decodes to the same term as the original JSON.
encode_resources_test_() ->
    Tests = [
        [
            begin
                %% Evaluated while the tests are generated, as before
                WithResources = jiffy:decode(Json, [{max_levels, 1} | Opts]),
                Reencoded = jiffy:encode(WithResources),
                ?_assertEqual(jiffy:decode(Json, Opts), jiffy:decode(Reencoded, Opts))
            end
         || Opts <- generate_options_groups()
        ]
     || Json <- jsons()
    ],
    {"Test encode resources", Tests}.
%% For every fixture and every option group: encode the decoded term with the
%% `partial` option, require the result to be a reference, encode that
%% reference on its own and check that it decodes back to the original term.
encode_partials_test_() ->
    Tests = [
        [
            begin
                %% Evaluated while the tests are generated, as before
                Decoded = jiffy:decode(Json, Opts),
                Resource = jiffy:encode(Decoded, [partial]),
                true = is_reference(Resource),
                Reencoded = jiffy:encode(Resource),
                ?_assertEqual(Decoded, jiffy:decode(Reencoded, Opts))
            end
         || Opts <- generate_options_groups()
        ]
     || Json <- jsons()
    ],
    {"Test encode partials", Tests}.
%% JSON fixtures shared by the test generators in this module: a flat object,
%% an object holding an array, nested arrays and nested objects.
jsons() ->
    FlatObject = <<"{\"foo\":\"bar\"}">>,
    ObjectWithArray = <<"{\"foo\":[\"bar\"]}">>,
    NestedArrays = <<"[[[[]],\"foo\"], [\"bar\", []], [\"baz\"], [[], 1]]">>,
    NestedObjects = <<"{\"foo\":{},\"bar\":{\"baz\":[1,2,3], \"foo2\":{}}}">>,
    [FlatObject, ObjectWithArray, NestedArrays, NestedObjects].
-ifndef(JIFFY_NO_MAPS).
generate_options_groups() -> generate_options_groups([return_maps, copy_strings]).
generate_options_groups() -> generate_options_groups([return_maps]).
-else.
generate_options_groups() -> generate_options_groups([copy_strings]).
generate_options_groups() -> generate_options_groups([]).
-endif.
generate_options_groups(AvailableOptions) ->
@ -41,17 +66,10 @@ to_full_json(Val, MaxDepth, DecodeOptions) ->
to_full_json(Val, 0, MaxDepth, DecodeOptions).
to_full_json(_Val, Depth, MaxDepth, _DecodeOptions) when Depth > MaxDepth ->
error(too_deep);
to_full_json({json, ValueBin}, Depth, MaxDepth, DecodeOptions) ->
to_full_json(PartialResource, Depth, MaxDepth, DecodeOptions) when is_reference(PartialResource) ->
MaxDepth = Depth,
true = is_binary(ValueBin),
ByteSize = byte_size(ValueBin),
case lists:member(copy_strings, DecodeOptions) of
true ->
ByteSize = binary:referenced_byte_size(ValueBin);
_ ->
true = ByteSize < binary:referenced_byte_size(ValueBin)
end,
jiffy:decode(ValueBin, DecodeOptions);
IOData = jiffy:encode(PartialResource),
jiffy:decode(IOData, DecodeOptions);
to_full_json({Pairs}, Depth, MaxDepth, DecodeOptions) when is_list(Pairs) ->
{[{K, to_full_json(V, Depth+1, MaxDepth, DecodeOptions)} || {K, V} <- Pairs]};
to_full_json(Vals, Depth, MaxDepth, DecodeOptions) when is_list(Vals) ->

Laden…
Annuleren
Opslaan