diff --git a/README.md b/README.md index 72f773a..1a5a742 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,10 @@ The options for decode are: this option will instead allocate new binaries for each string, so the original JSON document can be garbage collected even though the decode result is still in use. +* `{max_levels, N}` where N >= 0 - This controls when to stop decoding + by depth: after N levels are decoded, the rest is returned as a + `Resource::reference()`. Resources have some limitations; see the [partial JSONs + section](#partial-jsons). * `{bytes_per_red, N}` where N >= 0 - This controls the number of bytes that Jiffy will process as an equivalent to a reduction. Each 20 reductions we consume 1% of our allocated time slice for the current @@ -85,9 +89,24 @@ The options for encode are: * `use_nil` - Encode's the atom `nil` as `null`. * `escape_forward_slashes` - Escapes the `/` character which can be useful when encoding URLs in some cases. +* `partial` - Instead of returning an `iodata()`, returns a + `Resource::reference()` which holds the verified raw JSON. This resource can be used + as a building block for more complex JSON documents, without the need to encode these + blocks again. Resources have some limitations; see the [partial JSONs + section](#partial-jsons). * `{bytes_per_red, N}` - Refer to the decode options * `{bytes_per_iter, N}` - Refer to the decode options +`jiffy:validate/1,2` +------------------ + +* `jiffy:validate(IoData)` +* `jiffy:validate(IoData, Options)` + +Performs a fast decode to check that IoData is valid JSON. It accepts the same Options as +`jiffy:decode/2` (although some of them are not meaningful for validation). +Returns a boolean instead of an EJSON term, or `{has_trailer, true, Trailer}` when `jiffy:decode/2` would return a trailer. + Data Format ----------- @@ -120,3 +139,20 @@ Jiffy should be in all ways an improvement over EEP0018. It no longer imposes limits on the nesting depth. It is capable of encoding and decoding large numbers and it does quite a bit more validation of UTF-8 in strings. 
+Partial JSONs +------------------------- + +`jiffy:encode/2` with option `partial` returns a `Resource::reference()`. + +`jiffy:decode/2` with option `max_levels` may return a `Resource::reference()` +in place of some `json_value()`. + +These resources hold a `binary()` with the verified JSON data and can be used +directly, or as a part of a larger EJSON in `jiffy:encode/1,2`. These binaries +won't be re-encoded; instead, they will be placed directly in the result. + +However, using resources has some limitations: the resource is only valid on +the node where it was created. If a resource is serialized and deserialized, or +if it is sent to another node and back, it will still be valid only if the resource +has not been garbage collected in the meantime. + diff --git a/c_src/decoder.c b/c_src/decoder.c index 8f78117..08d689d 100644 --- a/c_src/decoder.c +++ b/c_src/decoder.c @@ -64,6 +64,11 @@ typedef struct { char* st_data; int st_size; int st_top; + + int current_depth; + int max_levels; + unsigned int level_start; + unsigned int empty_element; } Decoder; Decoder* @@ -99,6 +104,11 @@ dec_new(ErlNifEnv* env) d->st_data[i] = st_invalid; } + d->current_depth = 0; + d->max_levels = -1; + d->level_start = 0; + d->empty_element = 1; + d->st_data[0] = st_value; d->st_top++; @@ -187,6 +197,34 @@ dec_pop_assert(Decoder* d, char val) (void)current; } +static void inline +level_increase(Decoder* d) { + if(d->max_levels >= 0 && (d->max_levels == d->current_depth++)) { + d->level_start = d->i; + } +} + +static int inline +level_decrease(Decoder* d, ERL_NIF_TERM* value) { + if (d->max_levels >= 0 && d->max_levels == --d->current_depth) { + // Only builds term in threshold + unsigned ulen = d->i - d->level_start + 1; + if(!d->copy_strings) { + *value = wrap_enif_make_sub_binary(d->env, d->arg, d->level_start, ulen); + } else { + char* chrbuf = wrap_enif_make_new_binary(d->env, ulen, value); + memcpy(chrbuf, &(d->p[d->level_start]), ulen); + } + return 1; + } + return 0; +} + +static int inline 
+level_allows_terms(Decoder* d) { + return (d->max_levels < 0) || (d->max_levels >= d->current_depth); +} + int dec_string(Decoder* d, ERL_NIF_TERM* value) { @@ -197,8 +235,10 @@ dec_string(Decoder* d, ERL_NIF_TERM* value) int ui; int hi; int lo; - char* chrbuf; + char* chrbuf = NULL; + char buf[4]; // Substitute for chrbuf when no term is needed int chrpos; + int chrpos_increment; if(d->p[d->i] != '\"') { return 0; @@ -291,7 +331,11 @@ dec_string(Decoder* d, ERL_NIF_TERM* value) return 0; parse: - if(!has_escape && !d->copy_strings) { + if(!has_escape && !level_allows_terms(d)) { + // If has_escape, the binary is still constructed as a side effect of + // the escape validation, although it's ignored by the caller + return 1; + } else if(!has_escape && !d->copy_strings) { *value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1)); return 1; } else if(!has_escape) { @@ -305,12 +349,22 @@ parse: lo = 0; ulen = (d->i - 1) - st - num_escapes; - chrbuf = (char*) enif_make_new_binary(d->env, ulen, value); - chrpos = 0; + if(level_allows_terms(d)) { + chrbuf = (char*) enif_make_new_binary(d->env, ulen, value); + chrpos_increment = 1; + chrpos = -1; + } else { + // No term is created, but the string is still validated + // (Thus the chrpos_increment = 0, so we overwrite buf) + chrbuf = &buf[0]; + chrpos_increment = 0; + chrpos = 0; + } ui = st; while(ui < d->i - 1) { + chrpos += chrpos_increment; if(d->p[ui] != '\\') { - chrbuf[chrpos++] = d->p[ui++]; + chrbuf[chrpos] = d->p[ui++]; continue; } ui++; @@ -318,27 +372,27 @@ parse: case '\"': case '\\': case '/': - chrbuf[chrpos++] = d->p[ui]; + chrbuf[chrpos] = d->p[ui]; ui++; break; case 'b': - chrbuf[chrpos++] = '\b'; + chrbuf[chrpos] = '\b'; ui++; break; case 'f': - chrbuf[chrpos++] = '\f'; + chrbuf[chrpos] = '\f'; ui++; break; case 'n': - chrbuf[chrpos++] = '\n'; + chrbuf[chrpos] = '\n'; ui++; break; case 'r': - chrbuf[chrpos++] = '\r'; + chrbuf[chrpos] = '\r'; ui++; break; case 't': - chrbuf[chrpos++] = '\t'; + 
chrbuf[chrpos] = '\t'; ui++; break; case 'u': @@ -357,11 +411,11 @@ parse: } else { ui += 4; } - hi = unicode_to_utf8(hi, (unsigned char*) chrbuf+chrpos); + hi = unicode_to_utf8(hi, (unsigned char*) &chrbuf[chrpos]); if(hi < 0) { return 0; } - chrpos += hi; + chrpos += (hi-1) * chrpos_increment; break; default: return 0; @@ -572,7 +626,6 @@ dec_number(Decoder* d, ERL_NIF_TERM* value) } parse: - switch(state) { case nst_init: case nst_sign: @@ -583,6 +636,10 @@ parse: break; } + if(!level_allows_terms(d)) { + return 1; + } + errno = 0; if(d->i - st < NUM_BUF_LEN) { @@ -643,6 +700,39 @@ make_array(ErlNifEnv* env, ERL_NIF_TERM list) return ret; } +int +get_max_levels(ErlNifEnv* env, ERL_NIF_TERM val, int* max_levels_p) +{ + jiffy_st* st = (jiffy_st*) enif_priv_data(env); + const ERL_NIF_TERM* tuple; + int arity; + int max_levels; + + if(!enif_get_tuple(env, val, &arity, &tuple)) { + return 0; + } + + if(arity != 2) { + return 0; + } + + if(enif_compare(tuple[0], st->atom_max_levels) != 0) { + return 0; + } + + if(!enif_get_int(env, tuple[1], &max_levels)) { + return 0; + } + + if(max_levels < 0) { + return 0; + } + + *max_levels_p = max_levels; + + return 1; +} + ERL_NIF_TERM decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { @@ -695,6 +785,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) d->null_term = d->atoms->atom_nil; } else if(get_null_term(env, val, &(d->null_term))) { continue; + } else if(get_max_levels(env, val, &(d->max_levels))) { + continue; } else { return enif_make_badarg(env); } @@ -791,6 +883,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) val = d->null_term; dec_pop_assert(d, st_value); d->i += 4; + d->empty_element = 0; break; case 't': if(d->i + 3 >= d->len) { @@ -804,6 +897,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) val = d->atoms->atom_true; dec_pop_assert(d, st_value); d->i += 4; + d->empty_element = 0; break; case 'f': if(d->i + 4 >= bin.size) { @@ -817,6 
+911,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) val = d->atoms->atom_false; dec_pop_assert(d, st_value); d->i += 5; + d->empty_element = 0; break; case '\"': if(!dec_string(d, &val)) { @@ -824,6 +919,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) goto done; } dec_pop_assert(d, st_value); + d->empty_element = 0; break; case '-': case '0': @@ -841,23 +937,34 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) goto done; } dec_pop_assert(d, st_value); + d->empty_element = 0; break; case '{': dec_push(d, st_object); dec_push(d, st_key); - objs = enif_make_list_cell(env, curr, objs); - curr = enif_make_list(env, 0); + + level_increase(d); + if(level_allows_terms(d)) { + objs = enif_make_list_cell(env, curr, objs); + curr = enif_make_list(env, 0); + } d->i++; + d->empty_element = 1; break; case '[': dec_push(d, st_array); dec_push(d, st_value); - objs = enif_make_list_cell(env, curr, objs); - curr = enif_make_list(env, 0); + + level_increase(d); + if(level_allows_terms(d)) { + objs = enif_make_list_cell(env, curr, objs); + curr = enif_make_list(env, 0); + } d->i++; + d->empty_element = 1; break; case ']': - if(!enif_is_empty_list(env, curr)) { + if(!d->empty_element) { ret = dec_error(d, "invalid_json"); goto done; } @@ -867,12 +974,17 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) goto done; } dec_pop_assert(d, st_value); - val = curr; // curr is [] - if(!enif_get_list_cell(env, objs, &curr, &objs)) { - ret = dec_error(d, "internal_error"); - goto done; + if(level_allows_terms(d)) { + val = curr; // curr is [] + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + ret = dec_error(d, "internal_error"); + goto done; + } } + level_decrease(d, &val); + d->i++; + d->empty_element = 0; break; default: ret = dec_error(d, "invalid_json"); @@ -882,7 +994,9 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) dec_push(d, st_done); } else if(dec_curr(d) != st_value && dec_curr(d) 
!= st_key) { dec_push(d, st_comma); - curr = enif_make_list_cell(env, val, curr); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } } break; @@ -901,28 +1015,38 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) } dec_pop_assert(d, st_key); dec_push(d, st_colon); - curr = enif_make_list_cell(env, val, curr); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } break; case '}': - if(!enif_is_empty_list(env, curr)) { + if(!d->empty_element) { ret = dec_error(d, "invalid_json"); goto done; } dec_pop_assert(d, st_key); dec_pop_assert(d, st_object); dec_pop_assert(d, st_value); - val = make_empty_object(env, d->return_maps); - if(!enif_get_list_cell(env, objs, &curr, &objs)) { - ret = dec_error(d, "internal_error"); - goto done; + if(level_allows_terms(d)) { + val = make_empty_object(env, d->return_maps); + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + ret = dec_error(d, "internal_error"); + goto done; + } } + level_decrease(d, &val); + if(dec_top(d) == 0) { dec_push(d, st_done); } else { dec_push(d, st_comma); - curr = enif_make_list_cell(env, val, curr); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } } + d->i++; + d->empty_element = 0; break; default: ret = dec_error(d, "invalid_json"); @@ -979,21 +1103,28 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) goto done; } dec_pop_assert(d, st_value); - if(!make_object(env, curr, &val, - d->return_maps, d->dedupe_keys)) { - ret = dec_error(d, "internal_object_error"); - goto done; - } - if(!enif_get_list_cell(env, objs, &curr, &objs)) { - ret = dec_error(d, "internal_error"); - goto done; + if(level_allows_terms(d)) { + if(!make_object(env, curr, &val, + d->return_maps, d->dedupe_keys)) { + ret = dec_error(d, "internal_object_error"); + goto done; + } + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + ret = dec_error(d, "internal_error"); + goto done; + } } + level_decrease(d, &val); + 
if(dec_top(d) > 0) { dec_push(d, st_comma); - curr = enif_make_list_cell(env, val, curr); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } } else { dec_push(d, st_done); } + d->i++; break; case ']': @@ -1003,17 +1134,24 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) goto done; } dec_pop_assert(d, st_value); - val = make_array(env, curr); - if(!enif_get_list_cell(env, objs, &curr, &objs)) { - ret = dec_error(d, "internal_error"); - goto done; + if(level_allows_terms(d)) { + val = make_array(env, curr); + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + ret = dec_error(d, "internal_error"); + goto done; + } } + level_decrease(d, &val); + if(dec_top(d) > 0) { dec_push(d, st_comma); - curr = enif_make_list_cell(env, val, curr); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } } else { dec_push(d, st_done); } + d->i++; break; default: @@ -1042,6 +1180,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) } decode_done: + level_decrease(d, &val); if(d->i < bin.size && d->return_trailer) { trailer = enif_make_sub_binary(env, argv[0], d->i, bin.size - d->i); @@ -1064,3 +1203,4 @@ done: return ret; } + diff --git a/c_src/encoder.c b/c_src/encoder.c index 4cfb353..759d231 100644 --- a/c_src/encoder.c +++ b/c_src/encoder.c @@ -683,6 +683,8 @@ encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) continue; } else if(get_bytes_per_red(env, val, &(e->bytes_per_red))) { continue; + } else if(enif_is_identical(val, e->atoms->atom_partial)) { + // Ignore, handled in Erlang } else { return enif_make_badarg(env); } @@ -923,6 +925,11 @@ encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) termstack_push(&stack, curr); termstack_push(&stack, e->atoms->ref_array); termstack_push(&stack, item); + } else if(unwrap(env, curr, &item)) { + if(!enc_unknown(e, item)) { + ret = enc_error(e, "internal_error"); + goto done; + } } else { if(!enc_unknown(e, curr)) { ret = 
enc_error(e, "internal_error"); diff --git a/c_src/jiffy.c b/c_src/jiffy.c index dfca7c7..5c818e5 100644 --- a/c_src/jiffy.c +++ b/c_src/jiffy.c @@ -35,6 +35,7 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes"); st->atom_dedupe_keys = make_atom(env, "dedupe_keys"); st->atom_copy_strings = make_atom(env, "copy_strings"); + st->atom_max_levels = make_atom(env, "max_levels"); // Markers used in encoding st->ref_object = make_atom(env, "$object_ref$"); @@ -58,6 +59,15 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) NULL ); + st->res_wrapper = enif_open_resource_type( + env, + NULL, + "wrapper", + wrapper_destroy, + ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, + NULL + ); + *priv = (void*) st; return 0; @@ -87,7 +97,8 @@ static ErlNifFunc funcs[] = {"nif_decode_init", 2, decode_init}, {"nif_decode_iter", 5, decode_iter}, {"nif_encode_init", 2, encode_init}, - {"nif_encode_iter", 3, encode_iter} + {"nif_encode_iter", 3, encode_iter}, + {"nif_wrap_binary", 1, wrap_binary} }; ERL_NIF_INIT(jiffy, funcs, &load, &reload, &upgrade, &unload); diff --git a/c_src/jiffy.h b/c_src/jiffy.h index 9c97945..bfff9e8 100644 --- a/c_src/jiffy.h +++ b/c_src/jiffy.h @@ -44,12 +44,14 @@ typedef struct { ERL_NIF_TERM atom_escape_forward_slashes; ERL_NIF_TERM atom_dedupe_keys; ERL_NIF_TERM atom_copy_strings; + ERL_NIF_TERM atom_max_levels; ERL_NIF_TERM ref_object; ERL_NIF_TERM ref_array; ErlNifResourceType* res_dec; ErlNifResourceType* res_enc; + ErlNifResourceType* res_wrapper; } jiffy_st; ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name); @@ -67,9 +69,11 @@ ERL_NIF_TERM decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); +ERL_NIF_TERM 
wrap_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); void dec_destroy(ErlNifEnv* env, void* obj); void enc_destroy(ErlNifEnv* env, void* obj); +void wrapper_destroy(ErlNifEnv* env, void* obj); int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, int ret_map, int dedupe_keys); @@ -85,4 +89,8 @@ int unicode_from_pair(int hi, int lo); int unicode_uescape(int c, unsigned char* buf); int double_to_shortest(unsigned char *buf, size_t size, size_t* len, double val); +char* wrap_enif_make_new_binary(ErlNifEnv* env, size_t size, ERL_NIF_TERM* termp); +ERL_NIF_TERM wrap_enif_make_sub_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, size_t pos, size_t size); +int unwrap(ErlNifEnv* env, ERL_NIF_TERM wrapper_resource, ERL_NIF_TERM* bin_term_p); + #endif // Included JIFFY_H diff --git a/c_src/wrapper.c b/c_src/wrapper.c new file mode 100644 index 0000000..3a77e26 --- /dev/null +++ b/c_src/wrapper.c @@ -0,0 +1,88 @@ +// This file is part of Jiffy released under the MIT license. +// See the LICENSE file for more information. 
+ +#include "erl_nif.h" +#include "jiffy.h" + +typedef struct { + // The Wrapper is a struct intended to be used as a resource to hold a + // binary that's been validated by jiffy to be a valid JSON value + + ErlNifEnv* env; // Process independent env to hold the wrapped binary + ERL_NIF_TERM bin; +} Wrapper; + +static ERL_NIF_TERM +wrap_new(ErlNifEnv* process_env, ErlNifEnv* process_independent_env, ERL_NIF_TERM binary) +{ + jiffy_st* st = (jiffy_st*) enif_priv_data(process_env); + + Wrapper* wrapper_p = enif_alloc_resource(st->res_wrapper, sizeof(Wrapper)); + ERL_NIF_TERM wrapper_term = enif_make_resource(process_env, wrapper_p); + enif_release_resource(wrapper_p); + + wrapper_p->env = process_independent_env; + wrapper_p->bin = binary; + + return wrapper_term; +} + +ERL_NIF_TERM +wrap_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + if(argc != 1) { + return enif_make_badarg(env); + } + + ERL_NIF_TERM binary = argv[0]; + if(!enif_is_binary(env, binary)) { + return enif_make_badarg(env); + } + + ErlNifEnv* process_independent_env = enif_alloc_env(); + ERL_NIF_TERM bin_copy = enif_make_copy(process_independent_env, binary); + + return wrap_new(env, process_independent_env, bin_copy); +} + +ERL_NIF_TERM +wrap_enif_make_sub_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, size_t pos, size_t size) +{ + ErlNifEnv* process_independent_env = enif_alloc_env(); + // sub_bin must be created in the same env as the parent binary and then + // copied, segfaults sometimes otherwise + ERL_NIF_TERM sub_bin = enif_make_sub_binary(env, bin_term, pos, size); + return wrap_new(env, process_independent_env, enif_make_copy(process_independent_env, sub_bin)); +} + +char* +wrap_enif_make_new_binary(ErlNifEnv* env, size_t size, ERL_NIF_TERM* termp) +{ + ErlNifEnv* process_independent_env = enif_alloc_env(); + ERL_NIF_TERM bin; + char* chrbuf = (char*) enif_make_new_binary(process_independent_env, size, &bin); + *termp = wrap_new(env, process_independent_env, bin); + return 
chrbuf; +} + +int +unwrap(ErlNifEnv* env, ERL_NIF_TERM wrapper_resource, ERL_NIF_TERM* bin_term_p) +{ + jiffy_st* st = (jiffy_st*) enif_priv_data(env); + + Wrapper* wrapper_p = NULL; + if(!enif_get_resource(env, wrapper_resource, st->res_wrapper, (void**) &wrapper_p)) { + return 0; + } + + *bin_term_p = enif_make_copy(env, wrapper_p->bin); + return 1; +} + +void +wrapper_destroy(ErlNifEnv* env, void* obj) +{ + Wrapper* wrapper_p = (Wrapper*) obj; + enif_free_env(wrapper_p->env); +} + diff --git a/src/jiffy.erl b/src/jiffy.erl index 900354e..6df005e 100644 --- a/src/jiffy.erl +++ b/src/jiffy.erl @@ -2,7 +2,7 @@ % See the LICENSE file for more information. -module(jiffy). --export([decode/1, decode/2, encode/1, encode/2]). +-export([decode/1, decode/2, encode/1, encode/2, validate/1, validate/2]). -define(NOT_LOADED, not_loaded(?LINE)). -compile([no_native]). @@ -18,18 +18,22 @@ | json_object() | json_array(). --type json_array() :: [json_value()]. +-type json_array() :: [json_value()] | json_raw(). -type json_string() :: atom() | binary(). -type json_number() :: integer() | float(). +%% json_raw() is only returned when using options 'partial' or 'max_levels' +-type json_raw() :: reference(). -ifdef(JIFFY_NO_MAPS). --type json_object() :: {[{json_string(),json_value()}]}. +-type json_object() :: {[{json_string(),json_value()}]} + | json_raw(). -else. -type json_object() :: {[{json_string(),json_value()}]} - | #{json_string() => json_value()}. + | #{json_string() => json_value()} + | json_raw(). -endif. @@ -42,6 +46,7 @@ | dedupe_keys | copy_strings | {null_term, any()} + | {max_levels, non_neg_integer()} | {bytes_per_iter, non_neg_integer()} | {bytes_per_red, non_neg_integer()}. @@ -50,13 +55,14 @@ | force_utf8 | use_nil | escape_forward_slashes + | partial | {bytes_per_iter, non_neg_integer()} | {bytes_per_red, non_neg_integer()}. -type decode_options() :: [decode_option()]. -type encode_options() :: [encode_option()]. 
--export_type([json_value/0, jiffy_decode_result/0]). +-export_type([json_value/0, json_raw/0, jiffy_decode_result/0]). -spec decode(iolist() | binary()) -> jiffy_decode_result(). @@ -80,14 +86,15 @@ decode(Data, Opts) when is_list(Data) -> decode(iolist_to_binary(Data), Opts). --spec encode(json_value()) -> iodata(). +-spec encode(json_value() | json_raw()) -> iodata() | json_raw(). encode(Data) -> encode(Data, []). --spec encode(json_value(), encode_options()) -> iodata(). +-spec encode(json_value() | json_raw(), encode_options()) -> iodata() | json_raw(). encode(Data, Options) -> ForceUTF8 = lists:member(force_utf8, Options), + ReturnPartial = lists:member(partial, Options), case nif_encode_init(Data, Options) of {error, {invalid_string, _}} when ForceUTF8 == true -> FixedData = jiffy_utf8:fix(Data), @@ -98,16 +105,34 @@ encode(Data, Options) -> {error, Error} -> error(Error); {partial, IOData} -> - finish_encode(IOData, []); + finish_encode(IOData, [], ReturnPartial); {iter, {Encoder, Stack, IOBuf}} -> encode_loop(Data, Options, Encoder, Stack, IOBuf); - [Bin] when is_binary(Bin) -> + [Bin] when is_binary(Bin), not ReturnPartial -> Bin; + RevIOData when is_list(RevIOData), not ReturnPartial -> + lists:reverse(RevIOData); RevIOData when is_list(RevIOData) -> - lists:reverse(RevIOData) + nif_wrap_binary(iolist_to_binary(lists:reverse(RevIOData))) end. +-spec validate(iolist() | binary()) -> boolean() | {has_trailer, true, binary()}. +validate(Data) -> + validate(Data, []). + + +-spec validate(iolist() | binary(), decode_options()) -> boolean() | {has_trailer, true, binary()}. +validate(Data, Opts) when is_binary(Data), is_list(Opts) -> + try decode(Data, lists:keystore(max_levels, 1, Opts, {max_levels, 0})) of + {has_trailer, _FlatEJson, Trailer} -> {has_trailer, true, Trailer}; + _FlatEJson -> true + catch _:_ -> false + end; +validate(Data, Opts) when is_list(Data) -> + validate(iolist_to_binary(Data), Opts). 
+ + finish_decode({bignum, Value}) -> list_to_integer(binary_to_list(Value)); finish_decode({bignum_e, Value}) -> @@ -163,18 +188,22 @@ finish_decode_arr([V | Vals], Acc) -> finish_decode_arr(Vals, [finish_decode(V) | Acc]). -finish_encode([], Acc) -> +finish_encode([], Acc, false) -> %% No reverse! The NIF returned us %% the pieces in reverse order. Acc; -finish_encode([<<_/binary>>=B | Rest], Acc) -> - finish_encode(Rest, [B | Acc]); -finish_encode([Val | Rest], Acc) when is_integer(Val) -> +finish_encode([], Acc, true) -> + %% No reverse! The NIF returned us + %% the pieces in reverse order. + nif_wrap_binary(iolist_to_binary(Acc)); +finish_encode([<<_/binary>>=B | Rest], Acc, ReturnPartial) -> + finish_encode(Rest, [B | Acc], ReturnPartial); +finish_encode([Val | Rest], Acc, ReturnPartial) when is_integer(Val) -> Bin = list_to_binary(integer_to_list(Val)), - finish_encode(Rest, [Bin | Acc]); -finish_encode([InvalidEjson | _], _) -> + finish_encode(Rest, [Bin | Acc], ReturnPartial); +finish_encode([InvalidEjson | _], _, _) -> error({invalid_ejson, InvalidEjson}); -finish_encode(_, _) -> +finish_encode(_, _, _) -> error(invalid_ejson). 
@@ -205,6 +234,7 @@ decode_loop(Data, Decoder, Val, Objs, Curr) -> encode_loop(Data, Options, Encoder, Stack, IOBuf) -> ForceUTF8 = lists:member(force_utf8, Options), + ReturnPartial = lists:member(partial, Options), case nif_encode_iter(Encoder, Stack, IOBuf) of {error, {invalid_string, _}} when ForceUTF8 == true -> FixedData = jiffy_utf8:fix(Data), @@ -215,13 +245,15 @@ encode_loop(Data, Options, Encoder, Stack, IOBuf) -> {error, Error} -> error(Error); {partial, IOData} -> - finish_encode(IOData, []); + finish_encode(IOData, [], ReturnPartial); {iter, {NewEncoder, NewStack, NewIOBuf}} -> encode_loop(Data, Options, NewEncoder, NewStack, NewIOBuf); - [Bin] when is_binary(Bin) -> + [Bin] when is_binary(Bin), not ReturnPartial -> Bin; + RevIOData when is_list(RevIOData), not ReturnPartial -> + lists:reverse(RevIOData); RevIOData when is_list(RevIOData) -> - lists:reverse(RevIOData) + nif_wrap_binary(iolist_to_binary(lists:reverse(RevIOData))) end. @@ -239,3 +271,6 @@ nif_encode_init(_Data, _Options) -> nif_encode_iter(_Encoder, _Stack, _IoList) -> ?NOT_LOADED. + +nif_wrap_binary(_BinData) -> + ?NOT_LOADED. diff --git a/test/jiffy_01_yajl_tests.erl b/test/jiffy_01_yajl_tests.erl index 9e99ace..4aa5069 100644 --- a/test/jiffy_01_yajl_tests.erl +++ b/test/jiffy_01_yajl_tests.erl @@ -14,9 +14,9 @@ yajl_test_() -> gen({Name, Json, {error, Erl}}) -> - {Name, ?_assertError(Erl, jiffy:decode(Json))}; + {Name, [?_assertEqual(false, jiffy:validate(Json)), ?_assertError(Erl, jiffy:decode(Json))]}; gen({Name, Json, Erl}) -> - {Name, ?_assertEqual(Erl, jiffy:decode(Json))}. + {Name, [?_assertEqual(true, jiffy:validate(Json)), ?_assertEqual(Erl, jiffy:decode(Json))]}. 
read_cases() -> diff --git a/test/jiffy_15_return_trailer_tests.erl b/test/jiffy_15_return_trailer_tests.erl index af80a46..c8e3ec1 100644 --- a/test/jiffy_15_return_trailer_tests.erl +++ b/test/jiffy_15_return_trailer_tests.erl @@ -15,5 +15,9 @@ trailer_test_() -> {<<"1 2 3">>, {has_trailer, 1, <<"2 3">>}} ], {"Test return_trailer", lists:map(fun({Data, Result}) -> - ?_assertEqual(Result, jiffy:decode(Data, Opts)) - end, Cases)}. \ No newline at end of file + ValidateResult = if is_tuple(Result) -> setelement(2, Result, true); + true -> Result + end, + [?_assertEqual(ValidateResult, jiffy:validate(Data, Opts)), + ?_assertEqual(Result, jiffy:decode(Data, Opts))] + end, Cases)}. diff --git a/test/jiffy_18_partials_tests.erl b/test/jiffy_18_partials_tests.erl new file mode 100644 index 0000000..b32d489 --- /dev/null +++ b/test/jiffy_18_partials_tests.erl @@ -0,0 +1,97 @@ +% This file is part of Jiffy released under the MIT license. +% See the LICENSE file for more information. + +-module(jiffy_18_partials_tests). + +-include_lib("eunit/include/eunit.hrl"). + +decode_levels_test_() -> + MaxOptMaxLevels = 4, + {"Test max_levels", lists:map(fun(Json) -> + [begin + EJson = jiffy:decode(Json, [{max_levels, MaxLevels} | Opts]), + FullEJson = to_full_json(EJson, MaxLevels, Opts), + ?_assertEqual(jiffy:decode(Json, Opts), FullEJson) + end || MaxLevels <- lists:seq(0, MaxOptMaxLevels), Opts <- generate_options_groups()] + end, jsons())}. + +encode_resources_test_() -> + {"Test encode resources", lists:map(fun(Json) -> + [begin + EJsonWithResources = jiffy:decode(Json, [{max_levels, 1} | Opts]), + JsonFromResources = jiffy:encode(EJsonWithResources), + ?_assertEqual(jiffy:decode(Json, Opts), jiffy:decode(JsonFromResources, Opts)) + end || Opts <- generate_options_groups()] + end, jsons())}. 
+ +encode_partials_test_() -> + {"Test encode partials", lists:map(fun(Json) -> + [begin + EJson = jiffy:decode(Json, Opts), + PartialResource = jiffy:encode(EJson, [partial]), + true = is_reference(PartialResource), + PartialIOData = jiffy:encode(PartialResource), + ?_assertEqual(EJson, jiffy:decode(PartialIOData, Opts)) + end || Opts <- generate_options_groups()] + end, jsons())}. + + +jsons() -> + [ + <<"{\"foo\":\"bar\"}">>, + <<"{\"foo\":[\"bar\"]}">>, + <<"[[[[]],\"foo\"], [\"bar\", []], [\"baz\"], [[], 1]]">>, + <<"{\"foo\":{},\"bar\":{\"baz\":[1,2,3], \"foo2\":{}}}">> + ]. + + +-ifndef(JIFFY_NO_MAPS). +generate_options_groups() -> generate_options_groups([copy_strings, return_maps]). +-else. +generate_options_groups() -> generate_options_groups([copy_strings]). +-endif. + +generate_options_groups(AvailableOptions) -> + generate_options_groups(AvailableOptions, [[]]). +generate_options_groups([], Acc) -> + Acc; +generate_options_groups([Option | AvailableOptions], Acc) -> + generate_options_groups(AvailableOptions, [[Option | Group] || Group <- Acc] ++ Acc). + + +to_full_json(Val, MaxDepth, DecodeOptions) -> + to_full_json(Val, 0, MaxDepth, DecodeOptions). 
+to_full_json(_Val, Depth, MaxDepth, _DecodeOptions) when Depth > MaxDepth -> + error(too_deep); +to_full_json(PartialResource, Depth, MaxDepth, DecodeOptions) when is_reference(PartialResource) -> + MaxDepth = Depth, + IOData = jiffy:encode(PartialResource), + [begin + ByteSize = byte_size(ValueBin), + case lists:member(copy_strings, DecodeOptions) of + true -> + ByteSize = binary:referenced_byte_size(ValueBin); + _ -> + % With small binaries, the copies between environments involve a + % full copy if the binary is small enough (thus the =) + true = ByteSize =< binary:referenced_byte_size(ValueBin) + end + end || ValueBin <- lists:flatten(IOData)], + jiffy:decode(IOData, DecodeOptions); +to_full_json({Pairs}, Depth, MaxDepth, DecodeOptions) when is_list(Pairs) -> + {[{K, to_full_json(V, Depth+1, MaxDepth, DecodeOptions)} || {K, V} <- Pairs]}; +to_full_json(Vals, Depth, MaxDepth, DecodeOptions) when is_list(Vals) -> + [to_full_json(V, Depth+1, MaxDepth, DecodeOptions) || V <- Vals]; +to_full_json(Val, Depth, MaxDepth, DecodeOptions) -> + maybe_map(Val, Depth, MaxDepth, DecodeOptions). + +-ifndef(JIFFY_NO_MAPS). +maybe_map(Obj, Depth, MaxDepth, DecodeOptions) when is_map(Obj) -> + maps:map(fun(_K, V) -> to_full_json(V, Depth+1, MaxDepth, DecodeOptions) end, Obj); +maybe_map(Val, _Depth, _MaxDepth, _DecodeOptions) -> + Val. +-else. +maybe_map(Val, _Depth, _MaxDepth, _DecodeOptions) -> + Val. +-endif. +