diff --git a/README.md b/README.md index 72f773a..adce8be 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,10 @@ The options for decode are: this option will instead allocate new binaries for each string, so the original JSON document can be garbage collected even though the decode result is still in use. +* `{max_levels, N}` where N > 0 - This limits the decoding depth: any + object or array nested deeper than N levels is not decoded and is returned + as a raw `{json, binary()}` tuple instead. Note that JSON validation is + relaxed in levels deeper than N. * `{bytes_per_red, N}` where N >= 0 - This controls the number of bytes that Jiffy will process as an equivalent to a reduction. Each 20 reductions we consume 1% of our allocated time slice for the current diff --git a/c_src/decoder.c b/c_src/decoder.c index 8f78117..23732e3 100644 --- a/c_src/decoder.c +++ b/c_src/decoder.c @@ -64,6 +64,10 @@ typedef struct { char* st_data; int st_size; int st_top; + + unsigned int current_level; + unsigned int max_levels; + unsigned int level_start; } Decoder; Decoder* @@ -99,6 +103,10 @@ dec_new(ErlNifEnv* env) d->st_data[i] = st_invalid; } + d->current_level = 0; + d->max_levels = 0; + d->level_start = 0; + d->st_data[0] = st_value; d->st_top++; @@ -187,6 +195,35 @@ dec_pop_assert(Decoder* d, char val) (void)current; } +static void inline +level_increase(Decoder* d) { + if(d->max_levels && (d->max_levels == d->current_level++)) { + d->level_start = d->i; + } +} + +static int inline +level_decrease(Decoder* d, ERL_NIF_TERM* value) { + if (d->max_levels && d->max_levels == --d->current_level) { + ERL_NIF_TERM bin; + if(!d->copy_strings) { + bin = enif_make_sub_binary(d->env, d->arg, d->level_start, (d->i - d->level_start + 1)); + } else { + unsigned ulen = d->i - d->level_start + 1; + char* chrbuf = (char*) enif_make_new_binary(d->env, ulen, &bin); + memcpy(chrbuf, &(d->p[d->level_start]), ulen); + } + *value = enif_make_tuple2(d->env, d->atoms->atom_json, bin); + return 1; + } + return 0; +} + +static int 
inline +level_allows_terms(Decoder* d) { + return (!d->max_levels) || (d->max_levels >= d->current_level); +} + int dec_string(Decoder* d, ERL_NIF_TERM* value) { @@ -291,7 +328,9 @@ dec_string(Decoder* d, ERL_NIF_TERM* value) return 0; parse: - if(!has_escape && !d->copy_strings) { + if(!level_allows_terms(d)) { + return 1; + } else if(!has_escape && !d->copy_strings) { *value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1)); return 1; } else if(!has_escape) { @@ -572,6 +611,9 @@ dec_number(Decoder* d, ERL_NIF_TERM* value) } parse: + if(!level_allows_terms(d)) { + return 1; + } switch(state) { case nst_init: @@ -643,6 +685,39 @@ make_array(ErlNifEnv* env, ERL_NIF_TERM list) return ret; } +int +get_max_levels(ErlNifEnv* env, ERL_NIF_TERM val, unsigned int* max_levels_p) +{ + jiffy_st* st = (jiffy_st*) enif_priv_data(env); + const ERL_NIF_TERM* tuple; + int arity; + unsigned int max_levels; + + if(!enif_get_tuple(env, val, &arity, &tuple)) { + return 0; + } + + if(arity != 2) { + return 0; + } + + if(enif_compare(tuple[0], st->atom_max_levels) != 0) { + return 0; + } + + if(!enif_get_uint(env, tuple[1], &max_levels)) { + return 0; + } + + if(max_levels == 0) { + return 0; + } + + *max_levels_p = max_levels; + + return 1; +} + ERL_NIF_TERM decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { @@ -695,6 +770,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) d->null_term = d->atoms->atom_nil; } else if(get_null_term(env, val, &(d->null_term))) { continue; + } else if(get_max_levels(env, val, &(d->max_levels))) { + continue; } else { return enif_make_badarg(env); } @@ -845,21 +922,31 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) case '{': dec_push(d, st_object); dec_push(d, st_key); - objs = enif_make_list_cell(env, curr, objs); - curr = enif_make_list(env, 0); + + level_increase(d); + if(level_allows_terms(d)) { + objs = enif_make_list_cell(env, curr, objs); + curr = enif_make_list(env, 0); + } d->i++; 
break; case '[': dec_push(d, st_array); dec_push(d, st_value); - objs = enif_make_list_cell(env, curr, objs); - curr = enif_make_list(env, 0); + + level_increase(d); + if(level_allows_terms(d)) { + objs = enif_make_list_cell(env, curr, objs); + curr = enif_make_list(env, 0); + } d->i++; break; case ']': - if(!enif_is_empty_list(env, curr)) { - ret = dec_error(d, "invalid_json"); - goto done; + if(level_allows_terms(d)) { + if(!enif_is_empty_list(env, curr)) { + ret = dec_error(d, "invalid_json"); + goto done; + } } dec_pop_assert(d, st_value); if(dec_pop(d) != st_array) { @@ -867,11 +954,16 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) goto done; } dec_pop_assert(d, st_value); - val = curr; // curr is [] - if(!enif_get_list_cell(env, objs, &curr, &objs)) { - ret = dec_error(d, "internal_error"); - goto done; + if(level_allows_terms(d)) { + val = curr; // curr is [] + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + ret = dec_error(d, "internal_error"); + goto done; + } } + + level_decrease(d, &val); + d->i++; break; default: @@ -882,7 +974,9 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) dec_push(d, st_done); } else if(dec_curr(d) != st_value && dec_curr(d) != st_key) { dec_push(d, st_comma); - curr = enif_make_list_cell(env, val, curr); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } } break; @@ -901,27 +995,40 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) } dec_pop_assert(d, st_key); dec_push(d, st_colon); - curr = enif_make_list_cell(env, val, curr); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } break; case '}': - if(!enif_is_empty_list(env, curr)) { - ret = dec_error(d, "invalid_json"); - goto done; + if(level_allows_terms(d)) { + if(!enif_is_empty_list(env, curr)) { + ret = dec_error(d, "invalid_json"); + goto done; + } } dec_pop_assert(d, st_key); dec_pop_assert(d, st_object); dec_pop_assert(d, st_value); - val = 
make_empty_object(env, d->return_maps); - if(!enif_get_list_cell(env, objs, &curr, &objs)) { - ret = dec_error(d, "internal_error"); - goto done; + if(level_allows_terms(d)) { + val = make_empty_object(env, d->return_maps); + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + ret = dec_error(d, "internal_error"); + goto done; + } } if(dec_top(d) == 0) { dec_push(d, st_done); } else { dec_push(d, st_comma); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } + } + + if(level_decrease(d, &val)) { curr = enif_make_list_cell(env, val, curr); } + d->i++; break; default: @@ -979,21 +1086,30 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) goto done; } dec_pop_assert(d, st_value); - if(!make_object(env, curr, &val, - d->return_maps, d->dedupe_keys)) { - ret = dec_error(d, "internal_object_error"); - goto done; - } - if(!enif_get_list_cell(env, objs, &curr, &objs)) { - ret = dec_error(d, "internal_error"); - goto done; + if(level_allows_terms(d)) { + if(!make_object(env, curr, &val, + d->return_maps, d->dedupe_keys)) { + ret = dec_error(d, "internal_object_error"); + goto done; + } + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + ret = dec_error(d, "internal_error"); + goto done; + } } if(dec_top(d) > 0) { dec_push(d, st_comma); - curr = enif_make_list_cell(env, val, curr); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } } else { dec_push(d, st_done); } + + if(level_decrease(d, &val)) { + curr = enif_make_list_cell(env, val, curr); + } + d->i++; break; case ']': @@ -1003,17 +1119,26 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) goto done; } dec_pop_assert(d, st_value); - val = make_array(env, curr); - if(!enif_get_list_cell(env, objs, &curr, &objs)) { - ret = dec_error(d, "internal_error"); - goto done; + if(level_allows_terms(d)) { + val = make_array(env, curr); + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + ret = dec_error(d, "internal_error"); + goto 
done; + } } if(dec_top(d) > 0) { dec_push(d, st_comma); - curr = enif_make_list_cell(env, val, curr); + if(level_allows_terms(d)) { + curr = enif_make_list_cell(env, val, curr); + } } else { dec_push(d, st_done); } + + if(level_decrease(d, &val)) { + curr = enif_make_list_cell(env, val, curr); + } + d->i++; break; default: @@ -1064,3 +1189,4 @@ done: return ret; } + diff --git a/c_src/jiffy.c b/c_src/jiffy.c index 03ded3e..61b3b55 100644 --- a/c_src/jiffy.c +++ b/c_src/jiffy.c @@ -34,6 +34,8 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes"); st->atom_dedupe_keys = make_atom(env, "dedupe_keys"); st->atom_copy_strings = make_atom(env, "copy_strings"); + st->atom_json = make_atom(env, "json"); + st->atom_max_levels = make_atom(env, "max_levels"); // Markers used in encoding st->ref_object = make_atom(env, "$object_ref$"); diff --git a/c_src/jiffy.h b/c_src/jiffy.h index 6d19500..9743662 100644 --- a/c_src/jiffy.h +++ b/c_src/jiffy.h @@ -43,6 +43,8 @@ typedef struct { ERL_NIF_TERM atom_escape_forward_slashes; ERL_NIF_TERM atom_dedupe_keys; ERL_NIF_TERM atom_copy_strings; + ERL_NIF_TERM atom_json; + ERL_NIF_TERM atom_max_levels; ERL_NIF_TERM ref_object; ERL_NIF_TERM ref_array; diff --git a/src/jiffy.erl b/src/jiffy.erl index 900354e..a5e74bd 100644 --- a/src/jiffy.erl +++ b/src/jiffy.erl @@ -16,11 +16,13 @@ | json_string() | json_number() | json_object() - | json_array(). + | json_array() + | json_raw(). -type json_array() :: [json_value()]. -type json_string() :: atom() | binary(). -type json_number() :: integer() | float(). +-type json_raw() :: {json, binary()}. % Only when decoding with max_levels -ifdef(JIFFY_NO_MAPS). @@ -42,6 +44,7 @@ | dedupe_keys | copy_strings | {null_term, any()} + | {max_levels, non_neg_integer()} | {bytes_per_iter, non_neg_integer()} | {bytes_per_red, non_neg_integer()}.