Browse Source

Merge 40e8204956 into 1388db4e64

pull/195/merge
Jose Maria Perez Ramos 4 years ago
committed by GitHub
parent
commit
aa899c7ad1
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 496 additions and 70 deletions
  1. +36
    -0
      README.md
  2. +185
    -45
      c_src/decoder.c
  3. +7
    -0
      c_src/encoder.c
  4. +12
    -1
      c_src/jiffy.c
  5. +8
    -0
      c_src/jiffy.h
  6. +88
    -0
      c_src/wrapper.c
  7. +55
    -20
      src/jiffy.erl
  8. +2
    -2
      test/jiffy_01_yajl_tests.erl
  9. +6
    -2
      test/jiffy_15_return_trailer_tests.erl
  10. +97
    -0
      test/jiffy_18_partials_tests.erl

+ 36
- 0
README.md View File

@ -59,6 +59,10 @@ The options for decode are:
this option will instead allocate new binaries for each string, so this option will instead allocate new binaries for each string, so
the original JSON document can be garbage collected even though the original JSON document can be garbage collected even though
the decode result is still in use. the decode result is still in use.
* `{max_levels, N}` where N >= 0 - This limits the decoding depth: after N
levels are decoded, the rest is returned as a `Resource::reference()`.
Resources have some limitations; see the [Partial JSONs
section](#partial-jsons).
* `{bytes_per_red, N}` where N >= 0 - This controls the number of * `{bytes_per_red, N}` where N >= 0 - This controls the number of
bytes that Jiffy will process as an equivalent to a reduction. Each bytes that Jiffy will process as an equivalent to a reduction. Each
20 reductions we consume 1% of our allocated time slice for the current 20 reductions we consume 1% of our allocated time slice for the current
@ -85,9 +89,24 @@ The options for encode are:
* `use_nil` - Encode's the atom `nil` as `null`. * `use_nil` - Encode's the atom `nil` as `null`.
* `escape_forward_slashes` - Escapes the `/` character which can be * `escape_forward_slashes` - Escapes the `/` character which can be
useful when encoding URLs in some cases. useful when encoding URLs in some cases.
* `partial` - Instead of returning an `iodata()`, returns a
`Resource::reference()` which holds the verified raw JSON. This resource can be
used as a building block for more complex JSON documents, without the need to
encode these blocks again. Resources have some limitations; see the [Partial
JSONs section](#partial-jsons).
* `{bytes_per_red, N}` - Refer to the decode options * `{bytes_per_red, N}` - Refer to the decode options
* `{bytes_per_iter, N}` - Refer to the decode options * `{bytes_per_iter, N}` - Refer to the decode options
`jiffy:validate/1,2`
------------------
* `jiffy:validate(IoData)`
* `jiffy:validate(IoData, Options)`
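Performs a fast decode to check that IoData is valid JSON. It accepts the same
Options as `jiffy:decode/2` (although some of them make no sense here).
Returns a boolean instead of an EJSON term. If decoding yields a trailer (see
the `return_trailer` option), `{has_trailer, true, Trailer}` is returned instead.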
Data Format Data Format
----------- -----------
@ -120,3 +139,20 @@ Jiffy should be in all ways an improvement over EEP0018. It no longer
imposes limits on the nesting depth. It is capable of encoding and imposes limits on the nesting depth. It is capable of encoding and
decoding large numbers and it does quite a bit more validation of UTF-8 in strings. decoding large numbers and it does quite a bit more validation of UTF-8 in strings.
Partial JSONs
-------------------------
`jiffy:encode/2` with option `partial` returns a `Resource::reference()`.
`jiffy:decode/2` with option `max_levels` may place a `Resource::reference()`
instead of some `json_value()`.
These resources hold a `binary()` with the verified JSON data and can be used
directly, or as a part of a larger EJSON in `jiffy:encode/1,2`. These binaries
won't be re-encoded; instead, they will be placed directly in the result.
However, using resources has some limitations: a resource is only valid on
the node where it was created. If a resource is serialized and deserialized, or
if it is sent to another node and back, it will still be valid only if the
original resource has not been garbage collected in the meantime.

+ 185
- 45
c_src/decoder.c View File

@ -64,6 +64,11 @@ typedef struct {
char* st_data; char* st_data;
int st_size; int st_size;
int st_top; int st_top;
int current_depth;
int max_levels;
unsigned int level_start;
unsigned int empty_element;
} Decoder; } Decoder;
Decoder* Decoder*
@ -99,6 +104,11 @@ dec_new(ErlNifEnv* env)
d->st_data[i] = st_invalid; d->st_data[i] = st_invalid;
} }
d->current_depth = 0;
d->max_levels = -1;
d->level_start = 0;
d->empty_element = 1;
d->st_data[0] = st_value; d->st_data[0] = st_value;
d->st_top++; d->st_top++;
@ -187,6 +197,34 @@ dec_pop_assert(Decoder* d, char val)
(void)current; (void)current;
} }
// Bookkeeping for entering a nested value (the decoder calls this when it
// sees '{' or '['). Does nothing unless a max_levels limit is configured
// (max_levels >= 0). Otherwise the depth counter is bumped and, when the depth
// *before* the bump equals max_levels, the current input offset is remembered
// in level_start.
static inline void
level_increase(Decoder* d)
{
    int depth_on_entry;

    if(d->max_levels < 0) {
        // No limit configured: the depth counter is left untouched
        return;
    }

    depth_on_entry = d->current_depth;
    d->current_depth = depth_on_entry + 1;

    if(depth_on_entry == d->max_levels) {
        d->level_start = d->i;
    }
}
// Bookkeeping for leaving a nested value. Does nothing (returns 0) unless a
// max_levels limit is configured. Otherwise the depth counter is decremented
// and, if it lands exactly on max_levels, *value is set to a wrapper resource
// holding the bytes from level_start up to and including offset d->i.
// Returns 1 when *value was written, 0 otherwise.
static inline int
level_decrease(Decoder* d, ERL_NIF_TERM* value)
{
    unsigned raw_len;
    char* dst;

    if(d->max_levels < 0) {
        return 0;
    }

    d->current_depth -= 1;
    if(d->current_depth != d->max_levels) {
        return 0;
    }

    // Only builds the term at the threshold depth. The +1 makes the span
    // inclusive of the byte at d->i.
    raw_len = d->i - d->level_start + 1;

    if(d->copy_strings) {
        // copy_strings: the span is copied into a freshly allocated binary
        dst = wrap_enif_make_new_binary(d->env, raw_len, value);
        memcpy(dst, &(d->p[d->level_start]), raw_len);
    } else {
        // Otherwise the span is wrapped as a sub binary of the input term
        *value = wrap_enif_make_sub_binary(d->env, d->arg, d->level_start, raw_len);
    }

    return 1;
}
// Tells whether Erlang terms should be built at the current depth: always when
// no max_levels limit is configured (max_levels < 0), otherwise only while
// current_depth has not gone past max_levels. Returns 1 or 0.
static inline int
level_allows_terms(Decoder* d)
{
    if(d->max_levels < 0) {
        return 1;
    }

    return d->current_depth <= d->max_levels;
}
int int
dec_string(Decoder* d, ERL_NIF_TERM* value) dec_string(Decoder* d, ERL_NIF_TERM* value)
{ {
@ -197,8 +235,10 @@ dec_string(Decoder* d, ERL_NIF_TERM* value)
int ui; int ui;
int hi; int hi;
int lo; int lo;
char* chrbuf;
char* chrbuf = NULL;
char buf[4]; // Substitute for chrbuf when no term is needed
int chrpos; int chrpos;
int chrpos_increment;
if(d->p[d->i] != '\"') { if(d->p[d->i] != '\"') {
return 0; return 0;
@ -291,7 +331,11 @@ dec_string(Decoder* d, ERL_NIF_TERM* value)
return 0; return 0;
parse: parse:
if(!has_escape && !d->copy_strings) {
if(!has_escape && !level_allows_terms(d)) {
// If has_escape, the binary is still constructed as a side effect of
// the escape validation, although it's ignored by the caller
return 1;
} else if(!has_escape && !d->copy_strings) {
*value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1)); *value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1));
return 1; return 1;
} else if(!has_escape) { } else if(!has_escape) {
@ -305,12 +349,22 @@ parse:
lo = 0; lo = 0;
ulen = (d->i - 1) - st - num_escapes; ulen = (d->i - 1) - st - num_escapes;
chrbuf = (char*) enif_make_new_binary(d->env, ulen, value);
chrpos = 0;
if(level_allows_terms(d)) {
chrbuf = (char*) enif_make_new_binary(d->env, ulen, value);
chrpos_increment = 1;
chrpos = -1;
} else {
// No term is created, but the string is still validated
// (Thus the chrpos_increment = 0, so we overwrite buf)
chrbuf = &buf[0];
chrpos_increment = 0;
chrpos = 0;
}
ui = st; ui = st;
while(ui < d->i - 1) { while(ui < d->i - 1) {
chrpos += chrpos_increment;
if(d->p[ui] != '\\') { if(d->p[ui] != '\\') {
chrbuf[chrpos++] = d->p[ui++];
chrbuf[chrpos] = d->p[ui++];
continue; continue;
} }
ui++; ui++;
@ -318,27 +372,27 @@ parse:
case '\"': case '\"':
case '\\': case '\\':
case '/': case '/':
chrbuf[chrpos++] = d->p[ui];
chrbuf[chrpos] = d->p[ui];
ui++; ui++;
break; break;
case 'b': case 'b':
chrbuf[chrpos++] = '\b';
chrbuf[chrpos] = '\b';
ui++; ui++;
break; break;
case 'f': case 'f':
chrbuf[chrpos++] = '\f';
chrbuf[chrpos] = '\f';
ui++; ui++;
break; break;
case 'n': case 'n':
chrbuf[chrpos++] = '\n';
chrbuf[chrpos] = '\n';
ui++; ui++;
break; break;
case 'r': case 'r':
chrbuf[chrpos++] = '\r';
chrbuf[chrpos] = '\r';
ui++; ui++;
break; break;
case 't': case 't':
chrbuf[chrpos++] = '\t';
chrbuf[chrpos] = '\t';
ui++; ui++;
break; break;
case 'u': case 'u':
@ -357,11 +411,11 @@ parse:
} else { } else {
ui += 4; ui += 4;
} }
hi = unicode_to_utf8(hi, (unsigned char*) chrbuf+chrpos);
hi = unicode_to_utf8(hi, (unsigned char*) &chrbuf[chrpos]);
if(hi < 0) { if(hi < 0) {
return 0; return 0;
} }
chrpos += hi;
chrpos += (hi-1) * chrpos_increment;
break; break;
default: default:
return 0; return 0;
@ -572,7 +626,6 @@ dec_number(Decoder* d, ERL_NIF_TERM* value)
} }
parse: parse:
switch(state) { switch(state) {
case nst_init: case nst_init:
case nst_sign: case nst_sign:
@ -583,6 +636,10 @@ parse:
break; break;
} }
if(!level_allows_terms(d)) {
return 1;
}
errno = 0; errno = 0;
if(d->i - st < NUM_BUF_LEN) { if(d->i - st < NUM_BUF_LEN) {
@ -643,6 +700,39 @@ make_array(ErlNifEnv* env, ERL_NIF_TERM list)
return ret; return ret;
} }
// Parses a {max_levels, N} option term.
//
// Returns 1 and stores N in *max_levels_p when val is a 2-tuple whose first
// element compares equal to the max_levels atom and whose second element is
// an int >= 0. Returns 0, leaving *max_levels_p untouched, for anything else.
int
get_max_levels(ErlNifEnv* env, ERL_NIF_TERM val, int* max_levels_p)
{
    jiffy_st* st = (jiffy_st*) enif_priv_data(env);
    const ERL_NIF_TERM* elems;
    int n_elems;
    int parsed;

    if(!enif_get_tuple(env, val, &n_elems, &elems) || n_elems != 2) {
        return 0;
    }

    if(enif_compare(elems[0], st->atom_max_levels) != 0) {
        return 0;
    }

    if(!enif_get_int(env, elems[1], &parsed) || parsed < 0) {
        return 0;
    }

    *max_levels_p = parsed;
    return 1;
}
ERL_NIF_TERM ERL_NIF_TERM
decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{ {
@ -695,6 +785,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
d->null_term = d->atoms->atom_nil; d->null_term = d->atoms->atom_nil;
} else if(get_null_term(env, val, &(d->null_term))) { } else if(get_null_term(env, val, &(d->null_term))) {
continue; continue;
} else if(get_max_levels(env, val, &(d->max_levels))) {
continue;
} else { } else {
return enif_make_badarg(env); return enif_make_badarg(env);
} }
@ -791,6 +883,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
val = d->null_term; val = d->null_term;
dec_pop_assert(d, st_value); dec_pop_assert(d, st_value);
d->i += 4; d->i += 4;
d->empty_element = 0;
break; break;
case 't': case 't':
if(d->i + 3 >= d->len) { if(d->i + 3 >= d->len) {
@ -804,6 +897,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
val = d->atoms->atom_true; val = d->atoms->atom_true;
dec_pop_assert(d, st_value); dec_pop_assert(d, st_value);
d->i += 4; d->i += 4;
d->empty_element = 0;
break; break;
case 'f': case 'f':
if(d->i + 4 >= bin.size) { if(d->i + 4 >= bin.size) {
@ -817,6 +911,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
val = d->atoms->atom_false; val = d->atoms->atom_false;
dec_pop_assert(d, st_value); dec_pop_assert(d, st_value);
d->i += 5; d->i += 5;
d->empty_element = 0;
break; break;
case '\"': case '\"':
if(!dec_string(d, &val)) { if(!dec_string(d, &val)) {
@ -824,6 +919,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
goto done; goto done;
} }
dec_pop_assert(d, st_value); dec_pop_assert(d, st_value);
d->empty_element = 0;
break; break;
case '-': case '-':
case '0': case '0':
@ -841,23 +937,34 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
goto done; goto done;
} }
dec_pop_assert(d, st_value); dec_pop_assert(d, st_value);
d->empty_element = 0;
break; break;
case '{': case '{':
dec_push(d, st_object); dec_push(d, st_object);
dec_push(d, st_key); dec_push(d, st_key);
objs = enif_make_list_cell(env, curr, objs);
curr = enif_make_list(env, 0);
level_increase(d);
if(level_allows_terms(d)) {
objs = enif_make_list_cell(env, curr, objs);
curr = enif_make_list(env, 0);
}
d->i++; d->i++;
d->empty_element = 1;
break; break;
case '[': case '[':
dec_push(d, st_array); dec_push(d, st_array);
dec_push(d, st_value); dec_push(d, st_value);
objs = enif_make_list_cell(env, curr, objs);
curr = enif_make_list(env, 0);
level_increase(d);
if(level_allows_terms(d)) {
objs = enif_make_list_cell(env, curr, objs);
curr = enif_make_list(env, 0);
}
d->i++; d->i++;
d->empty_element = 1;
break; break;
case ']': case ']':
if(!enif_is_empty_list(env, curr)) {
if(!d->empty_element) {
ret = dec_error(d, "invalid_json"); ret = dec_error(d, "invalid_json");
goto done; goto done;
} }
@ -867,12 +974,17 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
goto done; goto done;
} }
dec_pop_assert(d, st_value); dec_pop_assert(d, st_value);
val = curr; // curr is []
if(!enif_get_list_cell(env, objs, &curr, &objs)) {
ret = dec_error(d, "internal_error");
goto done;
if(level_allows_terms(d)) {
val = curr; // curr is []
if(!enif_get_list_cell(env, objs, &curr, &objs)) {
ret = dec_error(d, "internal_error");
goto done;
}
} }
level_decrease(d, &val);
d->i++; d->i++;
d->empty_element = 0;
break; break;
default: default:
ret = dec_error(d, "invalid_json"); ret = dec_error(d, "invalid_json");
@ -882,7 +994,9 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
dec_push(d, st_done); dec_push(d, st_done);
} else if(dec_curr(d) != st_value && dec_curr(d) != st_key) { } else if(dec_curr(d) != st_value && dec_curr(d) != st_key) {
dec_push(d, st_comma); dec_push(d, st_comma);
curr = enif_make_list_cell(env, val, curr);
if(level_allows_terms(d)) {
curr = enif_make_list_cell(env, val, curr);
}
} }
break; break;
@ -901,28 +1015,38 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
} }
dec_pop_assert(d, st_key); dec_pop_assert(d, st_key);
dec_push(d, st_colon); dec_push(d, st_colon);
curr = enif_make_list_cell(env, val, curr);
if(level_allows_terms(d)) {
curr = enif_make_list_cell(env, val, curr);
}
break; break;
case '}': case '}':
if(!enif_is_empty_list(env, curr)) {
if(!d->empty_element) {
ret = dec_error(d, "invalid_json"); ret = dec_error(d, "invalid_json");
goto done; goto done;
} }
dec_pop_assert(d, st_key); dec_pop_assert(d, st_key);
dec_pop_assert(d, st_object); dec_pop_assert(d, st_object);
dec_pop_assert(d, st_value); dec_pop_assert(d, st_value);
val = make_empty_object(env, d->return_maps);
if(!enif_get_list_cell(env, objs, &curr, &objs)) {
ret = dec_error(d, "internal_error");
goto done;
if(level_allows_terms(d)) {
val = make_empty_object(env, d->return_maps);
if(!enif_get_list_cell(env, objs, &curr, &objs)) {
ret = dec_error(d, "internal_error");
goto done;
}
} }
level_decrease(d, &val);
if(dec_top(d) == 0) { if(dec_top(d) == 0) {
dec_push(d, st_done); dec_push(d, st_done);
} else { } else {
dec_push(d, st_comma); dec_push(d, st_comma);
curr = enif_make_list_cell(env, val, curr);
if(level_allows_terms(d)) {
curr = enif_make_list_cell(env, val, curr);
}
} }
d->i++; d->i++;
d->empty_element = 0;
break; break;
default: default:
ret = dec_error(d, "invalid_json"); ret = dec_error(d, "invalid_json");
@ -979,21 +1103,28 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
goto done; goto done;
} }
dec_pop_assert(d, st_value); dec_pop_assert(d, st_value);
if(!make_object(env, curr, &val,
d->return_maps, d->dedupe_keys)) {
ret = dec_error(d, "internal_object_error");
goto done;
}
if(!enif_get_list_cell(env, objs, &curr, &objs)) {
ret = dec_error(d, "internal_error");
goto done;
if(level_allows_terms(d)) {
if(!make_object(env, curr, &val,
d->return_maps, d->dedupe_keys)) {
ret = dec_error(d, "internal_object_error");
goto done;
}
if(!enif_get_list_cell(env, objs, &curr, &objs)) {
ret = dec_error(d, "internal_error");
goto done;
}
} }
level_decrease(d, &val);
if(dec_top(d) > 0) { if(dec_top(d) > 0) {
dec_push(d, st_comma); dec_push(d, st_comma);
curr = enif_make_list_cell(env, val, curr);
if(level_allows_terms(d)) {
curr = enif_make_list_cell(env, val, curr);
}
} else { } else {
dec_push(d, st_done); dec_push(d, st_done);
} }
d->i++; d->i++;
break; break;
case ']': case ']':
@ -1003,17 +1134,24 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
goto done; goto done;
} }
dec_pop_assert(d, st_value); dec_pop_assert(d, st_value);
val = make_array(env, curr);
if(!enif_get_list_cell(env, objs, &curr, &objs)) {
ret = dec_error(d, "internal_error");
goto done;
if(level_allows_terms(d)) {
val = make_array(env, curr);
if(!enif_get_list_cell(env, objs, &curr, &objs)) {
ret = dec_error(d, "internal_error");
goto done;
}
} }
level_decrease(d, &val);
if(dec_top(d) > 0) { if(dec_top(d) > 0) {
dec_push(d, st_comma); dec_push(d, st_comma);
curr = enif_make_list_cell(env, val, curr);
if(level_allows_terms(d)) {
curr = enif_make_list_cell(env, val, curr);
}
} else { } else {
dec_push(d, st_done); dec_push(d, st_done);
} }
d->i++; d->i++;
break; break;
default: default:
@ -1042,6 +1180,7 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
} }
decode_done: decode_done:
level_decrease(d, &val);
if(d->i < bin.size && d->return_trailer) { if(d->i < bin.size && d->return_trailer) {
trailer = enif_make_sub_binary(env, argv[0], d->i, bin.size - d->i); trailer = enif_make_sub_binary(env, argv[0], d->i, bin.size - d->i);
@ -1064,3 +1203,4 @@ done:
return ret; return ret;
} }

+ 7
- 0
c_src/encoder.c View File

@ -683,6 +683,8 @@ encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
continue; continue;
} else if(get_bytes_per_red(env, val, &(e->bytes_per_red))) { } else if(get_bytes_per_red(env, val, &(e->bytes_per_red))) {
continue; continue;
} else if(enif_is_identical(val, e->atoms->atom_partial)) {
// Ignore, handled in Erlang
} else { } else {
return enif_make_badarg(env); return enif_make_badarg(env);
} }
@ -923,6 +925,11 @@ encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
termstack_push(&stack, curr); termstack_push(&stack, curr);
termstack_push(&stack, e->atoms->ref_array); termstack_push(&stack, e->atoms->ref_array);
termstack_push(&stack, item); termstack_push(&stack, item);
} else if(unwrap(env, curr, &item)) {
if(!enc_unknown(e, item)) {
ret = enc_error(e, "internal_error");
goto done;
}
} else { } else {
if(!enc_unknown(e, curr)) { if(!enc_unknown(e, curr)) {
ret = enc_error(e, "internal_error"); ret = enc_error(e, "internal_error");

+ 12
- 1
c_src/jiffy.c View File

@ -35,6 +35,7 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes"); st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes");
st->atom_dedupe_keys = make_atom(env, "dedupe_keys"); st->atom_dedupe_keys = make_atom(env, "dedupe_keys");
st->atom_copy_strings = make_atom(env, "copy_strings"); st->atom_copy_strings = make_atom(env, "copy_strings");
st->atom_max_levels = make_atom(env, "max_levels");
// Markers used in encoding // Markers used in encoding
st->ref_object = make_atom(env, "$object_ref$"); st->ref_object = make_atom(env, "$object_ref$");
@ -58,6 +59,15 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
NULL NULL
); );
st->res_wrapper = enif_open_resource_type(
env,
NULL,
"wrapper",
wrapper_destroy,
ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER,
NULL
);
*priv = (void*) st; *priv = (void*) st;
return 0; return 0;
@ -87,7 +97,8 @@ static ErlNifFunc funcs[] =
{"nif_decode_init", 2, decode_init}, {"nif_decode_init", 2, decode_init},
{"nif_decode_iter", 5, decode_iter}, {"nif_decode_iter", 5, decode_iter},
{"nif_encode_init", 2, encode_init}, {"nif_encode_init", 2, encode_init},
{"nif_encode_iter", 3, encode_iter}
{"nif_encode_iter", 3, encode_iter},
{"nif_wrap_binary", 1, wrap_binary}
}; };
ERL_NIF_INIT(jiffy, funcs, &load, &reload, &upgrade, &unload); ERL_NIF_INIT(jiffy, funcs, &load, &reload, &upgrade, &unload);

+ 8
- 0
c_src/jiffy.h View File

@ -44,12 +44,14 @@ typedef struct {
ERL_NIF_TERM atom_escape_forward_slashes; ERL_NIF_TERM atom_escape_forward_slashes;
ERL_NIF_TERM atom_dedupe_keys; ERL_NIF_TERM atom_dedupe_keys;
ERL_NIF_TERM atom_copy_strings; ERL_NIF_TERM atom_copy_strings;
ERL_NIF_TERM atom_max_levels;
ERL_NIF_TERM ref_object; ERL_NIF_TERM ref_object;
ERL_NIF_TERM ref_array; ERL_NIF_TERM ref_array;
ErlNifResourceType* res_dec; ErlNifResourceType* res_dec;
ErlNifResourceType* res_enc; ErlNifResourceType* res_enc;
ErlNifResourceType* res_wrapper;
} jiffy_st; } jiffy_st;
ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name); ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name);
@ -67,9 +69,11 @@ ERL_NIF_TERM decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
ERL_NIF_TERM wrap_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
void dec_destroy(ErlNifEnv* env, void* obj); void dec_destroy(ErlNifEnv* env, void* obj);
void enc_destroy(ErlNifEnv* env, void* obj); void enc_destroy(ErlNifEnv* env, void* obj);
void wrapper_destroy(ErlNifEnv* env, void* obj);
int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
int ret_map, int dedupe_keys); int ret_map, int dedupe_keys);
@ -85,4 +89,8 @@ int unicode_from_pair(int hi, int lo);
int unicode_uescape(int c, unsigned char* buf); int unicode_uescape(int c, unsigned char* buf);
int double_to_shortest(unsigned char *buf, size_t size, size_t* len, double val); int double_to_shortest(unsigned char *buf, size_t size, size_t* len, double val);
char* wrap_enif_make_new_binary(ErlNifEnv* env, size_t size, ERL_NIF_TERM* termp);
ERL_NIF_TERM wrap_enif_make_sub_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, size_t pos, size_t size);
int unwrap(ErlNifEnv* env, ERL_NIF_TERM wrapper_resource, ERL_NIF_TERM* bin_term_p);
#endif // Included JIFFY_H #endif // Included JIFFY_H

+ 88
- 0
c_src/wrapper.c View File

@ -0,0 +1,88 @@
// This file is part of Jiffy released under the MIT license.
// See the LICENSE file for more information.
#include "erl_nif.h"
#include "jiffy.h"
typedef struct {
// The Wrapper is a struct intended to be used as a resource to hold a
// binary that's been validated by jiffy to be a valid JSON value.
// Instances are filled in by wrap_new and torn down by wrapper_destroy.
ErlNifEnv* env; // Process independent env to hold the wrapped binary; freed with enif_free_env in wrapper_destroy
// The wrapped binary term; unwrap hands out copies of it made with enif_make_copy
ERL_NIF_TERM bin;
} Wrapper;
// Allocates a Wrapper resource that takes ownership of process_independent_env
// and refers to binary, and returns the resource term created in process_env.
// The local reference is released before returning, so the returned term is
// the only handle left on the resource.
static ERL_NIF_TERM
wrap_new(ErlNifEnv* process_env, ErlNifEnv* process_independent_env, ERL_NIF_TERM binary)
{
    jiffy_st* st = (jiffy_st*) enif_priv_data(process_env);
    Wrapper* w = enif_alloc_resource(st->res_wrapper, sizeof(Wrapper));
    ERL_NIF_TERM term;

    // Fill in the payload before publishing the resource as a term
    w->env = process_independent_env;
    w->bin = binary;

    term = enif_make_resource(process_env, w);
    enif_release_resource(w);

    return term;
}
// NIF entry point (registered as nif_wrap_binary/1): wraps the binary given as
// the only argument into a Wrapper resource. The binary is copied into a newly
// allocated process independent env that the resource then owns.
// Returns badarg when not called with exactly one binary argument.
ERL_NIF_TERM
wrap_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    ErlNifEnv* owner_env;

    if(argc != 1 || !enif_is_binary(env, argv[0])) {
        return enif_make_badarg(env);
    }

    owner_env = enif_alloc_env();
    return wrap_new(env, owner_env, enif_make_copy(owner_env, argv[0]));
}
// Like enif_make_sub_binary, but the result is returned wrapped in a Wrapper
// resource: the [pos, pos + size) slice of bin_term is created in env, copied
// into a newly allocated process independent env, and that copy is wrapped.
ERL_NIF_TERM
wrap_enif_make_sub_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, size_t pos, size_t size)
{
    ErlNifEnv* owner_env = enif_alloc_env();
    ERL_NIF_TERM slice;
    ERL_NIF_TERM owned_slice;

    // The sub binary must be created in the same env as the parent binary and
    // only then copied over; it segfaults sometimes otherwise
    slice = enif_make_sub_binary(env, bin_term, pos, size);
    owned_slice = enif_make_copy(owner_env, slice);

    return wrap_new(env, owner_env, owned_slice);
}
// Like enif_make_new_binary, but the new binary is allocated in a fresh
// process independent env and *termp receives a Wrapper resource (created in
// env) holding it. Returns the writable data buffer of the new binary, which
// the caller is expected to fill with size bytes.
char*
wrap_enif_make_new_binary(ErlNifEnv* env, size_t size, ERL_NIF_TERM* termp)
{
    ErlNifEnv* owner_env = enif_alloc_env();
    ERL_NIF_TERM fresh_bin;
    unsigned char* data = enif_make_new_binary(owner_env, size, &fresh_bin);

    *termp = wrap_new(env, owner_env, fresh_bin);

    return (char*) data;
}
// Extracts the binary held by a Wrapper resource term.
//
// Returns 1 and stores a copy of the wrapped binary (made in env) in
// *bin_term_p when wrapper_resource is a resource of the wrapper type.
// Returns 0, leaving *bin_term_p untouched, otherwise.
int
unwrap(ErlNifEnv* env, ERL_NIF_TERM wrapper_resource, ERL_NIF_TERM* bin_term_p)
{
    jiffy_st* st = (jiffy_st*) enif_priv_data(env);
    void* res = NULL;

    if(enif_get_resource(env, wrapper_resource, st->res_wrapper, &res)) {
        *bin_term_p = enif_make_copy(env, ((Wrapper*) res)->bin);
        return 1;
    }

    return 0;
}
// Resource destructor for Wrapper (passed to enif_open_resource_type for the
// "wrapper" type): frees the process independent env stored in the resource.
void
wrapper_destroy(ErlNifEnv* env, void* obj)
{
    enif_free_env(((Wrapper*) obj)->env);
}

+ 55
- 20
src/jiffy.erl View File

@ -2,7 +2,7 @@
% See the LICENSE file for more information. % See the LICENSE file for more information.
-module(jiffy). -module(jiffy).
-export([decode/1, decode/2, encode/1, encode/2]).
-export([decode/1, decode/2, encode/1, encode/2, validate/1, validate/2]).
-define(NOT_LOADED, not_loaded(?LINE)). -define(NOT_LOADED, not_loaded(?LINE)).
-compile([no_native]). -compile([no_native]).
@ -18,18 +18,22 @@
| json_object() | json_object()
| json_array(). | json_array().
-type json_array() :: [json_value()].
-type json_array() :: [json_value()] | json_raw().
-type json_string() :: atom() | binary(). -type json_string() :: atom() | binary().
-type json_number() :: integer() | float(). -type json_number() :: integer() | float().
%% json_raw() is only returned when using options 'partial' or 'max_levels'
-type json_raw() :: reference().
-ifdef(JIFFY_NO_MAPS). -ifdef(JIFFY_NO_MAPS).
-type json_object() :: {[{json_string(),json_value()}]}.
-type json_object() :: {[{json_string(),json_value()}]}
| json_raw().
-else. -else.
-type json_object() :: {[{json_string(),json_value()}]} -type json_object() :: {[{json_string(),json_value()}]}
| #{json_string() => json_value()}.
| #{json_string() => json_value()}
| json_raw().
-endif. -endif.
@ -42,6 +46,7 @@
| dedupe_keys | dedupe_keys
| copy_strings | copy_strings
| {null_term, any()} | {null_term, any()}
| {max_levels, non_neg_integer()}
| {bytes_per_iter, non_neg_integer()} | {bytes_per_iter, non_neg_integer()}
| {bytes_per_red, non_neg_integer()}. | {bytes_per_red, non_neg_integer()}.
@ -50,13 +55,14 @@
| force_utf8 | force_utf8
| use_nil | use_nil
| escape_forward_slashes | escape_forward_slashes
| partial
| {bytes_per_iter, non_neg_integer()} | {bytes_per_iter, non_neg_integer()}
| {bytes_per_red, non_neg_integer()}. | {bytes_per_red, non_neg_integer()}.
-type decode_options() :: [decode_option()]. -type decode_options() :: [decode_option()].
-type encode_options() :: [encode_option()]. -type encode_options() :: [encode_option()].
-export_type([json_value/0, jiffy_decode_result/0]).
-export_type([json_value/0, json_raw/0, jiffy_decode_result/0]).
-spec decode(iolist() | binary()) -> jiffy_decode_result(). -spec decode(iolist() | binary()) -> jiffy_decode_result().
@ -80,14 +86,15 @@ decode(Data, Opts) when is_list(Data) ->
decode(iolist_to_binary(Data), Opts). decode(iolist_to_binary(Data), Opts).
-spec encode(json_value()) -> iodata().
-spec encode(json_value() | json_raw()) -> iodata() | json_raw().
encode(Data) -> encode(Data) ->
encode(Data, []). encode(Data, []).
-spec encode(json_value(), encode_options()) -> iodata().
-spec encode(json_value() | json_raw(), encode_options()) -> iodata() | json_raw().
encode(Data, Options) -> encode(Data, Options) ->
ForceUTF8 = lists:member(force_utf8, Options), ForceUTF8 = lists:member(force_utf8, Options),
ReturnPartial = lists:member(partial, Options),
case nif_encode_init(Data, Options) of case nif_encode_init(Data, Options) of
{error, {invalid_string, _}} when ForceUTF8 == true -> {error, {invalid_string, _}} when ForceUTF8 == true ->
FixedData = jiffy_utf8:fix(Data), FixedData = jiffy_utf8:fix(Data),
@ -98,16 +105,34 @@ encode(Data, Options) ->
{error, Error} -> {error, Error} ->
error(Error); error(Error);
{partial, IOData} -> {partial, IOData} ->
finish_encode(IOData, []);
finish_encode(IOData, [], ReturnPartial);
{iter, {Encoder, Stack, IOBuf}} -> {iter, {Encoder, Stack, IOBuf}} ->
encode_loop(Data, Options, Encoder, Stack, IOBuf); encode_loop(Data, Options, Encoder, Stack, IOBuf);
[Bin] when is_binary(Bin) ->
[Bin] when is_binary(Bin), not ReturnPartial ->
Bin; Bin;
RevIOData when is_list(RevIOData), not ReturnPartial ->
lists:reverse(RevIOData);
RevIOData when is_list(RevIOData) -> RevIOData when is_list(RevIOData) ->
lists:reverse(RevIOData)
nif_wrap_binary(iolist_to_binary(lists:reverse(RevIOData)))
end. end.
%% Checks whether Data is valid JSON, using the default options.
-spec validate(iolist() | binary()) -> boolean() | {has_trailer, true, binary()}.
validate(Data) ->
    validate(Data, []).


%% Checks whether Data is valid JSON by decoding it with Opts, with any
%% max_levels option replaced by {max_levels, 0}.
%%
%% Returns true when the decode succeeds, {has_trailer, true, Trailer} when the
%% decoder reports a trailer, and false when the decode raises an error.
%% Iolists are flattened to a binary first.
-spec validate(iolist() | binary(), decode_options()) ->
    boolean() | {has_trailer, true, binary()}.
validate(Data, Opts) when is_binary(Data), is_list(Opts) ->
    try decode(Data, lists:keystore(max_levels, 1, Opts, {max_levels, 0})) of
        {has_trailer, _FlatEJson, Trailer} -> {has_trailer, true, Trailer};
        _FlatEJson -> true
    catch
        %% Decode failures are raised with error/1; throws and exits are not
        %% part of the decoder's failure contract, so they are not swallowed.
        error:_ -> false
    end;
validate(Data, Opts) when is_list(Data) ->
    validate(iolist_to_binary(Data), Opts).
finish_decode({bignum, Value}) -> finish_decode({bignum, Value}) ->
list_to_integer(binary_to_list(Value)); list_to_integer(binary_to_list(Value));
finish_decode({bignum_e, Value}) -> finish_decode({bignum_e, Value}) ->
@ -163,18 +188,22 @@ finish_decode_arr([V | Vals], Acc) ->
finish_decode_arr(Vals, [finish_decode(V) | Acc]). finish_decode_arr(Vals, [finish_decode(V) | Acc]).
finish_encode([], Acc) ->
finish_encode([], Acc, false) ->
%% No reverse! The NIF returned us %% No reverse! The NIF returned us
%% the pieces in reverse order. %% the pieces in reverse order.
Acc; Acc;
finish_encode([<<_/binary>>=B | Rest], Acc) ->
finish_encode(Rest, [B | Acc]);
finish_encode([Val | Rest], Acc) when is_integer(Val) ->
finish_encode([], Acc, true) ->
%% No reverse! The NIF returned us
%% the pieces in reverse order.
nif_wrap_binary(iolist_to_binary(Acc));
finish_encode([<<_/binary>>=B | Rest], Acc, ReturnPartial) ->
finish_encode(Rest, [B | Acc], ReturnPartial);
finish_encode([Val | Rest], Acc, ReturnPartial) when is_integer(Val) ->
Bin = list_to_binary(integer_to_list(Val)), Bin = list_to_binary(integer_to_list(Val)),
finish_encode(Rest, [Bin | Acc]);
finish_encode([InvalidEjson | _], _) ->
finish_encode(Rest, [Bin | Acc], ReturnPartial);
finish_encode([InvalidEjson | _], _, _) ->
error({invalid_ejson, InvalidEjson}); error({invalid_ejson, InvalidEjson});
finish_encode(_, _) ->
finish_encode(_, _, _) ->
error(invalid_ejson). error(invalid_ejson).
@ -205,6 +234,7 @@ decode_loop(Data, Decoder, Val, Objs, Curr) ->
encode_loop(Data, Options, Encoder, Stack, IOBuf) -> encode_loop(Data, Options, Encoder, Stack, IOBuf) ->
ForceUTF8 = lists:member(force_utf8, Options), ForceUTF8 = lists:member(force_utf8, Options),
ReturnPartial = lists:member(partial, Options),
case nif_encode_iter(Encoder, Stack, IOBuf) of case nif_encode_iter(Encoder, Stack, IOBuf) of
{error, {invalid_string, _}} when ForceUTF8 == true -> {error, {invalid_string, _}} when ForceUTF8 == true ->
FixedData = jiffy_utf8:fix(Data), FixedData = jiffy_utf8:fix(Data),
@ -215,13 +245,15 @@ encode_loop(Data, Options, Encoder, Stack, IOBuf) ->
{error, Error} -> {error, Error} ->
error(Error); error(Error);
{partial, IOData} -> {partial, IOData} ->
finish_encode(IOData, []);
finish_encode(IOData, [], ReturnPartial);
{iter, {NewEncoder, NewStack, NewIOBuf}} -> {iter, {NewEncoder, NewStack, NewIOBuf}} ->
encode_loop(Data, Options, NewEncoder, NewStack, NewIOBuf); encode_loop(Data, Options, NewEncoder, NewStack, NewIOBuf);
[Bin] when is_binary(Bin) ->
[Bin] when is_binary(Bin), not ReturnPartial ->
Bin; Bin;
RevIOData when is_list(RevIOData), not ReturnPartial ->
lists:reverse(RevIOData);
RevIOData when is_list(RevIOData) -> RevIOData when is_list(RevIOData) ->
lists:reverse(RevIOData)
nif_wrap_binary(iolist_to_binary(lists:reverse(RevIOData)))
end. end.
@ -239,3 +271,6 @@ nif_encode_init(_Data, _Options) ->
nif_encode_iter(_Encoder, _Stack, _IoList) -> nif_encode_iter(_Encoder, _Stack, _IoList) ->
?NOT_LOADED. ?NOT_LOADED.
nif_wrap_binary(_BinData) ->
?NOT_LOADED.

+ 2
- 2
test/jiffy_01_yajl_tests.erl View File

@ -14,9 +14,9 @@ yajl_test_() ->
gen({Name, Json, {error, Erl}}) -> gen({Name, Json, {error, Erl}}) ->
{Name, ?_assertError(Erl, jiffy:decode(Json))};
{Name, [?_assertEqual(false, jiffy:validate(Json)), ?_assertError(Erl, jiffy:decode(Json))]};
gen({Name, Json, Erl}) -> gen({Name, Json, Erl}) ->
{Name, ?_assertEqual(Erl, jiffy:decode(Json))}.
{Name, [?_assertEqual(true, jiffy:validate(Json)), ?_assertEqual(Erl, jiffy:decode(Json))]}.
read_cases() -> read_cases() ->

+ 6
- 2
test/jiffy_15_return_trailer_tests.erl View File

@ -15,5 +15,9 @@ trailer_test_() ->
{<<"1 2 3">>, {has_trailer, 1, <<"2 3">>}} {<<"1 2 3">>, {has_trailer, 1, <<"2 3">>}}
], ],
{"Test return_trailer", lists:map(fun({Data, Result}) -> {"Test return_trailer", lists:map(fun({Data, Result}) ->
?_assertEqual(Result, jiffy:decode(Data, Opts))
end, Cases)}.
ValidateResult = if is_tuple(Result) -> setelement(2, Result, true);
true -> Result
end,
[?_assertEqual(ValidateResult, jiffy:validate(Data, Opts)),
?_assertEqual(Result, jiffy:decode(Data, Opts))]
end, Cases)}.

+ 97
- 0
test/jiffy_18_partials_tests.erl View File

@ -0,0 +1,97 @@
% This file is part of Jiffy released under the MIT license.
% See the LICENSE file for more information.
-module(jiffy_18_partials_tests).
-include_lib("eunit/include/eunit.hrl").
%% Test generator for the `max_levels' decode option.
%%
%% For every sample document, every depth limit in 0..MaxOptMaxLevels and
%% every option group, the document is decoded with `{max_levels, N}', the
%% result is expanded back through to_full_json/3, and the expanded term is
%% compared with a plain decode of the same document.
%%
%% Each case is wrapped in ?_test so that decoding and expansion happen when
%% the individual test runs. A crash then fails only that case instead of
%% aborting the whole generator while it is being built.
decode_levels_test_() ->
    MaxOptMaxLevels = 4,
    Cases = [
        ?_test(begin
            EJson = jiffy:decode(Json, [{max_levels, MaxLevels} | Opts]),
            FullEJson = to_full_json(EJson, MaxLevels, Opts),
            ?assertEqual(jiffy:decode(Json, Opts), FullEJson)
        end)
     || Json <- jsons(),
        MaxLevels <- lists:seq(0, MaxOptMaxLevels),
        Opts <- generate_options_groups()
    ],
    {"Test max_levels", Cases}.
%% Test generator checking that a term decoded with `{max_levels, 1}' can be
%% fed straight back into jiffy:encode/1.
%%
%% For every sample document and option group, the depth-limited decode
%% result is encoded again, and decoding that output must equal a plain
%% decode of the original document.
%%
%% The decode/encode work lives inside ?_test so it runs per test case;
%% a failure is reported for that case only rather than breaking the
%% generator itself.
encode_resources_test_() ->
    Cases = [
        ?_test(begin
            EJsonWithResources = jiffy:decode(Json, [{max_levels, 1} | Opts]),
            JsonFromResources = jiffy:encode(EJsonWithResources),
            ?assertEqual(
                jiffy:decode(Json, Opts),
                jiffy:decode(JsonFromResources, Opts)
            )
        end)
     || Json <- jsons(),
        Opts <- generate_options_groups()
    ],
    {"Test encode resources", Cases}.
%% Test generator for the `partial' encode option.
%%
%% For every sample document and option group: decode the document, encode
%% the result with `[partial]' (the test expects a reference back), encode
%% that reference again with jiffy:encode/1, and check that decoding the
%% final output gives back the originally decoded term.
%%
%% All steps run inside ?_test, so the reference check and the encode calls
%% are evaluated per test case and reported as ordinary assertion failures
%% instead of crashing the generator while it is being constructed.
encode_partials_test_() ->
    Cases = [
        ?_test(begin
            EJson = jiffy:decode(Json, Opts),
            PartialResource = jiffy:encode(EJson, [partial]),
            ?assert(is_reference(PartialResource)),
            PartialIOData = jiffy:encode(PartialResource),
            ?assertEqual(EJson, jiffy:decode(PartialIOData, Opts))
        end)
     || Json <- jsons(),
        Opts <- generate_options_groups()
    ],
    {"Test encode partials", Cases}.
%% Sample JSON documents shared by the test generators in this module.
jsons() ->
    FlatObject = <<"{\"foo\":\"bar\"}">>,
    ObjectWithArray = <<"{\"foo\":[\"bar\"]}">>,
    NestedArrays = <<"[[[[]],\"foo\"], [\"bar\", []], [\"baz\"], [[], 1]]">>,
    NestedObjects = <<"{\"foo\":{},\"bar\":{\"baz\":[1,2,3], \"foo2\":{}}}">>,
    [FlatObject, ObjectWithArray, NestedArrays, NestedObjects].
%% Option groups exercised by the tests: every combination of the decode
%% options listed below. `return_maps' is left out when the build defines
%% JIFFY_NO_MAPS.
-ifdef(JIFFY_NO_MAPS).
generate_options_groups() ->
    generate_options_groups([copy_strings]).
-else.
generate_options_groups() ->
    generate_options_groups([copy_strings, return_maps]).
-endif.
%% Builds every subset of AvailableOptions, each subset being one option list.
%% The result always contains the empty group [].
generate_options_groups(AvailableOptions) ->
    generate_options_groups(AvailableOptions, [[]]).

%% Folds over the remaining options: each option doubles the groups collected
%% so far by prepending itself to a copy of every existing group, with the
%% new copies placed ahead of the existing ones.
generate_options_groups(Options, Groups) ->
    lists:foldl(
        fun(Option, Collected) ->
            WithOption = [[Option | Group] || Group <- Collected],
            WithOption ++ Collected
        end,
        Groups,
        Options
    ).
%% Expands a depth-limited decode result into a complete term by replacing
%% every reference with the decoded form of its encoded content.
%% MaxDepth is the `max_levels' value the term was decoded with and
%% DecodeOptions are the remaining decode options.
to_full_json(Val, MaxDepth, DecodeOptions) ->
    to_full_json(Val, 0, MaxDepth, DecodeOptions).

%% Depth-tracking worker for to_full_json/3.
%% Raises `too_deep' if any value is found below MaxDepth.
to_full_json(_Val, Depth, MaxDepth, _DecodeOptions) when Depth > MaxDepth ->
    error(too_deep);
to_full_json(PartialResource, Depth, MaxDepth, DecodeOptions) when is_reference(PartialResource) ->
    %% A reference is only accepted exactly at the depth limit.
    MaxDepth = Depth,
    IOData = jiffy:encode(PartialResource),
    StringsCopied = lists:member(copy_strings, DecodeOptions),
    lists:foreach(
        fun(ValueBin) ->
            ByteSize = byte_size(ValueBin),
            Referenced = binary:referenced_byte_size(ValueBin),
            case StringsCopied of
                true ->
                    ByteSize = Referenced;
                false ->
                    %% With small binaries, the copies between environments
                    %% involve a full copy if the binary is small enough
                    %% (thus the =)
                    true = ByteSize =< Referenced
            end
        end,
        lists:flatten(IOData)
    ),
    jiffy:decode(IOData, DecodeOptions);
to_full_json({Pairs}, Depth, MaxDepth, DecodeOptions) when is_list(Pairs) ->
    ChildDepth = Depth + 1,
    {[{K, to_full_json(V, ChildDepth, MaxDepth, DecodeOptions)} || {K, V} <- Pairs]};
to_full_json(Vals, Depth, MaxDepth, DecodeOptions) when is_list(Vals) ->
    ChildDepth = Depth + 1,
    lists:map(
        fun(V) -> to_full_json(V, ChildDepth, MaxDepth, DecodeOptions) end,
        Vals
    );
to_full_json(Val, Depth, MaxDepth, DecodeOptions) ->
    %% Anything else is either a map (when maps are enabled) or a scalar.
    maybe_map(Val, Depth, MaxDepth, DecodeOptions).
%% Fallback used by to_full_json/4 for values that are neither references,
%% `{Pairs}' tuples nor lists. When maps are compiled in, every map value is
%% expanded one level deeper; any other term is returned unchanged.
-ifndef(JIFFY_NO_MAPS).
maybe_map(Val, _Depth, _MaxDepth, _DecodeOptions) when not is_map(Val) ->
    Val;
maybe_map(Obj, Depth, MaxDepth, DecodeOptions) ->
    ExpandValue = fun(_Key, Value) ->
        to_full_json(Value, Depth + 1, MaxDepth, DecodeOptions)
    end,
    maps:map(ExpandValue, Obj).
-else.
maybe_map(Val, _Depth, _MaxDepth, _DecodeOptions) ->
    Val.
-endif.

Loading…
Cancel
Save