Переглянути джерело

Improve support for bignums.

Any number that can't be decoded in C is now passed back
to Erlang for decoding.

Large numbers passed to the encoder are no longer rejected:
the C encoder passes them through unencoded, and Erlang
encodes them after the main encoding pass has finished.
pull/8/merge
Paul J. Davis 14 роки тому
джерело
коміт
7aac77bb29
8 змінених файлів з 210 додано та 87 видалено
  1. +36
    -33
      c_src/decoder.c
  2. +80
    -22
      c_src/encoder.c
  3. +3
    -0
      c_src/jiffy.c
  4. +4
    -0
      c_src/jiffy.h
  5. +52
    -22
      src/jiffy.erl
  6. +28
    -7
      test/003-numbers.t
  7. +1
    -1
      test/005-arrays.t
  8. +6
    -2
      test/util.erl

+ 36
- 33
c_src/decoder.c Переглянути файл

@ -11,7 +11,7 @@
#define ERROR(i, msg) make_error(st, env, msg)
#define STACK_SIZE_INC 64
#define NUM_BUF_LEN 256
#define NUM_BUF_LEN 32
enum {
st_value=0,
@ -42,7 +42,7 @@ typedef struct {
ERL_NIF_TERM arg;
ErlNifBinary bin;
int has_bignum;
int is_partial;
char* p;
unsigned char* u;
@ -63,7 +63,7 @@ dec_init(Decoder* d, ErlNifEnv* env, ERL_NIF_TERM arg, ErlNifBinary* bin)
d->atoms = enif_priv_data(env);
d->arg = arg;
d->has_bignum = 0;
d->is_partial = 0;
d->p = (char*) bin->data;
d->u = bin->data;
@ -353,10 +353,12 @@ parse:
int
dec_number(Decoder* d, ERL_NIF_TERM* value)
{
ERL_NIF_TERM num_type = d->atoms->atom_error;
char state = nst_init;
char nbuf[NUM_BUF_LEN];
int st = d->i;
int is_double = 0;
int has_frac = 0;
int has_exp = 0;
double dval;
long lval;
@ -457,7 +459,7 @@ dec_number(Decoder* d, ERL_NIF_TERM* value)
break;
case nst_frac1:
is_double = 1;
has_frac = 1;
switch(d->p[d->i]) {
case '0':
case '1':
@ -502,7 +504,7 @@ dec_number(Decoder* d, ERL_NIF_TERM* value)
break;
case nst_esign:
is_double = 1;
has_exp = 1;
switch(d->p[d->i]) {
case '-':
case '+':
@ -560,37 +562,38 @@ parse:
break;
}
errno = 0;
if(st - d->i > NUM_BUF_LEN && is_double) {
return 0;
} else if(st - d->i > NUM_BUF_LEN) {
d->has_bignum = 1;
*value = enif_make_sub_binary(d->env, d->arg, st, d->i - st);
*value = enif_make_tuple2(d->env, d->atoms->atom_bignum, *value);
return 1;
}
memset(nbuf, 0, NUM_BUF_LEN);
memcpy(nbuf, &(d->p[st]), d->i - st);
if(d->i - st < NUM_BUF_LEN) {
memset(nbuf, 0, NUM_BUF_LEN);
memcpy(nbuf, &(d->p[st]), d->i - st);
errno = 0;
if(is_double) {
dval = strtod(nbuf, NULL);
if(errno == ERANGE) {
return 0;
if(has_frac || has_exp) {
dval = strtod(nbuf, NULL);
if(errno != ERANGE) {
*value = enif_make_double(d->env, dval);
return 1;
}
} else {
lval = strtol(nbuf, NULL, 10);
if(errno != ERANGE) {
*value = enif_make_int64(d->env, lval);
return 1;
}
}
*value = enif_make_double(d->env, dval);
return 1;
}
lval = strtol(nbuf, NULL, 10);
if(errno == ERANGE) {
d->has_bignum = 1;
*value = enif_make_sub_binary(d->env, d->arg, st, d->i - st);
*value = enif_make_tuple2(d->env, d->atoms->atom_bignum, *value);
if(!has_frac && !has_exp) {
num_type = d->atoms->atom_bignum;
} else if(has_exp) {
num_type = d->atoms->atom_bignum_e;
} else {
*value = enif_make_int64(d->env, lval);
num_type = d->atoms->atom_bigdbl;
}
d->is_partial = 1;
*value = enif_make_sub_binary(d->env, d->arg, st, d->i - st);
*value = enif_make_tuple2(d->env, num_type, *value);
return 1;
}
@ -921,8 +924,8 @@ decode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
if(dec_curr(d) != st_done) {
ret = dec_error(d, "truncated_json");
} else if(d->has_bignum) {
ret = enif_make_tuple2(env, d->atoms->atom_bignum, val);
} else if(d->is_partial) {
ret = enif_make_tuple2(env, d->atoms->atom_partial, val);
} else {
ret = enif_make_tuple2(env, d->atoms->atom_ok, val);
}

+ 80
- 22
c_src/encoder.c Переглянути файл

@ -14,9 +14,11 @@ typedef struct {
int count;
int iolen;
ERL_NIF_TERM iolist;
ErlNifBinary* curr;
char* p;
unsigned char* u;
size_t i;
@ -30,6 +32,7 @@ enc_init(Encoder* e, ErlNifEnv* env, ErlNifBinary* bin)
e->count = 0;
e->iolen = 0;
e->iolist = enif_make_list(env, 0);
e->curr = bin;
if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
@ -56,10 +59,34 @@ enc_destroy(Encoder* e)
ERL_NIF_TERM
enc_error(Encoder* e, const char* msg)
{
assert(0 && msg);
//assert(0 && msg);
return make_error(e->atoms, e->env, msg);
}
/*
 * Make sure the current output binary can take `req` more bytes beyond the
 * write offset e->i, growing the binary when it cannot.
 *
 * Returns 1 when enough room is available (with or without growing) and 0
 * when enif_realloc_binary() fails.
 */
int
enc_ensure(Encoder* e, size_t req)
{
    size_t avail = e->curr->size - e->i;
    size_t grown;

    if(req < avail) {
        return 1;
    }

    /* Current size plus the shortfall, then padded up to a multiple of
       BIN_INC_SIZE (a whole extra increment when already on a multiple). */
    grown = req - avail + e->curr->size;
    grown += BIN_INC_SIZE - (grown % BIN_INC_SIZE);

    assert(grown > e->curr->size && "Invalid size calculation.");

    if(!enif_realloc_binary(e->curr, grown)) {
        return 0;
    }

    /* Re-read the data pointer after the realloc and zero everything from
       the write offset to the end of the resized binary. */
    e->p = (char*) e->curr->data;
    e->u = (unsigned char*) e->curr->data;
    memset(&(e->u[e->i]), 0, e->curr->size - e->i);

    return 1;
}
int
enc_result(Encoder* e, ERL_NIF_TERM* value)
{
@ -75,25 +102,56 @@ enc_result(Encoder* e, ERL_NIF_TERM* value)
}
int
enc_ensure(Encoder* e, size_t req)
enc_done(Encoder* e, ERL_NIF_TERM* value)
{
size_t new_sz;
ERL_NIF_TERM last;
if(req < e->curr->size - e->i) {
return 1;
if(e->iolen == 0) {
return enc_result(e, value);
}
new_sz = req - (e->curr->size - e->i) + e->curr->size;
new_sz += BIN_INC_SIZE - (new_sz % BIN_INC_SIZE);
assert(new_sz > e->curr->size && "Invalid size calculation.");
if(!enif_realloc_binary(e->curr, new_sz)) {
if(e->i > 0 ) {
if(!enc_result(e, &last)) {
return 0;
}
e->iolist = enif_make_list_cell(e->env, last, e->iolist);
e->iolen++;
}
*value = e->iolist;
return 1;
}
int
enc_unknown(Encoder* e, ERL_NIF_TERM value)
{
ErlNifBinary* bin = e->curr;
ERL_NIF_TERM curr;
if(e->i > 0) {
if(!enc_result(e, &curr)) {
return 0;
}
e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
e->iolen++;
}
e->iolist = enif_make_list_cell(e->env, value, e->iolist);
e->iolen++;
// Reinitialize our binary for the next buffer.
e->curr = bin;
if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
return 0;
}
memset(e->curr->data, 0, e->curr->size);
e->p = (char*) e->curr->data;
e->u = (unsigned char*) e->curr->data;
memset(&(e->u[e->i]), 0, e->curr->size - e->i);
e->i = 0;
return 1;
}
@ -290,7 +348,7 @@ enc_double(Encoder* e, double val)
return 0;
}
snprintf(&(e->p[e->i]), 31, "%g", val);
snprintf(&(e->p[e->i]), 31, "%0.20g", val);
e->i += strlen(&(e->p[e->i]));
e->count++;
@ -363,7 +421,7 @@ encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
double dval;
long lval;
int has_unknown = 0;
int is_partial = 0;
if(argc != 1) {
return enif_make_badarg(env);
@ -540,24 +598,24 @@ encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
stack = enif_make_list_cell(env, item, stack);
} else {
has_unknown = 1;
ret = enc_error(e, "invalid_ejson");
goto done;
/*
if(!enc_unknown(env, curr)) {
is_partial = 1;
if(!enc_unknown(e, curr)) {
ret = enc_error(e, "internal_error");
goto done;
}
*/
}
} while(!enif_is_empty_list(env, stack));
if(!enc_result(e, &item)) {
if(!enc_done(e, &item)) {
ret = enc_error(e, "internal_error");
goto done;
}
ret = enif_make_tuple2(env, e->atoms->atom_ok, item);
if(!is_partial) {
ret = enif_make_tuple2(env, e->atoms->atom_ok, item);
} else {
ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
}
done:
enc_destroy(e);

+ 3
- 0
c_src/jiffy.c Переглянути файл

@ -14,6 +14,9 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
st->atom_true = make_atom(env, "true");
st->atom_false = make_atom(env, "false");
st->atom_bignum = make_atom(env, "bignum");
st->atom_bignum_e = make_atom(env, "bignum_e");
st->atom_bigdbl = make_atom(env, "bigdbl");
st->atom_partial = make_atom(env, "partial");
// Markers used in encoding
st->ref_object = make_atom(env, "$object_ref$");

+ 4
- 0
c_src/jiffy.h Переглянути файл

@ -10,6 +10,10 @@ typedef struct {
ERL_NIF_TERM atom_true;
ERL_NIF_TERM atom_false;
ERL_NIF_TERM atom_bignum;
ERL_NIF_TERM atom_bignum_e;
ERL_NIF_TERM atom_bigdbl;
ERL_NIF_TERM atom_partial;
ERL_NIF_TERM ref_object;
ERL_NIF_TERM ref_array;
} jiffy_st;

+ 52
- 22
src/jiffy.erl Переглянути файл

@ -6,41 +6,64 @@
decode(Data) ->
case nif_decode(Data) of
{bignum, EJson} ->
{ok, debignum(EJson)};
{partial, EJson} ->
{ok, finish_decode(EJson)};
Else ->
Else
end.
encode(Data) ->
nif_encode(Data).
nif_decode(_Data) ->
?NOT_LOADED.
nif_encode(_Data) ->
?NOT_LOADED.
case nif_encode(Data) of
{partial, IOData} ->
finish_encode(IOData, []);
Else ->
Else
end.
debignum({bignum, Value}) ->
finish_decode({bignum, Value}) ->
list_to_integer(binary_to_list(Value));
debignum({Pairs}) when is_list(Pairs) ->
debignum_obj(Pairs, []);
debignum(Vals) when is_list(Vals) ->
debignum_arr(Vals, []);
debignum(Val) ->
finish_decode({bignum_e, Value}) ->
{IVal, EVal} = case string:to_integer(binary_to_list(Value)) of
{I, [$e | ExpStr]} ->
{E, []} = string:to_integer(ExpStr),
{I, E};
{I, [$E | ExpStr]} ->
{E, []} = string:to_integer(ExpStr),
{I, E}
end,
IVal * math:pow(10, EVal);
finish_decode({bigdbl, Value}) ->
list_to_float(binary_to_list(Value));
finish_decode({Pairs}) when is_list(Pairs) ->
finish_decode_obj(Pairs, []);
finish_decode(Vals) when is_list(Vals) ->
finish_decode_arr(Vals, []);
finish_decode(Val) ->
Val.
debignum_obj([], Acc) ->
finish_decode_obj([], Acc) ->
{lists:reverse(Acc)};
debignum_obj([{K, V} | Pairs], Acc) ->
debignum_obj(Pairs, [{K, debignum(V)} | Acc]).
finish_decode_obj([{K, V} | Pairs], Acc) ->
finish_decode_obj(Pairs, [{K, finish_decode(V)} | Acc]).
debignum_arr([], Acc) ->
finish_decode_arr([], Acc) ->
lists:reverse(Acc);
debignum_arr([V | Vals], Acc) ->
debignum_arr(Vals, [debignum(V) | Acc]).
finish_decode_arr([V | Vals], Acc) ->
finish_decode_arr(Vals, [finish_decode(V) | Acc]).
%% Complete an encoding that the NIF reported as partial.
%% The first argument is the list of pieces returned by the NIF: binaries are
%% kept as they are, integers are rendered as decimal text. Any other piece
%% yields {error, invalid_ejson}. On success returns {ok, Pieces}.
finish_encode([Piece | Pieces], Done) when is_binary(Piece) ->
    finish_encode(Pieces, [Piece | Done]);
finish_encode([Piece | Pieces], Done) when is_integer(Piece) ->
    Digits = integer_to_list(Piece),
    finish_encode(Pieces, [list_to_binary(Digits) | Done]);
finish_encode([], Done) ->
    %% Deliberately not reversed: the NIF hands the pieces over in reverse
    %% order, so prepending each one already restores the output order.
    {ok, Done};
finish_encode(_, _) ->
    {error, invalid_ejson}.
init() ->
@ -60,3 +83,10 @@ init() ->
%% Terminate the calling process with a not_loaded reason that records this
%% module and the line number passed in by the caller.
not_loaded(Line) ->
    Where = [{module, ?MODULE}, {line, Line}],
    exit({not_loaded, Where}).
%% Erlang-side stub for the native decoder; the argument is ignored here.
%% NOTE(review): ?NOT_LOADED is defined outside this view - presumably it
%% calls not_loaded/1 with the current line; confirm against the macro.
nif_decode(_Data) ->
?NOT_LOADED.
%% Erlang-side stub for the native encoder; the argument is ignored here.
%% NOTE(review): ?NOT_LOADED is defined outside this view - presumably it
%% calls not_loaded/1 with the current line; confirm against the macro.
nif_encode(_Data) ->
?NOT_LOADED.

+ 28
- 7
test/003-numbers.t Переглянути файл

@ -4,7 +4,7 @@ main([]) ->
code:add_pathz("ebin"),
code:add_pathz("test"),
etap:plan(47),
etap:plan(57),
util:test_good(good()),
util:test_errors(errors()),
etap:end_tests().
@ -16,19 +16,40 @@ good() ->
{<<"1">>, 1},
{<<"12">>, 12},
{<<"-3">>, -3},
{<<"309230948234098">>, 309230948234098},
{<<"1234567890123456789012345">>, 1234567890123456789012345},
{
<<"1234567890123456789012345.0">>,
1.23456789012345678e24,
<<"1.2345678901234568245e+24">>
},
{
<<"1234567890123456789012345.0E3">>,
1.2345678901234569e27,
<<"1.2345678901234568502e+27">>
},
{
<<"1234567890123456789012345E2">>,
123456789012345678901234500,
<<"123456789012345678901234500">>
},
{
<<"0.000000000000000000000000000000000001">>,
1.0E-36,
<<"9.9999999999999994104e-37">>
},
{<<"1.0">>, 1.0, <<"1">>},
{<<"0.3">>, 0.3},
{<<"2.4234324">>, 2.4234324, <<"2.42343">>},
{<<"-3.1416">>, -3.1416},
{<<"0.75">>, 0.75},
{<<"2.0123456789">>, 2.0123456789, <<"2.0123456789000000455">>},
{<<"2.4234324E24">>, 2.4234324E24, <<"2.4234323999999998107e+24">>},
{<<"-3.1416">>, -3.1416, <<"-3.1415999999999999481">>},
{<<"1E4">>, 10000.0, <<"10000">>},
{<<"1.0E+01">>, 10.0, <<"10">>},
{<<"1e1">>, 10.0, <<"10">>},
{<<"3.0E2">>, 300.0, <<"300">>},
{<<"0E3">>, 0.0, <<"0">>},
{<<"1.5E3">>, 1500.0, <<"1500">>},
{<<"1.5E-1">>, 0.15, <<"0.15">>},
{<<"-0.323E+2">>, -32.3, <<"-32.3">>}
{<<"2.5E-1">>, 0.25, <<"0.25">>},
{<<"-0.325E+2">>, -32.5, <<"-32.5">>}
].
errors() ->

+ 1
- 1
test/005-arrays.t Переглянути файл

@ -15,7 +15,7 @@ good() ->
{<<"[\t[\n]\r]">>, [[]], <<"[[]]">>},
{<<"[\t123, \r true\n]">>, [123, true], <<"[123,true]">>},
{<<"[1,\"foo\"]">>, [1, <<"foo">>]},
{<<"[1199344435545.0,1]">>, [1199344435545.0,1], <<"[1.19934e+12,1]">>},
{<<"[11993444355.0,1]">>, [11993444355.0,1], <<"[11993444355,1]">>},
{
<<"[\"\\u00A1\",\"\\u00FC\"]">>,
[<<194, 161>>, <<195, 188>>],

+ 6
- 2
test/util.erl Переглянути файл

@ -13,15 +13,19 @@ ok_dec(J, _E) ->
%% Build the flat description string used for an encoding check of term E.
%% The second argument is accepted but not used.
ok_enc(E, _J) ->
    Text = io_lib:format("Encoded ~p", [E]),
    lists:flatten(Text).
%% Encode E with jiffy:encode/1 and collapse the resulting iodata into one
%% binary. Fails with badmatch when jiffy:encode/1 returns anything other
%% than {ok, _}.
do_encode(E) ->
    {ok, IoData} = jiffy:encode(E),
    Flat = iolist_to_binary(IoData),
    {ok, Flat}.
%% Build the flat description string used for a check that decoding J
%% returns an error.
error_mesg(J) ->
    Text = io_lib:format("Decoding ~p returns an error.", [J]),
    lists:flatten(Text).
check_good({J, E}) ->
etap:is(jiffy:decode(J), {ok, E}, ok_dec(J, E)),
etap:is(jiffy:encode(E), {ok, J}, ok_enc(E, J));
etap:is(do_encode(E), {ok, J}, ok_enc(E, J));
check_good({J, E, J2}) ->
etap:is(jiffy:decode(J), {ok, E}, ok_dec(J, E)),
etap:is(jiffy:encode(E), {ok, J2}, ok_enc(E, J2)).
etap:is(do_encode(E), {ok, J2}, ok_enc(E, J2)).
check_error(J) ->
etap:fun_is(

Завантаження…
Відмінити
Зберегти