diff --git a/c_src/decoder.c b/c_src/decoder.c index 7968c39..38ae858 100644 --- a/c_src/decoder.c +++ b/c_src/decoder.c @@ -11,7 +11,7 @@ #define ERROR(i, msg) make_error(st, env, msg) #define STACK_SIZE_INC 64 -#define NUM_BUF_LEN 256 +#define NUM_BUF_LEN 32 enum { st_value=0, @@ -42,7 +42,7 @@ typedef struct { ERL_NIF_TERM arg; ErlNifBinary bin; - int has_bignum; + int is_partial; char* p; unsigned char* u; @@ -63,7 +63,7 @@ dec_init(Decoder* d, ErlNifEnv* env, ERL_NIF_TERM arg, ErlNifBinary* bin) d->atoms = enif_priv_data(env); d->arg = arg; - d->has_bignum = 0; + d->is_partial = 0; d->p = (char*) bin->data; d->u = bin->data; @@ -353,10 +353,12 @@ parse: int dec_number(Decoder* d, ERL_NIF_TERM* value) { + ERL_NIF_TERM num_type = d->atoms->atom_error; char state = nst_init; char nbuf[NUM_BUF_LEN]; int st = d->i; - int is_double = 0; + int has_frac = 0; + int has_exp = 0; double dval; long lval; @@ -457,7 +459,7 @@ dec_number(Decoder* d, ERL_NIF_TERM* value) break; case nst_frac1: - is_double = 1; + has_frac = 1; switch(d->p[d->i]) { case '0': case '1': @@ -502,7 +504,7 @@ dec_number(Decoder* d, ERL_NIF_TERM* value) break; case nst_esign: - is_double = 1; + has_exp = 1; switch(d->p[d->i]) { case '-': case '+': @@ -560,37 +562,38 @@ parse: break; } + errno = 0; - if(st - d->i > NUM_BUF_LEN && is_double) { - return 0; - } else if(st - d->i > NUM_BUF_LEN) { - d->has_bignum = 1; - *value = enif_make_sub_binary(d->env, d->arg, st, d->i - st); - *value = enif_make_tuple2(d->env, d->atoms->atom_bignum, *value); - return 1; - } - - memset(nbuf, 0, NUM_BUF_LEN); - memcpy(nbuf, &(d->p[st]), d->i - st); + if(d->i - st < NUM_BUF_LEN) { + memset(nbuf, 0, NUM_BUF_LEN); + memcpy(nbuf, &(d->p[st]), d->i - st); - errno = 0; - if(is_double) { - dval = strtod(nbuf, NULL); - if(errno == ERANGE) { - return 0; + if(has_frac || has_exp) { + dval = strtod(nbuf, NULL); + if(errno != ERANGE) { + *value = enif_make_double(d->env, dval); + return 1; + } + } else { + lval = strtol(nbuf, NULL, 10); + if(errno != ERANGE) { + *value = enif_make_int64(d->env, lval); + return 1; + } } - *value = enif_make_double(d->env, dval); - return 1; } - - lval = strtol(nbuf, NULL, 10); - if(errno == ERANGE) { - d->has_bignum = 1; - *value = enif_make_sub_binary(d->env, d->arg, st, d->i - st); - *value = enif_make_tuple2(d->env, d->atoms->atom_bignum, *value); + + if(!has_frac && !has_exp) { + num_type = d->atoms->atom_bignum; + } else if(has_exp) { + num_type = d->atoms->atom_bignum_e; } else { - *value = enif_make_int64(d->env, lval); + num_type = d->atoms->atom_bigdbl; } + + d->is_partial = 1; + *value = enif_make_sub_binary(d->env, d->arg, st, d->i - st); + *value = enif_make_tuple2(d->env, num_type, *value); return 1; } @@ -921,8 +924,8 @@ decode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) if(dec_curr(d) != st_done) { ret = dec_error(d, "truncated_json"); - } else if(d->has_bignum) { - ret = enif_make_tuple2(env, d->atoms->atom_bignum, val); + } else if(d->is_partial) { + ret = enif_make_tuple2(env, d->atoms->atom_partial, val); } else { ret = enif_make_tuple2(env, d->atoms->atom_ok, val); } diff --git a/c_src/encoder.c b/c_src/encoder.c index 67c87c5..b00784c 100644 --- a/c_src/encoder.c +++ b/c_src/encoder.c @@ -14,9 +14,11 @@ typedef struct { int count; + int iolen; ERL_NIF_TERM iolist; ErlNifBinary* curr; + char* p; unsigned char* u; size_t i; @@ -30,6 +32,7 @@ enc_init(Encoder* e, ErlNifEnv* env, ErlNifBinary* bin) e->count = 0; + e->iolen = 0; e->iolist = enif_make_list(env, 0); e->curr = bin; if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) { @@ -56,10 +59,34 @@ enc_destroy(Encoder* e) ERL_NIF_TERM enc_error(Encoder* e, const char* msg) { - assert(0 && msg); + //assert(0 && msg); return make_error(e->atoms, e->env, msg); } +int +enc_ensure(Encoder* e, size_t req) +{ + size_t new_sz; + + if(req < e->curr->size - e->i) { + return 1; + } + + new_sz = req - (e->curr->size - e->i) + e->curr->size; + new_sz += BIN_INC_SIZE - (new_sz % BIN_INC_SIZE); + assert(new_sz > e->curr->size && "Invalid size calculation."); + + if(!enif_realloc_binary(e->curr, new_sz)) { + return 0; + } + e->p = (char*) e->curr->data; + e->u = (unsigned char*) e->curr->data; + + memset(&(e->u[e->i]), 0, e->curr->size - e->i); + + return 1; +} + int enc_result(Encoder* e, ERL_NIF_TERM* value) { @@ -75,25 +102,56 @@ enc_result(Encoder* e, ERL_NIF_TERM* value) } int -enc_ensure(Encoder* e, size_t req) +enc_done(Encoder* e, ERL_NIF_TERM* value) { - size_t new_sz; + ERL_NIF_TERM last; - if(req < e->curr->size - e->i) { - return 1; + if(e->iolen == 0) { + return enc_result(e, value); } - new_sz = req - (e->curr->size - e->i) + e->curr->size; - new_sz += BIN_INC_SIZE - (new_sz % BIN_INC_SIZE); - assert(new_sz > e->curr->size && "Invalid size calculation."); - - if(!enif_realloc_binary(e->curr, new_sz)) { + if(e->i > 0 ) { + if(!enc_result(e, &last)) { + return 0; + } + + e->iolist = enif_make_list_cell(e->env, last, e->iolist); + e->iolen++; + } + + *value = e->iolist; + return 1; +} + +int +enc_unknown(Encoder* e, ERL_NIF_TERM value) +{ + ErlNifBinary* bin = e->curr; + ERL_NIF_TERM curr; + + if(e->i > 0) { + if(!enc_result(e, &curr)) { + return 0; + } + + e->iolist = enif_make_list_cell(e->env, curr, e->iolist); + e->iolen++; + } + + e->iolist = enif_make_list_cell(e->env, value, e->iolist); + e->iolen++; + + // Reinitialize our binary for the next buffer. + e->curr = bin; + if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) { return 0; } + + memset(e->curr->data, 0, e->curr->size); + e->p = (char*) e->curr->data; e->u = (unsigned char*) e->curr->data; - - memset(&(e->u[e->i]), 0, e->curr->size - e->i); + e->i = 0; return 1; } @@ -290,7 +348,7 @@ enc_double(Encoder* e, double val) return 0; } - snprintf(&(e->p[e->i]), 31, "%g", val); + snprintf(&(e->p[e->i]), 31, "%0.20g", val); e->i += strlen(&(e->p[e->i])); e->count++; @@ -363,7 +421,7 @@ encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) double dval; long lval; - int has_unknown = 0; + int is_partial = 0; if(argc != 1) { return enif_make_badarg(env); @@ -540,24 +598,24 @@ encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) stack = enif_make_list_cell(env, e->atoms->ref_array, stack); stack = enif_make_list_cell(env, item, stack); } else { - has_unknown = 1; - ret = enc_error(e, "invalid_ejson"); - goto done; - /* - if(!enc_unknown(env, curr)) { + is_partial = 1; + if(!enc_unknown(e, curr)) { ret = enc_error(e, "internal_error"); goto done; } - */ } } while(!enif_is_empty_list(env, stack)); - if(!enc_result(e, &item)) { + if(!enc_done(e, &item)) { ret = enc_error(e, "internal_error"); goto done; } - ret = enif_make_tuple2(env, e->atoms->atom_ok, item); + if(!is_partial) { + ret = enif_make_tuple2(env, e->atoms->atom_ok, item); + } else { + ret = enif_make_tuple2(env, e->atoms->atom_partial, item); + } done: enc_destroy(e); diff --git a/c_src/jiffy.c b/c_src/jiffy.c index fd67e62..3b737f8 100644 --- a/c_src/jiffy.c +++ b/c_src/jiffy.c @@ -14,6 +14,9 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) st->atom_true = make_atom(env, "true"); st->atom_false = make_atom(env, "false"); st->atom_bignum = make_atom(env, "bignum"); + st->atom_bignum_e = make_atom(env, "bignum_e"); + st->atom_bigdbl = make_atom(env, "bigdbl"); + st->atom_partial = make_atom(env, "partial"); // Markers used in encoding st->ref_object = make_atom(env, "$object_ref$"); diff --git a/c_src/jiffy.h b/c_src/jiffy.h index 7fa0b81..1844312 100644 --- a/c_src/jiffy.h +++ b/c_src/jiffy.h @@ -10,6 +10,10 @@ typedef struct { ERL_NIF_TERM atom_true; ERL_NIF_TERM atom_false; ERL_NIF_TERM atom_bignum; + ERL_NIF_TERM atom_bignum_e; + ERL_NIF_TERM atom_bigdbl; + ERL_NIF_TERM atom_partial; + ERL_NIF_TERM ref_object; ERL_NIF_TERM ref_array; } jiffy_st; diff --git a/src/jiffy.erl b/src/jiffy.erl index 144980b..e313bbd 100644 --- a/src/jiffy.erl +++ b/src/jiffy.erl @@ -6,41 +6,64 @@ decode(Data) -> case nif_decode(Data) of - {bignum, EJson} -> - {ok, debignum(EJson)}; + {partial, EJson} -> + {ok, finish_decode(EJson)}; Else -> Else end. encode(Data) -> - nif_encode(Data). - - -nif_decode(_Data) -> - ?NOT_LOADED. - -nif_encode(_Data) -> - ?NOT_LOADED. + case nif_encode(Data) of + {partial, IOData} -> + finish_encode(IOData, []); + Else -> + Else + end. -debignum({bignum, Value}) -> +finish_decode({bignum, Value}) -> list_to_integer(binary_to_list(Value)); -debignum({Pairs}) when is_list(Pairs) -> - debignum_obj(Pairs, []); -debignum(Vals) when is_list(Vals) -> - debignum_arr(Vals, []); -debignum(Val) -> +finish_decode({bignum_e, Value}) -> + {IVal, EVal} = case string:to_integer(binary_to_list(Value)) of + {I, [$e | ExpStr]} -> + {E, []} = string:to_integer(ExpStr), + {I, E}; + {I, [$E | ExpStr]} -> + {E, []} = string:to_integer(ExpStr), + {I, E} + end, + IVal * math:pow(10, EVal); +finish_decode({bigdbl, Value}) -> + list_to_float(binary_to_list(Value)); +finish_decode({Pairs}) when is_list(Pairs) -> + finish_decode_obj(Pairs, []); +finish_decode(Vals) when is_list(Vals) -> + finish_decode_arr(Vals, []); +finish_decode(Val) -> Val. -debignum_obj([], Acc) -> +finish_decode_obj([], Acc) -> {lists:reverse(Acc)}; -debignum_obj([{K, V} | Pairs], Acc) -> - debignum_obj(Pairs, [{K, debignum(V)} | Acc]). +finish_decode_obj([{K, V} | Pairs], Acc) -> + finish_decode_obj(Pairs, [{K, finish_decode(V)} | Acc]). -debignum_arr([], Acc) -> +finish_decode_arr([], Acc) -> lists:reverse(Acc); -debignum_arr([V | Vals], Acc) -> - debignum_arr(Vals, [debignum(V) | Acc]). +finish_decode_arr([V | Vals], Acc) -> + finish_decode_arr(Vals, [finish_decode(V) | Acc]). + + +finish_encode([], Acc) -> + %% No reverse! The NIF returned us + %% the pieces in reverse order. + {ok, Acc}; +finish_encode([<<_/binary>>=B | Rest], Acc) -> + finish_encode(Rest, [B | Acc]); +finish_encode([Val | Rest], Acc) when is_integer(Val) -> + Bin = list_to_binary(integer_to_list(Val)), + finish_encode(Rest, [Bin | Acc]); +finish_encode(_, _) -> + {error, invalid_ejson}. init() -> @@ -60,3 +83,10 @@ init() -> not_loaded(Line) -> exit({not_loaded, [{module, ?MODULE}, {line, Line}]}). + +nif_decode(_Data) -> + ?NOT_LOADED. + +nif_encode(_Data) -> + ?NOT_LOADED. + diff --git a/test/003-numbers.t b/test/003-numbers.t index a596725..a854ae4 100755 --- a/test/003-numbers.t +++ b/test/003-numbers.t @@ -4,7 +4,7 @@ main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - etap:plan(47), + etap:plan(57), util:test_good(good()), util:test_errors(errors()), etap:end_tests(). @@ -16,19 +16,40 @@ good() -> {<<"1">>, 1}, {<<"12">>, 12}, {<<"-3">>, -3}, - {<<"309230948234098">>, 309230948234098}, + {<<"1234567890123456789012345">>, 1234567890123456789012345}, + { + <<"1234567890123456789012345.0">>, + 1.23456789012345678e24, + <<"1.2345678901234568245e+24">> + }, + { + <<"1234567890123456789012345.0E3">>, + 1.2345678901234569e27, + <<"1.2345678901234568502e+27">> + }, + { + <<"1234567890123456789012345E2">>, + 123456789012345678901234500, + <<"123456789012345678901234500">> + }, + { + <<"0.000000000000000000000000000000000001">>, + 1.0E-36, + <<"9.9999999999999994104e-37">> + }, {<<"1.0">>, 1.0, <<"1">>}, - {<<"0.3">>, 0.3}, - {<<"2.4234324">>, 2.4234324, <<"2.42343">>}, - {<<"-3.1416">>, -3.1416}, + {<<"0.75">>, 0.75}, + {<<"2.0123456789">>, 2.0123456789, <<"2.0123456789000000455">>}, + {<<"2.4234324E24">>, 2.4234324E24, <<"2.4234323999999998107e+24">>}, + {<<"-3.1416">>, -3.1416, <<"-3.1415999999999999481">>}, {<<"1E4">>, 10000.0, <<"10000">>}, {<<"1.0E+01">>, 10.0, <<"10">>}, {<<"1e1">>, 10.0, <<"10">>}, {<<"3.0E2">>, 300.0, <<"300">>}, {<<"0E3">>, 0.0, <<"0">>}, {<<"1.5E3">>, 1500.0, <<"1500">>}, - {<<"1.5E-1">>, 0.15, <<"0.15">>}, - {<<"-0.323E+2">>, -32.3, <<"-32.3">>} + {<<"2.5E-1">>, 0.25, <<"0.25">>}, + {<<"-0.325E+2">>, -32.5, <<"-32.5">>} ]. errors() -> diff --git a/test/005-arrays.t b/test/005-arrays.t index 0a0dd5e..78120a4 100755 --- a/test/005-arrays.t +++ b/test/005-arrays.t @@ -15,7 +15,7 @@ good() -> {<<"[\t[\n]\r]">>, [[]], <<"[[]]">>}, {<<"[\t123, \r true\n]">>, [123, true], <<"[123,true]">>}, {<<"[1,\"foo\"]">>, [1, <<"foo">>]}, - {<<"[1199344435545.0,1]">>, [1199344435545.0,1], <<"[1.19934e+12,1]">>}, + {<<"[11993444355.0,1]">>, [11993444355.0,1], <<"[11993444355,1]">>}, { <<"[\"\\u00A1\",\"\\u00FC\"]">>, [<<194, 161>>, <<195, 188>>], diff --git a/test/util.erl b/test/util.erl index fecc49b..0a9fa36 100644 --- a/test/util.erl +++ b/test/util.erl @@ -13,15 +13,19 @@ ok_dec(J, _E) -> ok_enc(E, _J) -> lists:flatten(io_lib:format("Encoded ~p", [E])). +do_encode(E) -> + {ok, Data} = jiffy:encode(E), + {ok, iolist_to_binary(Data)}. + error_mesg(J) -> lists:flatten(io_lib:format("Decoding ~p returns an error.", [J])). check_good({J, E}) -> etap:is(jiffy:decode(J), {ok, E}, ok_dec(J, E)), - etap:is(jiffy:encode(E), {ok, J}, ok_enc(E, J)); + etap:is(do_encode(E), {ok, J}, ok_enc(E, J)); check_good({J, E, J2}) -> etap:is(jiffy:decode(J), {ok, E}, ok_dec(J, E)), - etap:is(jiffy:encode(E), {ok, J2}, ok_enc(E, J2)). + etap:is(do_encode(E), {ok, J2}, ok_enc(E, J2)). check_error(J) -> etap:fun_is(