Browse Source

Add `dedupe_keys` option

You can now optionally request that keys are deduplicated inside of Jiffy
instead of having to perform that operation in Erlang.
pull/158/head
Paul J. Davis 7 years ago
parent
commit
128811a7cf
5 changed files with 132 additions and 37 deletions
  1. +6
    -37
      c_src/decoder.c
  2. +1
    -0
      c_src/jiffy.c
  3. +4
    -0
      c_src/jiffy.h
  4. +71
    -0
      c_src/objects.cc
  5. +50
    -0
      test/jiffy_16_dedupe_keys_tests.erl

+ 6
- 37
c_src/decoder.c View File

@ -53,6 +53,7 @@ typedef struct {
int is_partial;
int return_maps;
int return_trailer;
int dedupe_keys;
ERL_NIF_TERM null_term;
char* p;
@ -82,6 +83,7 @@ dec_new(ErlNifEnv* env)
d->is_partial = 0;
d->return_maps = 0;
d->return_trailer = 0;
d->dedupe_keys = 0;
d->null_term = d->atoms->atom_null;
d->p = NULL;
@ -623,42 +625,6 @@ make_empty_object(ErlNifEnv* env, int ret_map)
return enif_make_tuple1(env, enif_make_list(env, 0));
}
/*
 * Build the Erlang term for a decoded JSON object from a flat list.
 *
 * `pairs` is consumed two cells at a time; within each pair the first cell
 * is read as the value and the second as the key.
 *
 * When maps are available (MAP_TYPE_PRESENT) and `ret_map` is non-zero the
 * result written to `*out` is a map. Otherwise it is a 1-tuple wrapping a
 * list of {Key, Value} 2-tuples, built by prepending each pair as it is read.
 *
 * Returns 1 on success. Returns 0 if a map insertion fails or if `pairs`
 * holds an odd number of cells; `*out` is not written in that case.
 */
int
make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, int ret_map)
{
    ERL_NIF_TERM ret;
    ERL_NIF_TERM key;
    ERL_NIF_TERM val;

#if MAP_TYPE_PRESENT
    if(ret_map) {
        ret = enif_make_new_map(env);
        while(enif_get_list_cell(env, pairs, &val, &pairs)) {
            if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
                assert(0 == 1 && "Unbalanced object pairs.");
                /* With NDEBUG the assert disappears; bail out rather than
                 * use an uninitialised key. */
                return 0;
            }
            if(!enif_make_map_put(env, ret, key, val, &ret)) {
                return 0;
            }
        }
        *out = ret;
        return 1;
    }
#endif

    ret = enif_make_list(env, 0);
    while(enif_get_list_cell(env, pairs, &val, &pairs)) {
        if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
            assert(0 == 1 && "Unbalanced object pairs.");
            /* Same NDEBUG safety net as in the map branch. */
            return 0;
        }
        val = enif_make_tuple2(env, key, val);
        ret = enif_make_list_cell(env, val, ret);
    }
    *out = enif_make_tuple1(env, ret);

    return 1;
}
ERL_NIF_TERM
make_array(ErlNifEnv* env, ERL_NIF_TERM list)
{
@ -716,6 +682,8 @@ decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
#endif
} else if(enif_compare(val, d->atoms->atom_return_trailer) == 0) {
d->return_trailer = 1;
} else if(enif_compare(val, d->atoms->atom_dedupe_keys) == 0) {
d->dedupe_keys = 1;
} else if(enif_compare(val, d->atoms->atom_use_nil) == 0) {
d->null_term = d->atoms->atom_nil;
} else if(get_null_term(env, val, &(d->null_term))) {
@ -984,7 +952,8 @@ decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
}
dec_pop(d, st_object);
dec_pop(d, st_value);
if(!make_object(env, curr, &val, d->return_maps)) {
if(!make_object(env, curr, &val,
d->return_maps, d->dedupe_keys)) {
ret = dec_error(d, "internal_object_error");
goto done;
}

+ 1
- 0
c_src/jiffy.c View File

@ -32,6 +32,7 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
st->atom_use_nil = make_atom(env, "use_nil");
st->atom_null_term = make_atom(env, "null_term");
st->atom_escape_forward_slashes = make_atom(env, "escape_forward_slashes");
st->atom_dedupe_keys = make_atom(env, "dedupe_keys");
// Markers used in encoding
st->ref_object = make_atom(env, "$object_ref$");

+ 4
- 0
c_src/jiffy.h View File

@ -35,6 +35,7 @@ typedef struct {
ERL_NIF_TERM atom_use_nil;
ERL_NIF_TERM atom_null_term;
ERL_NIF_TERM atom_escape_forward_slashes;
ERL_NIF_TERM atom_dedupe_keys;
ERL_NIF_TERM ref_object;
ERL_NIF_TERM ref_array;
@ -61,6 +62,9 @@ ERL_NIF_TERM encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
void dec_destroy(ErlNifEnv* env, void* obj);
void enc_destroy(ErlNifEnv* env, void* obj);
int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
int ret_map, int dedupe_keys);
int int_from_hex(const unsigned char* p);
int int_to_hex(int val, char* p);
int utf8_len(int c);

+ 71
- 0
c_src/objects.cc View File

@ -0,0 +1,71 @@
// This file is part of Jiffy released under the MIT license.
// See the LICENSE file for more information.
#include <set>
#include <string>
#include <assert.h>
#include "erl_nif.h"
#define MAP_TYPE_PRESENT \
((ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION >= 6) \
|| (ERL_NIF_MAJOR_VERSION > 2))
#define BEGIN_C extern "C" {
#define END_C }
BEGIN_C
/*
 * Build the Erlang term for a decoded JSON object from a flat list.
 *
 * `pairs` is consumed two cells at a time; within each pair the first cell
 * is read as the value and the second as the key.
 *
 * When maps are available (MAP_TYPE_PRESENT) and `ret_map` is non-zero the
 * result written to `*out` is a map. NOTE(review): `dedupe_keys` is not
 * consulted on that path.
 *
 * Otherwise the result is a 1-tuple wrapping a list of {Key, Value}
 * 2-tuples, built by prepending each pair as it is read. With `dedupe_keys`
 * non-zero, only the first pair seen for a given key (compared byte-for-byte
 * on the key binary, so embedded NUL bytes are significant) is kept and
 * later pairs with the same key are dropped.
 *
 * Returns 1 on success. Returns 0 when a map insertion fails, when `pairs`
 * holds an odd number of cells, when a key is not a binary while deduping,
 * or when a C++ exception (e.g. allocation failure) is raised internally.
 * `*out` is not written on failure.
 */
int
make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out,
        int ret_map, int dedupe_keys)
{
    ERL_NIF_TERM ret;
    ERL_NIF_TERM key;
    ERL_NIF_TERM val;

#if MAP_TYPE_PRESENT
    if(ret_map) {
        ret = enif_make_new_map(env);
        while(enif_get_list_cell(env, pairs, &val, &pairs)) {
            if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
                assert(0 == 1 && "Unbalanced object pairs.");
                // With NDEBUG the assert disappears; bail out rather than
                // use an uninitialised key.
                return 0;
            }
            if(!enif_make_map_put(env, ret, key, val, &ret)) {
                return 0;
            }
        }
        *out = ret;
        return 1;
    }
#endif

    // This function has C linkage and is called from C code, so no C++
    // exception may leave it. std::set / std::string can throw on
    // allocation failure; convert that into the ordinary error return.
    try {
        std::set<std::string> seen;

        ret = enif_make_list(env, 0);
        while(enif_get_list_cell(env, pairs, &val, &pairs)) {
            if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
                assert(0 == 1 && "Unbalanced object pairs.");
                // Same NDEBUG safety net as in the map branch.
                return 0;
            }
            if(dedupe_keys) {
                ErlNifBinary bin;
                if(!enif_inspect_binary(env, key, &bin)) {
                    return 0;
                }
                // insert() reports whether the key was new, so a single
                // lookup both tests and records it.
                const bool is_new = seen.insert(std::string(
                        reinterpret_cast<const char*>(bin.data),
                        bin.size)).second;
                if(!is_new) {
                    continue;
                }
            }
            val = enif_make_tuple2(env, key, val);
            ret = enif_make_list_cell(env, val, ret);
        }
    } catch(...) {
        return 0;
    }

    *out = enif_make_tuple1(env, ret);

    return 1;
}
END_C

+ 50
- 0
test/jiffy_16_dedupe_keys_tests.erl View File

@ -0,0 +1,50 @@
% This file is part of Jiffy released under the MIT license.
% See the LICENSE file for more information.
-module(jiffy_16_dedupe_keys_tests).
-include_lib("eunit/include/eunit.hrl").
% EUnit generator: every case is a {Data, Result} pair. Data is encoded with
% jiffy:encode/1, the JSON is decoded with the dedupe_keys option, and the
% decoded term must equal Result.
dedupe_keys_test_() ->
    Opts = [dedupe_keys],
    Cases = [
        % Simple sanity check
        {
            {[{<<"foo">>, 1}]},
            {[{<<"foo">>, 1}]}
        },
        % Basic test
        {
            {[{<<"foo">>, 1}, {<<"foo">>, 2}]},
            {[{<<"foo">>, 2}]}
        },
        % Non-repeated keys are fine
        {
            {[{<<"foo">>, 1}, {<<"bar">>, 2}]},
            {[{<<"foo">>, 1}, {<<"bar">>, 2}]}
        },
        % Key order stays the same other than deduped keys
        {
            {[{<<"bar">>, 1}, {<<"foo">>, 2}, {<<"baz">>, 3}, {<<"foo">>, 4}]},
            {[{<<"bar">>, 1}, {<<"baz">>, 3}, {<<"foo">>, 4}]}
        },
        % Multiple repeats are handled
        {
            {[{<<"foo">>, 1}, {<<"foo">>, 2}, {<<"foo">>, 3}]},
            {[{<<"foo">>, 3}]}
        },
        % Keys containing a literal backslash followed by "u0000" are
        % handled. The doubled backslash means these binaries hold the six
        % characters \u0000, not a NUL byte.
        {
            {[{<<"foo\\u0000bar">>, 1}, {<<"foo\\u0000baz">>, 2}]},
            {[{<<"foo\\u0000bar">>, 1}, {<<"foo\\u0000baz">>, 2}]}
        },
        % Can dedupe keys containing a literal backslash-u0000 sequence
        {
            {[{<<"foo\\u0000bar">>, 1}, {<<"foo\\u0000bar">>, 2}]},
            {[{<<"foo\\u0000bar">>, 2}]}
        },
        % Embedded NUL bytes are handled: keys that differ only after the
        % NUL byte must stay distinct
        {
            {[{<<"foo", 0, "bar">>, 1}, {<<"foo", 0, "baz">>, 2}]},
            {[{<<"foo", 0, "bar">>, 1}, {<<"foo", 0, "baz">>, 2}]}
        },
        % Can dedupe with embedded NUL bytes
        {
            {[{<<"foo", 0, "bar">>, 1}, {<<"foo", 0, "bar">>, 2}]},
            {[{<<"foo", 0, "bar">>, 2}]}
        }
    ],
    {"Test dedupe_keys", lists:map(fun({Data, Result}) ->
        Json = jiffy:encode(Data),
        ?_assertEqual(Result, jiffy:decode(Json, Opts))
    end, Cases)}.

Loading…
Cancel
Save