From 33afb923166525bb30d5a937be7987d1d832b238 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 31 Oct 2011 16:07:39 -0500 Subject: [PATCH 1/3] Enforce Unicode constraints more strictly It was possible to pass some types of invalid UTF-8 through Jiffy's encoder. Specifically, if uescaping isn't used, values that would decode from 0xD800 to 0xDFFF, 0xFFFE, 0xFFFF, and values greater than 0x10FFFF would not be flagged as invalid. Now they are. --- c_src/utf8.c | 16 ++++++++++++++++ test/004-strings.t | 7 ++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/c_src/utf8.c b/c_src/utf8.c index 198b3a8..230f631 100644 --- a/c_src/utf8.c +++ b/c_src/utf8.c @@ -132,6 +132,22 @@ utf8_validate(unsigned char* data, size_t size) if((data[0] & 0x07) + (data[1] & 0x30) == 0) return -1; } + + // Lastly we need to check some miscellaneous ranges for + // some of the larger code point values. + if(ulen >= 3) { + ui = utf8_to_unicode(data, ulen); + if(ui < 0) { + return -1; + } else if(ui >= 0xD800 && ui <= 0xDFFF) { + return -1; + } else if(ui == 0xFFFE || ui == 0xFFFF) { + return -1; + } else if(ui > 0x10FFFF) { + return -1; + } + } + return ulen; } diff --git a/test/004-strings.t b/test/004-strings.t index 6a69586..00d6d77 100755 --- a/test/004-strings.t +++ b/test/004-strings.t @@ -6,7 +6,7 @@ main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - etap:plan(78), + etap:plan(80), util:test_good(good()), util:test_good(uescaped(), [uescape]), util:test_errors(errors()), @@ -45,6 +45,7 @@ errors() -> <<"\"", 0, "\"">>, <<"\"\\g\"">>, <<"\"\\uFFFF\"">>, + <<"\"\\uFFFE\"">>, <<"\"\\uD834foo\\uDD1E\"">>, % CouchDB-345 <<34,78,69,73,77,69,78,32,70,216,82,82,32,70,65,69,78,33,34>> @@ -71,6 +72,10 @@ utf8_cases() -> % Stray continuation byte <<16#C2, 16#81, 16#80>>, <<"foo", 16#80, "bar">>, + + % Invalid Unicode code points + <<239, 191, 190>>, + <<237, 160, 129>>, % Not enough extension bytes <<16#C0>>, From e9b85b08c99f827e9a51d20ca66c201f8526e76e Mon 
Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 31 Oct 2011 16:23:46 -0500 Subject: [PATCH 2/3] Remove trailing whitespace --- c_src/decoder.c | 14 +++++++------- c_src/encoder.c | 20 ++++++++++---------- c_src/jiffy.c | 4 ++-- c_src/jiffy.h | 4 ++-- c_src/utf8.c | 10 +++++----- c_src/util.c | 2 +- src/jiffy.erl | 2 +- test/001-yajl-tests.t | 4 ++-- test/002-literals.t | 8 ++++---- test/003-numbers.t | 4 ++-- test/004-strings.t | 22 +++++++++++----------- test/005-arrays.t | 4 ++-- test/006-maps.t | 4 ++-- test/007-compound.t | 4 ++-- test/008-halfword.t | 6 +++--- test/cases/lonely_minus_sign.json | 2 +- test/etap.erl | 24 ++++++++++++------------ 17 files changed, 69 insertions(+), 69 deletions(-) diff --git a/c_src/decoder.c b/c_src/decoder.c index 3cef4cc..e8d3ac9 100644 --- a/c_src/decoder.c +++ b/c_src/decoder.c @@ -1,4 +1,4 @@ -// This file is part of Jiffy released under the MIT license. +// This file is part of Jiffy released under the MIT license. // See the LICENSE file for more information. 
#include @@ -76,7 +76,7 @@ dec_init(Decoder* d, ErlNifEnv* env, ERL_NIF_TERM arg, ErlNifBinary* bin) d->st_data = (char*) enif_alloc(STACK_SIZE_INC * sizeof(char)); d->st_size = STACK_SIZE_INC; d->st_top = 0; - + for(i = 0; i < d->st_size; i++) { d->st_data[i] = st_invalid; } @@ -122,7 +122,7 @@ dec_push(Decoder* d, char val) int i; if(d->st_top >= d->st_size) { - new_sz = d->st_size + STACK_SIZE_INC; + new_sz = d->st_size + STACK_SIZE_INC; tmp = (char*) enif_alloc(new_sz * sizeof(char)); memcpy(tmp, d->st_data, d->st_size * sizeof(char)); enif_free(d->st_data); @@ -523,7 +523,7 @@ dec_number(Decoder* d, ERL_NIF_TERM* value) } parse: - + switch(state) { case nst_init: case nst_sign: @@ -554,7 +554,7 @@ parse: } } } - + if(!has_frac && !has_exp) { num_type = d->atoms->atom_bignum; } else if(has_exp) { @@ -604,7 +604,7 @@ decode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { Decoder dec; Decoder* d = &dec; - + ErlNifBinary bin; ERL_NIF_TERM objs = enif_make_list(env, 0); @@ -739,7 +739,7 @@ decode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) curr = enif_make_list_cell(env, val, curr); } break; - + case st_key: switch(d->p[d->i]) { case ' ': diff --git a/c_src/encoder.c b/c_src/encoder.c index c46c12d..a751fd7 100644 --- a/c_src/encoder.c +++ b/c_src/encoder.c @@ -1,4 +1,4 @@ -// This file is part of Jiffy released under the MIT license. +// This file is part of Jiffy released under the MIT license. // See the LICENSE file for more information. 
#include @@ -21,7 +21,7 @@ typedef struct { int iolen; ERL_NIF_TERM iolist; ErlNifBinary* curr; - + char* p; unsigned char* u; @@ -32,7 +32,7 @@ int enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts, ErlNifBinary* bin) { ERL_NIF_TERM val; - + e->env = env; e->atoms = enif_priv_data(env); e->uescape = 0; @@ -41,7 +41,7 @@ enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts, ErlNifBinary* bin) if(!enif_is_list(env, opts)) { return 0; } - + while(enif_get_list_cell(env, opts, &val, &opts)) { if(enif_compare(val, e->atoms->atom_uescape) == 0) { e->uescape = 1; @@ -148,7 +148,7 @@ enc_unknown(Encoder* e, ERL_NIF_TERM value) e->iolist = enif_make_list_cell(e->env, curr, e->iolist); e->iolen++; } - + e->iolist = enif_make_list_cell(e->env, value, e->iolist); e->iolen++; @@ -157,7 +157,7 @@ enc_unknown(Encoder* e, ERL_NIF_TERM value) if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) { return 0; } - + memset(e->curr->data, 0, e->curr->size); e->p = (char*) e->curr->data; @@ -176,7 +176,7 @@ enc_literal(Encoder* e, const char* literal, size_t len) memcpy(&(e->p[e->i]), literal, len); e->i += len; - e->count++; + e->count++; return 1; } @@ -305,13 +305,13 @@ enc_string(Encoder* e, ERL_NIF_TERM val) if(uval < 0) { return 0; } - + ulen = unicode_uescape(uval, &(e->p[e->i])); if(ulen < 0) { return 0; } e->i += ulen; - + ulen = utf8_len(uval); if(ulen < 0) { return 0; @@ -434,7 +434,7 @@ encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) if(argc != 2) { return enif_make_badarg(env); } - + if(!enc_init(e, env, argv[1], &bin)) { return enif_make_badarg(env); } diff --git a/c_src/jiffy.c b/c_src/jiffy.c index 9ab25a2..c1dd8b1 100644 --- a/c_src/jiffy.c +++ b/c_src/jiffy.c @@ -1,4 +1,4 @@ -// This file is part of Jiffy released under the MIT license. +// This file is part of Jiffy released under the MIT license. // See the LICENSE file for more information. 
#include "jiffy.h" @@ -10,7 +10,7 @@ load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) if(st == NULL) { return 1; } - + st->atom_ok = make_atom(env, "ok"); st->atom_error = make_atom(env, "error"); st->atom_null = make_atom(env, "null"); diff --git a/c_src/jiffy.h b/c_src/jiffy.h index 3f25df2..9b1f700 100644 --- a/c_src/jiffy.h +++ b/c_src/jiffy.h @@ -1,4 +1,4 @@ -// This file is part of Jiffy released under the MIT license. +// This file is part of Jiffy released under the MIT license. // See the LICENSE file for more information. #ifndef JIFFY_H @@ -17,7 +17,7 @@ typedef struct { ERL_NIF_TERM atom_bigdbl; ERL_NIF_TERM atom_partial; ERL_NIF_TERM atom_uescape; - + ERL_NIF_TERM ref_object; ERL_NIF_TERM ref_array; } jiffy_st; diff --git a/c_src/utf8.c b/c_src/utf8.c index 230f631..53fc1b5 100644 --- a/c_src/utf8.c +++ b/c_src/utf8.c @@ -1,4 +1,4 @@ -// This file is part of Jiffy released under the MIT license. +// This file is part of Jiffy released under the MIT license. // See the LICENSE file for more information. #include "jiffy.h" #include @@ -93,7 +93,7 @@ utf8_validate(unsigned char* data, size_t size) int ui; if((data[0] & 0x80) == 0x00) { - ulen = 1; + ulen = 1; } if((data[0] & 0xE0) == 0xC0) { ulen = 2; } else if((data[0] & 0xF0) == 0xE0) { @@ -104,12 +104,12 @@ utf8_validate(unsigned char* data, size_t size) if(ulen < 0 || ulen > size) { return -1; } - + // Check each continuation byte. for(ui = 1; ui < ulen; ui++) { if((data[ui] & 0xC0) != 0x80) return -1; } - + // Wikipedia says I have to check that a UTF-8 encoding // uses as few bits as possible. This means that we // can't do things like encode 't' in three bytes. 
@@ -120,7 +120,7 @@ utf8_validate(unsigned char* data, size_t size) // 2: 110xxxxy 10yyyyyy // 3: 1110xxxx 10xyyyyy 10yyyyyy // 4: 11110xxx 10xxyyyy 10yyyyyy 10yyyyyy - + // ulen == 1 passes by definition if(ulen == 2) { if((data[0] & 0x1E) == 0) diff --git a/c_src/util.c b/c_src/util.c index 44845a2..f1be3ec 100644 --- a/c_src/util.c +++ b/c_src/util.c @@ -1,4 +1,4 @@ -// This file is part of Jiffy released under the MIT license. +// This file is part of Jiffy released under the MIT license. // See the LICENSE file for more information. #include "jiffy.h" diff --git a/src/jiffy.erl b/src/jiffy.erl index fb080eb..7e7084e 100644 --- a/src/jiffy.erl +++ b/src/jiffy.erl @@ -1,4 +1,4 @@ -% This file is part of Jiffy released under the MIT license. +% This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. -module(jiffy). diff --git a/test/001-yajl-tests.t b/test/001-yajl-tests.t index 14529f9..c8db641 100755 --- a/test/001-yajl-tests.t +++ b/test/001-yajl-tests.t @@ -1,11 +1,11 @@ #! /usr/bin/env escript -% This file is part of Jiffy released under the MIT license. +% This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. main([]) -> code:add_pathz("test"), code:add_pathz("ebin"), - + Cases = read_cases(), etap:plan(length(Cases)), diff --git a/test/002-literals.t b/test/002-literals.t index 2cdf28f..8df7255 100755 --- a/test/002-literals.t +++ b/test/002-literals.t @@ -1,18 +1,18 @@ #! /usr/bin/env escript -% This file is part of Jiffy released under the MIT license. +% This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. 
main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - + etap:plan(6), etap:is(jiffy:decode(<<"true">>), true, "DEC: true -> true"), etap:is(jiffy:encode(true), <<"true">>, "ENC: true -> true"), - + etap:is(jiffy:decode(<<"false">>), false, "DEC: false -> false"), etap:is(jiffy:encode(false), <<"false">>, "ENC: false -> false"), - + etap:is(jiffy:decode(<<"null">>), null, "DEC: null -> null"), etap:is(jiffy:encode(null), <<"null">>, "ENC: null -> null"), diff --git a/test/003-numbers.t b/test/003-numbers.t index a64fa6b..42d26fd 100755 --- a/test/003-numbers.t +++ b/test/003-numbers.t @@ -1,11 +1,11 @@ #! /usr/bin/env escript -% This file is part of Jiffy released under the MIT license. +% This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - + etap:plan(59), util:test_good(good()), util:test_errors(errors()), diff --git a/test/004-strings.t b/test/004-strings.t index 00d6d77..ff092cd 100755 --- a/test/004-strings.t +++ b/test/004-strings.t @@ -1,18 +1,18 @@ #! /usr/bin/env escript -% This file is part of Jiffy released under the MIT license. +% This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - + etap:plan(80), util:test_good(good()), util:test_good(uescaped(), [uescape]), util:test_errors(errors()), - + test_utf8(utf8_cases()), - + etap:end_tests(). good() -> @@ -76,13 +76,13 @@ utf8_cases() -> % Invalid Unicode code points <<239, 191, 190>>, <<237, 160, 129>>, - + % Not enough extension bytes <<16#C0>>, - + <<16#E0>>, <<16#E0, 16#80>>, - + <<16#F0>>, <<16#F0, 16#80>>, <<16#F0, 16#80, 16#80>>, @@ -91,7 +91,7 @@ utf8_cases() -> <<16#F8, 16#80>>, <<16#F8, 16#80, 16#80>>, <<16#F8, 16#80, 16#80, 16#80>>, - + <<16#FC>>, <<16#FC, 16#80>>, <<16#FC, 16#80, 16#80>>, @@ -101,16 +101,16 @@ utf8_cases() -> % No data in high bits. 
<<16#C0, 16#80>>, <<16#C1, 16#80>>, - + <<16#E0, 16#80, 16#80>>, <<16#E0, 16#90, 16#80>>, - + <<16#F0, 16#80, 16#80, 16#80>>, <<16#F0, 16#88, 16#80, 16#80>>, <<16#F8, 16#80, 16#80, 16#80, 16#80>>, <<16#F8, 16#84, 16#80, 16#80, 16#80>>, - + <<16#FC, 16#80, 16#80, 16#80, 16#80, 16#80>>, <<16#FC, 16#82, 16#80, 16#80, 16#80, 16#80>> ]. diff --git a/test/005-arrays.t b/test/005-arrays.t index 921c00a..53ffd2f 100755 --- a/test/005-arrays.t +++ b/test/005-arrays.t @@ -1,11 +1,11 @@ #! /usr/bin/env escript -% This file is part of Jiffy released under the MIT license. +% This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - + etap:plan(18), util:test_good(good()), util:test_errors(errors()), diff --git a/test/006-maps.t b/test/006-maps.t index 37aece4..45e715c 100755 --- a/test/006-maps.t +++ b/test/006-maps.t @@ -1,11 +1,11 @@ #! /usr/bin/env escript -% This file is part of Jiffy released under the MIT license. +% This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - + etap:plan(15), util:test_good(good()), util:test_errors(errors()), diff --git a/test/007-compound.t b/test/007-compound.t index 6964f47..2770971 100755 --- a/test/007-compound.t +++ b/test/007-compound.t @@ -1,11 +1,11 @@ #! /usr/bin/env escript -% This file is part of Jiffy released under the MIT license. +% This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - + etap:plan(12), util:test_good(good()), util:test_errors(errors()), diff --git a/test/008-halfword.t b/test/008-halfword.t index 0152812..56f0439 100755 --- a/test/008-halfword.t +++ b/test/008-halfword.t @@ -1,13 +1,13 @@ #! /usr/bin/env escript -% This file is part of Jiffy released under the MIT license. 
+% This file is part of Jiffy released under the MIT license. % See the LICENSE file for more information. main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - + etap:plan(unknown), - + etap:is(jiffy:decode(<<"1">>) =:= 1, true, "1 =:= 1"), etap:is(jiffy:decode(<<"1">>) == 1, true, "1 == 1"), diff --git a/test/cases/lonely_minus_sign.json b/test/cases/lonely_minus_sign.json index 85f69bd..c343683 100644 --- a/test/cases/lonely_minus_sign.json +++ b/test/cases/lonely_minus_sign.json @@ -2,6 +2,6 @@ "foo", true, true, "blue", "baby where are you?", "oh boo hoo!", - - + - ] diff --git a/test/etap.erl b/test/etap.erl index 82e0cfe..6924d09 100644 --- a/test/etap.erl +++ b/test/etap.erl @@ -1,5 +1,5 @@ %% Copyright (c) 2008-2009 Nick Gerakines -%% +%% %% Permission is hereby granted, free of charge, to any person %% obtaining a copy of this software and associated documentation %% files (the "Software"), to deal in the Software without @@ -8,10 +8,10 @@ %% copies of the Software, and to permit persons to whom the %% Software is furnished to do so, subject to the following %% conditions: -%% +%% %% The above copyright notice and this permission notice shall be %% included in all copies or substantial portions of the Software. -%% +%% %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, %% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES %% OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -20,7 +20,7 @@ %% WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING %% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR %% OTHER DEALINGS IN THE SOFTWARE. -%% +%% %% @author Nick Gerakines [http://socklabs.com/] %% @author Jeremy Wall %% @version 0.3.4 @@ -32,14 +32,14 @@ %% @todo Explain in documentation why we use a process to handle test input. %% @doc etap is a TAP testing module for Erlang components and applications. %% This module allows developers to test their software using the TAP method. -%% +%% %%

%% TAP, the Test Anything Protocol, is a simple text-based interface between %% testing modules in a test harness. TAP started life as part of the test %% harness for Perl but now has implementations in C/C++, Python, PHP, Perl %% and probably others by the time you read this. %%

-%% +%% %% The testing process begins by defining a plan using etap:plan/1, running %% a number of etap tests and then calling eta:end_tests/0. Please refer to %% the Erlang modules in the t directory of this project for example tests. @@ -335,7 +335,7 @@ skip(TestFun, Reason) when is_function(TestFun), is_list(Reason) -> ok. %% @spec skip(Q, TestFun, Reason) -> ok -%% Q = true | false | function() +%% Q = true | false | function() %% TestFun = function() %% Reason = string() %% @doc Skips a test conditionally. The first argument to this function can @@ -551,7 +551,7 @@ test_server(State) -> count = State#test_state.count + 1, pass = State#test_state.pass + 1 }; - + {_From, fail, Desc} -> FullMessage = skip_diag( " - " ++ Desc, @@ -587,11 +587,11 @@ mk_tap(Result, Desc) -> case [IsSkip, Result] of [_, true] -> etap_server ! {self(), pass, Desc}, - true; - [1, _] -> + true; + [1, _] -> etap_server ! {self(), pass, Desc}, - true; - _ -> + true; + _ -> etap_server ! {self(), fail, Desc}, false end. From b30cd375f5b00cbd01f8bbdcb1d1715b0f55ba9a Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 31 Oct 2011 16:24:27 -0500 Subject: [PATCH 3/3] Fix test plan for 004-strings.t --- test/004-strings.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/004-strings.t b/test/004-strings.t index ff092cd..8396bbd 100755 --- a/test/004-strings.t +++ b/test/004-strings.t @@ -6,7 +6,7 @@ main([]) -> code:add_pathz("ebin"), code:add_pathz("test"), - etap:plan(80), + etap:plan(83), util:test_good(good()), util:test_good(uescaped(), [uescape]), util:test_errors(errors()),