Browse Source

Enforce Unicode constraints more strictly

It was possible to pass some types of invalid UTF-8 through Jiffy's
encoder. Specifically, if uescaping isn't used, byte sequences that decode
to code points in the surrogate range 0xD800 to 0xDFFF, to 0xFFFE or 0xFFFF,
or to values greater than 0x10FFFF were not flagged as invalid. Now they are.
pull/8/merge
Paul J. Davis 13 years ago
parent
commit
33afb92316
2 changed files with 22 additions and 1 deletions
  1. +16
    -0
      c_src/utf8.c
  2. +6
    -1
      test/004-strings.t

+ 16
- 0
c_src/utf8.c View File

@ -132,6 +132,22 @@ utf8_validate(unsigned char* data, size_t size)
if((data[0] & 0x07) + (data[1] & 0x30) == 0) if((data[0] & 0x07) + (data[1] & 0x30) == 0)
return -1; return -1;
} }
// Lastly we need to check some miscellaneous ranges for
// some of the larger code point values.
if(ulen >= 3) {
ui = utf8_to_unicode(data, ulen);
if(ui < 0) {
return -1;
} else if(ui >= 0xD800 && ui <= 0xDFFF) {
return -1;
} else if(ui == 0xFFFE || ui == 0xFFFF) {
return -1;
} else if(ui > 0x10FFFF) {
return -1;
}
}
return ulen; return ulen;
} }

+ 6
- 1
test/004-strings.t View File

@ -6,7 +6,7 @@ main([]) ->
code:add_pathz("ebin"), code:add_pathz("ebin"),
code:add_pathz("test"), code:add_pathz("test"),
etap:plan(78),
etap:plan(80),
util:test_good(good()), util:test_good(good()),
util:test_good(uescaped(), [uescape]), util:test_good(uescaped(), [uescape]),
util:test_errors(errors()), util:test_errors(errors()),
@ -45,6 +45,7 @@ errors() ->
<<"\"", 0, "\"">>, <<"\"", 0, "\"">>,
<<"\"\\g\"">>, <<"\"\\g\"">>,
<<"\"\\uFFFF\"">>, <<"\"\\uFFFF\"">>,
<<"\"\\uFFFE\"">>,
<<"\"\\uD834foo\\uDD1E\"">>, <<"\"\\uD834foo\\uDD1E\"">>,
% CouchDB-345 % CouchDB-345
<<34,78,69,73,77,69,78,32,70,216,82,82,32,70,65,69,78,33,34>> <<34,78,69,73,77,69,78,32,70,216,82,82,32,70,65,69,78,33,34>>
@ -71,6 +72,10 @@ utf8_cases() ->
% Stray continuation byte % Stray continuation byte
<<16#C2, 16#81, 16#80>>, <<16#C2, 16#81, 16#80>>,
<<"foo", 16#80, "bar">>, <<"foo", 16#80, "bar">>,
% Invalid Unicode code points
<<239, 191, 190>>,
<<237, 160, 129>>,
% Not enough extension bytes % Not enough extension bytes
<<16#C0>>, <<16#C0>>,

Loading…
Cancel
Save