From 137d3d94b6ee10001d761d412cbbe7f665680c98 Mon Sep 17 00:00:00 2001
From: "Paul J. Davis"
Date: Wed, 12 Nov 2014 18:46:06 -0600
Subject: [PATCH] Account for char possibly being unsigned

This sounds rather insane to me but I've managed to show that
`(char) -1` is converted to 255 on some platforms. This was reproduced
on ppc64el via Qemu on OS X. A simple program that does
`fprintf(stderr, "%d\r\n", (char) -1);` prints 255 to the console.

Rather than rely on the signedness of a char I've just updated things to
use an unsigned char (which hopefully is never signed) and replaced -1
with 255 for the sentinel value when converting hex values.

Thanks to Balint Reczey (@rbalint) for the report.

Fixes #74
---
 c_src/utf8.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/c_src/utf8.c b/c_src/utf8.c
index e251bc6..cd0c717 100644
--- a/c_src/utf8.c
+++ b/c_src/utf8.c
@@ -3,15 +3,23 @@
 #include "jiffy.h"
 #include
 
-static const char hexvals[256] = {
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+static const unsigned char hexvals[256] = {
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8, 9, 255, 255, 255, 255, 255, 255,
+    255, 10, 11, 12, 13, 14, 15, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 10, 11, 12, 13, 14, 15, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255
 };
 
 static const char hexdigits[16] = {
@@ -27,10 +35,10 @@ int_from_hex(const unsigned char* p)
     unsigned char* h = (unsigned char*) p;
     int ret;
 
-    if(hexvals[*(h+0)] < 0) return -1;
-    if(hexvals[*(h+1)] < 0) return -1;
-    if(hexvals[*(h+2)] < 0) return -1;
-    if(hexvals[*(h+3)] < 0) return -1;
+    if(hexvals[*(h+0)] == 255) return -1;
+    if(hexvals[*(h+1)] == 255) return -1;
+    if(hexvals[*(h+2)] == 255) return -1;
+    if(hexvals[*(h+3)] == 255) return -1;
 
     ret = (hexvals[*(h+0)] << 12)
         + (hexvals[*(h+1)] << 8)