From 23376b58a7dffa266f8de6ae07972b34b72e90ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= Date: Thu, 11 Apr 2019 13:32:45 +0200 Subject: [PATCH] Get rid of separate unsigned/signed buffers --- c_src/decoder.c | 21 +++++++++------------ c_src/doubles.cc | 4 ++-- c_src/encoder.c | 37 ++++++++++++++++--------------------- c_src/jiffy.h | 6 +++--- c_src/utf8.c | 26 +++++++++++++------------- 5 files changed, 43 insertions(+), 51 deletions(-) diff --git a/c_src/decoder.c b/c_src/decoder.c index 1d2f44d..e01a4fd 100644 --- a/c_src/decoder.c +++ b/c_src/decoder.c @@ -57,8 +57,7 @@ typedef struct { int copy_strings; ERL_NIF_TERM null_term; - char* p; - unsigned char* u; + unsigned char* p; int i; int len; @@ -89,7 +88,6 @@ dec_new(ErlNifEnv* env) d->null_term = d->atoms->atom_null; d->p = NULL; - d->u = NULL; d->len = -1; d->i = -1; @@ -113,8 +111,7 @@ dec_init(Decoder* d, ErlNifEnv* env, ERL_NIF_TERM arg, ErlNifBinary* bin) d->env = env; d->arg = arg; - d->p = (char*) bin->data; - d->u = bin->data; + d->p = bin->data; d->len = bin->size; // I'd like to be more forceful on this check so that when @@ -221,7 +218,7 @@ dec_string(Decoder* d, ERL_NIF_TERM* value) st = d->i; while(d->i < d->len) { - if(d->u[d->i] < 0x20) { + if(d->p[d->i] < 0x20) { return 0; } else if(d->p[d->i] == '\"') { d->i++; @@ -251,7 +248,7 @@ dec_string(Decoder* d, ERL_NIF_TERM* value) if(d->i + 4 >= d->len) { return 0; } - hi = int_from_hex(&(d->u[d->i])); + hi = int_from_hex(&(d->p[d->i])); if(hi < 0) { return 0; } @@ -265,7 +262,7 @@ dec_string(Decoder* d, ERL_NIF_TERM* value) } else if(d->p[d->i++] != 'u') { return 0; } - lo = int_from_hex(&(d->u[d->i])); + lo = int_from_hex(&(d->p[d->i])); if(lo < 0) { return 0; } @@ -287,10 +284,10 @@ dec_string(Decoder* d, ERL_NIF_TERM* value) default: return 0; } - } else if(d->u[d->i] < 0x80) { + } else if(d->p[d->i] < 0x80) { d->i++; } else { - ulen = utf8_validate(&(d->u[d->i]), d->len - d->i); + ulen = utf8_validate(&(d->p[d->i]), d->len - 
d->i); if(ulen < 0) { return 0; } @@ -356,12 +353,12 @@ parse: break; case 'u': ui++; - hi = int_from_hex(&(d->u[ui])); + hi = int_from_hex(&(d->p[ui])); if(hi < 0) { return 0; } if(hi >= 0xD800 && hi < 0xDC00) { - lo = int_from_hex(&(d->u[ui+6])); + lo = int_from_hex(&(d->p[ui+6])); if(lo < 0) { return 0; } diff --git a/c_src/doubles.cc b/c_src/doubles.cc index 0cedfd3..05c6443 100644 --- a/c_src/doubles.cc +++ b/c_src/doubles.cc @@ -10,14 +10,14 @@ namespace dc = double_conversion; BEGIN_C int -double_to_shortest(char* buf, size_t size, size_t* len, double val) +double_to_shortest(unsigned char* buf, size_t size, size_t* len, double val) { int flags = dc::DoubleToStringConverter::UNIQUE_ZERO | dc::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN | dc::DoubleToStringConverter::EMIT_TRAILING_DECIMAL_POINT | dc::DoubleToStringConverter::EMIT_TRAILING_ZERO_AFTER_POINT; - dc::StringBuilder builder(buf, size); + dc::StringBuilder builder(reinterpret_cast<char*>(buf), size); dc::DoubleToStringConverter conv(flags, NULL, NULL, 'e', -6, 21, 6, 0); if(!conv.ToShortest(val, &builder)) { diff --git a/c_src/encoder.c b/c_src/encoder.c index 605a48a..06838ca 100644 --- a/c_src/encoder.c +++ b/c_src/encoder.c @@ -46,8 +46,7 @@ typedef struct { ErlNifBinary buffer; int have_buffer; - char* p; - unsigned char* u; + unsigned char* p; size_t i; } Encoder; @@ -94,8 +93,7 @@ enc_new(ErlNifEnv* env) e->have_buffer = 1; - e->p = (char*)e->buffer.data; - e->u = (unsigned char*)e->buffer.data; + e->p = e->buffer.data; e->i = 0; return e; @@ -176,8 +174,7 @@ enc_ensure(Encoder* e, size_t req) e->have_buffer = 1; - e->p = (char*)e->buffer.data; - e->u = (unsigned char*)e->buffer.data; + e->p = e->buffer.data; e->i = 0; return 1; @@ -285,7 +282,7 @@ enc_special_character(Encoder* e, int val) { case '\"': case '\\': e->p[e->i++] = '\\'; - e->u[e->i++] = val; + e->p[e->i++] = val; return 1; case '\b': e->p[e->i++] = '\\'; @@ -311,7 +308,7 @@ enc_special_character(Encoder* e, int val) { 
if(e->escape_forward_slashes) { e->p[e->i++] = '\\'; } - e->u[e->i++] = '/'; + e->p[e->i++] = '/'; return 1; default: if(val < 0x20) { @@ -327,19 +324,17 @@ static int enc_atom(Encoder* e, ERL_NIF_TERM val) { static const int MAX_ESCAPE_LEN = 12; - char atom[512]; + unsigned char data[512]; - unsigned char* data; size_t size; int i; - if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) { + if(!enif_get_atom(e->env, val, (char*)data, 512, ERL_NIF_LATIN1)) { return 0; } - data = (unsigned char*) atom; - size = strlen(atom); + size = strlen((const char*)data); /* Reserve space for the first quotation mark and most of the output. */ if(!enc_ensure(e, size + MAX_ESCAPE_LEN + 1)) { @@ -359,15 +354,15 @@ enc_atom(Encoder* e, ERL_NIF_TERM val) if(enc_special_character(e, val)) { i++; } else if(val < 0x80) { - e->u[e->i++] = val; + e->p[e->i++] = val; i++; } else if(val >= 0x80) { /* The atom encoding is latin1, so we don't need validation * as all latin1 characters are valid UTF-8 characters. 
*/ if (!e->uescape) { - e->i += unicode_to_utf8(val, &e->u[e->i]); + e->i += unicode_to_utf8(val, &e->p[e->i]); } else { - e->i += unicode_uescape(val, &(e->p[e->i])); + e->i += unicode_uescape(val, &e->p[e->i]); } i++; @@ -420,7 +415,7 @@ enc_string(Encoder* e, ERL_NIF_TERM val) if(enc_special_character(e, data[i])) { i++; } else if(data[i] < 0x80) { - e->u[e->i++] = data[i++]; + e->p[e->i++] = data[i++]; } else if(data[i] >= 0x80) { ulen = utf8_validate(&(data[i]), size - i); @@ -441,7 +436,7 @@ enc_string(Encoder* e, ERL_NIF_TERM val) e->i += esc_len; } else { - memcpy(&e->u[e->i], &data[i], ulen); + memcpy(&e->p[e->i], &data[i], ulen); e->i += ulen; } @@ -507,7 +502,7 @@ digits10(ErlNifUInt64 v) } unsigned int -u64ToAsciiTable(char *dst, ErlNifUInt64 value) +u64ToAsciiTable(unsigned char *dst, ErlNifUInt64 value) { static const char digits[201] = "0001020304050607080910111213141516171819" @@ -536,7 +531,7 @@ u64ToAsciiTable(char *dst, ErlNifUInt64 value) } unsigned -i64ToAsciiTable(char *dst, ErlNifSInt64 value) +i64ToAsciiTable(unsigned char *dst, ErlNifSInt64 value) { if (value < 0) { *dst++ = '-'; @@ -562,7 +557,7 @@ enc_long(Encoder* e, ErlNifSInt64 val) static inline int enc_double(Encoder* e, double val) { - char* start; + unsigned char* start; size_t len; if(!enc_ensure(e, 32)) { diff --git a/c_src/jiffy.h b/c_src/jiffy.h index 9d1f486..c310ad7 100644 --- a/c_src/jiffy.h +++ b/c_src/jiffy.h @@ -68,14 +68,14 @@ int make_object(ErlNifEnv* env, ERL_NIF_TERM pairs, ERL_NIF_TERM* out, int ret_map, int dedupe_keys); int int_from_hex(const unsigned char* p); -int int_to_hex(int val, char* p); +int int_to_hex(int val, unsigned char* p); int utf8_len(int c); int utf8_esc_len(int c); int utf8_validate(unsigned char* data, size_t size); int utf8_to_unicode(unsigned char* buf, size_t size); int unicode_to_utf8(int c, unsigned char* buf); int unicode_from_pair(int hi, int lo); -int unicode_uescape(int c, char* buf); -int double_to_shortest(char *buf, size_t size, 
size_t* len, double val); +int unicode_uescape(int c, unsigned char* buf); +int double_to_shortest(unsigned char *buf, size_t size, size_t* len, double val); #endif // Included JIFFY_H diff --git a/c_src/utf8.c b/c_src/utf8.c index 878a4f0..2f970e9 100644 --- a/c_src/utf8.c +++ b/c_src/utf8.c @@ -49,7 +49,7 @@ int_from_hex(const unsigned char* p) } int -int_to_hex(int val, char* p) +int_to_hex(int val, unsigned char* p) { if(val < 0 || val > 65535) return -1; @@ -163,7 +163,7 @@ utf8_to_unicode(unsigned char* buf, size_t size) int ret; if((buf[0] & 0x80) == 0x00) { // 0xxxxxxx - ret = (int) buf[0]; + ret = buf[0]; } else if((buf[0] & 0xE0) == 0xC0 && size >= 2) { // 110xxxxy 10yyyyyy ret = ((buf[0] & 0x1F) << 6) @@ -192,26 +192,26 @@ int unicode_to_utf8(int c, unsigned char* buf) { if(c < 0x80) { - buf[0] = (unsigned char) c; + buf[0] = c; return 1; } else if(c < 0x800) { - buf[0] = (unsigned char) 0xC0 + (c >> 6); - buf[1] = (unsigned char) 0x80 + (c & 0x3F); + buf[0] = 0xC0 + (c >> 6); + buf[1] = 0x80 + (c & 0x3F); return 2; } else if(c < 0x10000) { if(c < 0xD800 || (c > 0xDFFF)) { - buf[0] = (unsigned char) 0xE0 + (c >> 12); - buf[1] = (unsigned char) 0x80 + ((c >> 6) & 0x3F); - buf[2] = (unsigned char) 0x80 + (c & 0x3F); + buf[0] = 0xE0 + (c >> 12); + buf[1] = 0x80 + ((c >> 6) & 0x3F); + buf[2] = 0x80 + (c & 0x3F); return 3; } else { return -1; } } else if(c <= 0x10FFFF) { - buf[0] = (unsigned char) 0xF0 + (c >> 18); - buf[1] = (unsigned char) 0x80 + ((c >> 12) & 0x3F); - buf[2] = (unsigned char) 0x80 + ((c >> 6) & 0x3F); - buf[3] = (unsigned char) 0x80 + (c & 0x3F); + buf[0] = 0xF0 + (c >> 18); + buf[1] = 0x80 + ((c >> 12) & 0x3F); + buf[2] = 0x80 + ((c >> 6) & 0x3F); + buf[3] = 0x80 + (c & 0x3F); return 4; } return -1; @@ -226,7 +226,7 @@ unicode_from_pair(int hi, int lo) } int -unicode_uescape(int val, char* p) +unicode_uescape(int val, unsigned char* p) { int n; if(val < 0x10000) {