浏览代码

Walk through strings once when encoding

pull/176/head
John Högberg 6 年前
父节点
当前提交
ae14731f72
共有 1 个文件被更改,包括 35 次插入60 次删除
  1. +35
    -60
      c_src/encoder.c

+ 35
- 60
c_src/encoder.c 查看文件

@ -326,13 +326,13 @@ enc_literal(Encoder* e, const char* literal, size_t len)
static inline int static inline int
enc_string(Encoder* e, ERL_NIF_TERM val) enc_string(Encoder* e, ERL_NIF_TERM val)
{ {
static const int MAX_ESCAPE_LEN = 12;
ErlNifBinary bin; ErlNifBinary bin;
char atom[512]; char atom[512];
unsigned char* data; unsigned char* data;
size_t size; size_t size;
int esc_extra = 0;
int ulen; int ulen;
int uval; int uval;
int i; int i;
@ -353,50 +353,8 @@ enc_string(Encoder* e, ERL_NIF_TERM val)
return 0; return 0;
} }
i = 0;
while(i < size) {
switch((char) data[i]) {
case '\"':
case '\\':
case '\b':
case '\f':
case '\n':
case '\r':
case '\t':
esc_extra += 1;
i++;
continue;
case '/':
if(e->escape_forward_slashes) {
esc_extra += 1;
i++;
continue;
}
default:
if(data[i] < 0x20) {
esc_extra += 5;
i++;
continue;
} else if(data[i] < 0x80) {
i++;
continue;
}
ulen = utf8_validate(&(data[i]), size - i);
if(ulen < 0) {
return 0;
}
if(e->uescape) {
uval = utf8_to_unicode(&(data[i]), ulen);
if(uval < 0) {
return 0;
}
esc_extra += utf8_esc_len(uval) - ulen;
}
i += ulen;
}
}
if(!enc_ensure(e, size + esc_extra + 2)) {
/* Reserve space for the first quotation mark and most of the output. */
if(!enc_ensure(e, size + MAX_ESCAPE_LEN + 1)) {
return 0; return 0;
} }
@ -404,6 +362,10 @@ enc_string(Encoder* e, ERL_NIF_TERM val)
i = 0; i = 0;
while(i < size) { while(i < size) {
if(!enc_ensure(e, MAX_ESCAPE_LEN)) {
return 0;
}
switch((char) data[i]) { switch((char) data[i]) {
case '\"': case '\"':
case '\\': case '\\':
@ -439,34 +401,43 @@ enc_string(Encoder* e, ERL_NIF_TERM val)
case '/': case '/':
if(e->escape_forward_slashes) { if(e->escape_forward_slashes) {
e->p[e->i++] = '\\'; e->p[e->i++] = '\\';
e->u[e->i++] = data[i];
i++;
continue;
} }
e->u[e->i++] = '/';
i++;
continue;
default: default:
if(data[i] < 0x20) { if(data[i] < 0x20) {
ulen = unicode_uescape(data[i], &(e->p[e->i])); ulen = unicode_uescape(data[i], &(e->p[e->i]));
if(ulen < 0) { if(ulen < 0) {
return 0; return 0;
} }
e->i += ulen; e->i += ulen;
i++; i++;
} else if((data[i] & 0x80) && e->uescape) {
uval = utf8_to_unicode(&(data[i]), size-i);
if(uval < 0) {
return 0;
}
} else if(data[i] & 0x80) {
ulen = utf8_validate(&(data[i]), size - i);
ulen = unicode_uescape(uval, &(e->p[e->i]));
if(ulen < 0) {
if (ulen < 0) {
return 0; return 0;
} else if (e->uescape) {
int esc_len;
uval = utf8_to_unicode(&(data[i]), size-i);
if(uval < 0) {
return 0;
}
esc_len = unicode_uescape(uval, &(e->p[e->i]));
if(esc_len < 0) {
return 0;
}
e->i += esc_len;
} else {
memcpy(&e->u[e->i], &data[i], ulen);
e->i += ulen;
} }
e->i += ulen;
ulen = utf8_len(uval);
if(ulen < 0) {
return 0;
}
i += ulen; i += ulen;
} else { } else {
e->u[e->i++] = data[i++]; e->u[e->i++] = data[i++];
@ -474,6 +445,10 @@ enc_string(Encoder* e, ERL_NIF_TERM val)
} }
} }
if(!enc_ensure(e, 1)) {
return 0;
}
e->p[e->i++] = '\"'; e->p[e->i++] = '\"';
e->count++; e->count++;

正在加载...
取消
保存