您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

630 行
16 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  8. #define BIN_INC_SIZE 2048
  9. typedef struct {
  10. ErlNifEnv* env;
  11. jiffy_st* atoms;
  12. int uescape;
  13. int count;
  14. int iolen;
  15. ERL_NIF_TERM iolist;
  16. ErlNifBinary* curr;
  17. char* p;
  18. unsigned char* u;
  19. size_t i;
  20. } Encoder;
  21. int
  22. enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts, ErlNifBinary* bin)
  23. {
  24. ERL_NIF_TERM val;
  25. e->env = env;
  26. e->atoms = enif_priv_data(env);
  27. e->uescape = 0;
  28. e->count = 0;
  29. if(!enif_is_list(env, opts)) {
  30. return 0;
  31. }
  32. while(enif_get_list_cell(env, opts, &val, &opts)) {
  33. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  34. e->uescape = 1;
  35. } else {
  36. return 0;
  37. }
  38. }
  39. e->iolen = 0;
  40. e->iolist = enif_make_list(env, 0);
  41. e->curr = bin;
  42. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  43. return 0;
  44. }
  45. memset(e->curr->data, 0, e->curr->size);
  46. e->p = (char*) e->curr->data;
  47. e->u = (unsigned char*) e->curr->data;
  48. e->i = 0;
  49. return 1;
  50. }
  51. void
  52. enc_destroy(Encoder* e)
  53. {
  54. if(e->curr != NULL) {
  55. enif_release_binary(e->curr);
  56. }
  57. }
  58. ERL_NIF_TERM
  59. enc_error(Encoder* e, const char* msg)
  60. {
  61. //assert(0 && msg);
  62. return make_error(e->atoms, e->env, msg);
  63. }
  64. static inline int
  65. enc_ensure(Encoder* e, size_t req)
  66. {
  67. size_t need = e->curr->size;
  68. while(req >= (need - e->i)) need <<= 1;
  69. if(need != e->curr->size) {
  70. if(!enif_realloc_binary(e->curr, need)) {
  71. return 0;
  72. }
  73. e->p = (char*) e->curr->data;
  74. e->u = (unsigned char*) e->curr->data;
  75. }
  76. return 1;
  77. }
  78. int
  79. enc_result(Encoder* e, ERL_NIF_TERM* value)
  80. {
  81. if(e->i != e->curr->size) {
  82. if(!enif_realloc_binary(e->curr, e->i)) {
  83. return 0;
  84. }
  85. }
  86. *value = enif_make_binary(e->env, e->curr);
  87. e->curr = NULL;
  88. return 1;
  89. }
  90. int
  91. enc_done(Encoder* e, ERL_NIF_TERM* value)
  92. {
  93. ERL_NIF_TERM last;
  94. if(e->iolen == 0) {
  95. return enc_result(e, value);
  96. }
  97. if(e->i > 0 ) {
  98. if(!enc_result(e, &last)) {
  99. return 0;
  100. }
  101. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  102. e->iolen++;
  103. }
  104. *value = e->iolist;
  105. return 1;
  106. }
  107. static inline int
  108. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  109. {
  110. ErlNifBinary* bin = e->curr;
  111. ERL_NIF_TERM curr;
  112. if(e->i > 0) {
  113. if(!enc_result(e, &curr)) {
  114. return 0;
  115. }
  116. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  117. e->iolen++;
  118. }
  119. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  120. e->iolen++;
  121. // Reinitialize our binary for the next buffer.
  122. e->curr = bin;
  123. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  124. return 0;
  125. }
  126. memset(e->curr->data, 0, e->curr->size);
  127. e->p = (char*) e->curr->data;
  128. e->u = (unsigned char*) e->curr->data;
  129. e->i = 0;
  130. return 1;
  131. }
  132. static inline int
  133. enc_literal(Encoder* e, const char* literal, size_t len)
  134. {
  135. if(!enc_ensure(e, len)) {
  136. return 0;
  137. }
  138. memcpy(&(e->p[e->i]), literal, len);
  139. e->i += len;
  140. e->count++;
  141. return 1;
  142. }
  143. static inline int
  144. enc_string(Encoder* e, ERL_NIF_TERM val)
  145. {
  146. ErlNifBinary bin;
  147. char atom[512];
  148. unsigned char* data;
  149. size_t size;
  150. int esc_extra = 0;
  151. int ulen;
  152. int uval;
  153. int i;
  154. if(enif_is_binary(e->env, val)) {
  155. if(!enif_inspect_binary(e->env, val, &bin)) {
  156. return 0;
  157. }
  158. data = bin.data;
  159. size = bin.size;
  160. } else if(enif_is_atom(e->env, val)) {
  161. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  162. return 0;
  163. }
  164. data = (unsigned char*) atom;
  165. size = strlen(atom);
  166. } else {
  167. return 0;
  168. }
  169. i = 0;
  170. while(i < size) {
  171. switch((char) data[i]) {
  172. case '\"':
  173. case '\\':
  174. case '/':
  175. case '\b':
  176. case '\f':
  177. case '\n':
  178. case '\r':
  179. case '\t':
  180. esc_extra += 1;
  181. i++;
  182. continue;
  183. default:
  184. if(data[i] < 0x20) {
  185. esc_extra += 5;
  186. i++;
  187. continue;
  188. } else if(data[i] < 0x80) {
  189. i++;
  190. continue;
  191. }
  192. ulen = utf8_validate(&(data[i]), size - i);
  193. if(ulen < 0) {
  194. return 0;
  195. }
  196. if(e->uescape) {
  197. uval = utf8_to_unicode(&(data[i]), ulen);
  198. if(uval < 0) {
  199. return 0;
  200. }
  201. ulen = utf8_esc_len(uval);
  202. if(ulen < 0) {
  203. return 0;
  204. }
  205. }
  206. i += ulen;
  207. }
  208. }
  209. if(!enc_ensure(e, size + esc_extra + 2)) {
  210. return 0;
  211. }
  212. e->p[e->i++] = '\"';
  213. i = 0;
  214. while(i < size) {
  215. switch((char) data[i]) {
  216. case '\"':
  217. case '\\':
  218. case '/':
  219. e->p[e->i++] = '\\';
  220. e->u[e->i++] = data[i];
  221. i++;
  222. continue;
  223. case '\b':
  224. e->p[e->i++] = '\\';
  225. e->p[e->i++] = 'b';
  226. i++;
  227. continue;
  228. case '\f':
  229. e->p[e->i++] = '\\';
  230. e->p[e->i++] = 'f';
  231. i++;
  232. continue;
  233. case '\n':
  234. e->p[e->i++] = '\\';
  235. e->p[e->i++] = 'n';
  236. i++;
  237. continue;
  238. case '\r':
  239. e->p[e->i++] = '\\';
  240. e->p[e->i++] = 'r';
  241. i++;
  242. continue;
  243. case '\t':
  244. e->p[e->i++] = '\\';
  245. e->p[e->i++] = 't';
  246. i++;
  247. continue;
  248. default:
  249. if(data[i] < 0x20) {
  250. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  251. if(ulen < 0) {
  252. return 0;
  253. }
  254. e->i += ulen;
  255. i++;
  256. } else if((data[i] & 0x80) && e->uescape) {
  257. uval = utf8_to_unicode(&(data[i]), size-i);
  258. if(uval < 0) {
  259. return 0;
  260. }
  261. ulen = unicode_uescape(uval, &(e->p[e->i]));
  262. if(ulen < 0) {
  263. return 0;
  264. }
  265. e->i += ulen;
  266. ulen = utf8_len(uval);
  267. if(ulen < 0) {
  268. return 0;
  269. }
  270. i += ulen;
  271. } else {
  272. e->u[e->i++] = data[i++];
  273. }
  274. }
  275. }
  276. e->p[e->i++] = '\"';
  277. e->count++;
  278. return 1;
  279. }
  280. static inline int
  281. enc_long(Encoder* e, ErlNifSInt64 val)
  282. {
  283. if(!enc_ensure(e, 32)) {
  284. return 0;
  285. }
  286. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  287. snprintf(&(e->p[e->i]), 32, "%ld", val);
  288. #elif SIZEOF_LONG == 8
  289. snprintf(&(e->p[e->i]), 32, "%ld", val);
  290. #else
  291. snprintf(&(e->p[e->i]), 32, "%lld", val);
  292. #endif
  293. e->i += strlen(&(e->p[e->i]));
  294. e->count++;
  295. return 1;
  296. }
  297. static inline int
  298. enc_double(Encoder* e, double val)
  299. {
  300. if(!enc_ensure(e, 32)) {
  301. return 0;
  302. }
  303. //snprintf(&(e->p[e->i]), 31, "%0.20g", val);
  304. sprintf(&(e->p[e->i]), "%.20g", val);
  305. e->i += strlen(&(e->p[e->i]));
  306. e->count++;
  307. return 1;
  308. }
  309. static inline int
  310. enc_char(Encoder* e, char c)
  311. {
  312. if(!enc_ensure(e, 1)) {
  313. return 0;
  314. }
  315. e->p[e->i++] = c;
  316. return 1;
  317. }
  318. static inline int
  319. enc_start_object(Encoder* e)
  320. {
  321. e->count++;
  322. return enc_char(e, '{');
  323. }
  324. static inline int
  325. enc_end_object(Encoder* e)
  326. {
  327. return enc_char(e, '}');
  328. }
  329. static inline int
  330. enc_start_array(Encoder* e)
  331. {
  332. e->count++;
  333. return enc_char(e, '[');
  334. }
  335. static inline int
  336. enc_end_array(Encoder* e)
  337. {
  338. return enc_char(e, ']');
  339. }
  340. static inline int
  341. enc_colon(Encoder* e)
  342. {
  343. return enc_char(e, ':');
  344. }
  345. static inline int
  346. enc_comma(Encoder* e)
  347. {
  348. return enc_char(e, ',');
  349. }
  350. ERL_NIF_TERM
  351. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  352. {
  353. Encoder enc;
  354. Encoder* e = &enc;
  355. ErlNifBinary bin;
  356. ERL_NIF_TERM ret;
  357. ERL_NIF_TERM stack;
  358. ERL_NIF_TERM curr;
  359. ERL_NIF_TERM item;
  360. const ERL_NIF_TERM* tuple;
  361. int arity;
  362. ErlNifSInt64 lval;
  363. double dval;
  364. if(argc != 2) {
  365. return enif_make_badarg(env);
  366. }
  367. if(!enc_init(e, env, argv[1], &bin)) {
  368. return enif_make_badarg(env);
  369. }
  370. stack = enif_make_list(env, 1, argv[0]);
  371. while(!enif_is_empty_list(env, stack)) {
  372. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  373. ret = enc_error(e, "internal_error");
  374. goto done;
  375. }
  376. if(enif_is_identical(curr, e->atoms->ref_object)) {
  377. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  378. ret = enc_error(e, "internal_error");
  379. goto done;
  380. }
  381. if(enif_is_empty_list(env, curr)) {
  382. if(!enc_end_object(e)) {
  383. ret = enc_error(e, "internal_error");
  384. goto done;
  385. }
  386. continue;
  387. }
  388. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  389. ret = enc_error(e, "internal_error");
  390. goto done;
  391. }
  392. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  393. ret = enc_error(e, "invalid_object_pair");
  394. goto done;
  395. }
  396. if(arity != 2) {
  397. ret = enc_error(e, "invalid_object_pair");
  398. goto done;
  399. }
  400. if(!enc_comma(e)) {
  401. ret = enc_error(e, "internal_error");
  402. goto done;
  403. }
  404. if(!enc_string(e, tuple[0])) {
  405. ret = enc_error(e, "invalid_object_key");
  406. goto done;
  407. }
  408. if(!enc_colon(e)) {
  409. ret = enc_error(e, "internal_error");
  410. goto done;
  411. }
  412. stack = enif_make_list_cell(env, curr, stack);
  413. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  414. stack = enif_make_list_cell(env, tuple[1], stack);
  415. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  416. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  417. ret = enc_error(e, "internal_error");
  418. goto done;
  419. }
  420. if(enif_is_empty_list(env, curr)) {
  421. if(!enc_end_array(e)) {
  422. ret = enc_error(e, "internal_error");
  423. goto done;
  424. }
  425. continue;
  426. }
  427. if(!enc_comma(e)) {
  428. ret = enc_error(e, "internal_error");
  429. goto done;
  430. }
  431. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  432. ret = enc_error(e, "internal_error");
  433. goto done;
  434. }
  435. stack = enif_make_list_cell(env, curr, stack);
  436. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  437. stack = enif_make_list_cell(env, item, stack);
  438. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  439. if(!enc_literal(e, "null", 4)) {
  440. ret = enc_error(e, "null");
  441. goto done;
  442. }
  443. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  444. if(!enc_literal(e, "true", 4)) {
  445. ret = enc_error(e, "true");
  446. goto done;
  447. }
  448. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  449. if(!enc_literal(e, "false", 5)) {
  450. ret = enc_error(e, "false");
  451. goto done;
  452. }
  453. } else if(enif_is_binary(env, curr)) {
  454. if(!enc_string(e, curr)) {
  455. ret = enc_error(e, "invalid_string");
  456. goto done;
  457. }
  458. } else if(enif_is_atom(env, curr)) {
  459. if(!enc_string(e, curr)) {
  460. ret = enc_error(e, "invalid_string");
  461. goto done;
  462. }
  463. } else if(enif_get_int64(env, curr, &lval)) {
  464. if(!enc_long(e, lval)) {
  465. ret = enc_error(e, "internal_error");
  466. goto done;
  467. }
  468. } else if(enif_get_double(env, curr, &dval)) {
  469. if(!enc_double(e, dval)) {
  470. ret = enc_error(e, "internal_error");
  471. goto done;
  472. }
  473. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  474. if(arity != 1) {
  475. ret = enc_error(e, "invalid_ejson");
  476. goto done;
  477. }
  478. if(!enif_is_list(env, tuple[0])) {
  479. ret = enc_error(e, "invalid_object");
  480. goto done;
  481. }
  482. if(!enc_start_object(e)) {
  483. ret = enc_error(e, "internal_error");
  484. goto done;
  485. }
  486. if(enif_is_empty_list(env, tuple[0])) {
  487. if(!enc_end_object(e)) {
  488. ret = enc_error(e, "internal_error");
  489. goto done;
  490. }
  491. continue;
  492. }
  493. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  494. ret = enc_error(e, "internal_error");
  495. goto done;
  496. }
  497. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  498. ret = enc_error(e, "invalid_object_member");
  499. goto done;
  500. }
  501. if(arity != 2) {
  502. ret = enc_error(e, "invalid_object_member_arity");
  503. goto done;
  504. }
  505. if(!enc_string(e, tuple[0])) {
  506. ret = enc_error(e, "invalid_object_member_key");
  507. goto done;
  508. }
  509. if(!enc_colon(e)) {
  510. ret = enc_error(e, "internal_error");
  511. goto done;
  512. }
  513. stack = enif_make_list_cell(env, curr, stack);
  514. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  515. stack = enif_make_list_cell(env, tuple[1], stack);
  516. } else if(enif_is_list(env, curr)) {
  517. if(!enc_start_array(e)) {
  518. ret = enc_error(e, "internal_error");
  519. goto done;
  520. }
  521. if(enif_is_empty_list(env, curr)) {
  522. if(!enc_end_array(e)) {
  523. ret = enc_error(e, "internal_error");
  524. goto done;
  525. }
  526. continue;
  527. }
  528. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  529. ret = enc_error(e, "internal_error");
  530. goto done;
  531. }
  532. stack = enif_make_list_cell(env, curr, stack);
  533. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  534. stack = enif_make_list_cell(env, item, stack);
  535. } else {
  536. if(!enc_unknown(e, curr)) {
  537. ret = enc_error(e, "internal_error");
  538. goto done;
  539. }
  540. }
  541. } while(!enif_is_empty_list(env, stack));
  542. if(!enc_done(e, &item)) {
  543. ret = enc_error(e, "internal_error");
  544. goto done;
  545. }
  546. if(e->iolen == 0) {
  547. ret = item;
  548. } else {
  549. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  550. }
  551. done:
  552. enc_destroy(e);
  553. return ret;
  554. }