Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

789 linhas
19 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  8. #define BIN_INC_SIZE 2048
  9. #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
  10. #define MAYBE_PRETTY(e) \
  11. do { \
  12. if(e->pretty) { \
  13. if(!enc_shift(e)) \
  14. return 0; \
  15. } \
  16. } while(0)
  17. #if WINDOWS || WIN32
  18. #define inline __inline
  19. #define snprintf _snprintf
  20. #endif
// State of one encoding run. Allocated as a NIF resource by enc_new()
// and carried across calls to encode_iter().
typedef struct {
    ErlNifEnv* env;         // Environment of the current NIF call; set by enc_init().
    jiffy_st* atoms;        // Private data (atoms, resource types) from enif_priv_data().
    size_t bytes_per_iter;  // Passed to should_yield()/consume_timeslice().
    int uescape;            // Non-zero: write non-ASCII code points as \u escapes.
    int pretty;             // Non-zero: pretty print (newlines, indentation, " : ").
    int shiftcnt;           // Current nesting depth used for indentation.
    int count;              // Incremented for each literal/string/number/container written.
    size_t iolen;           // Number of cells pushed onto iolist.
    size_t iosize;          // Bytes accounted for in buffers already moved to iolist.
    ERL_NIF_TERM iolist;    // List of flushed binaries and terms left unencoded.
    ErlNifBinary bin;       // Storage for the current output binary.
    ErlNifBinary* curr;     // Points at bin while it is owned here, NULL otherwise.
    char* p;                // curr->data viewed as char*.
    unsigned char* u;       // curr->data viewed as unsigned char*.
    size_t i;               // Write offset into the current buffer.
} Encoder;
  38. // String constants for pretty printing.
  39. // Every string starts with its length.
  40. #define NUM_SHIFTS 8
  41. static char* shifts[NUM_SHIFTS] = {
  42. "\x01\n",
  43. "\x03\n ",
  44. "\x05\n ",
  45. "\x07\n ",
  46. "\x09\n ",
  47. "\x0b\n ",
  48. "\x0d\n ",
  49. "\x0f\n "
  50. };
  51. Encoder*
  52. enc_new(ErlNifEnv* env)
  53. {
  54. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  55. Encoder* e = enif_alloc_resource(st->res_enc, sizeof(Encoder));
  56. e->atoms = st;
  57. e->bytes_per_iter = DEFAULT_BYTES_PER_ITER;
  58. e->uescape = 0;
  59. e->pretty = 0;
  60. e->shiftcnt = 0;
  61. e->count = 0;
  62. e->iolen = 0;
  63. e->iosize = 0;
  64. e->curr = &(e->bin);
  65. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  66. e->curr = NULL;
  67. enif_release_resource(e);
  68. return NULL;
  69. }
  70. memset(e->curr->data, 0, e->curr->size);
  71. e->p = (char*) e->curr->data;
  72. e->u = (unsigned char*) e->curr->data;
  73. e->i = 0;
  74. return e;
  75. }
  76. int
  77. enc_init(Encoder* e, ErlNifEnv* env)
  78. {
  79. e->env = env;
  80. return 1;
  81. }
  82. void
  83. enc_destroy(ErlNifEnv* env, void* obj)
  84. {
  85. Encoder* e = (Encoder*) obj;
  86. if(e->curr != NULL) {
  87. enif_release_binary(e->curr);
  88. }
  89. }
  90. ERL_NIF_TERM
  91. enc_error(Encoder* e, const char* msg)
  92. {
  93. //assert(0 && msg);
  94. return make_error(e->atoms, e->env, msg);
  95. }
  96. static inline int
  97. enc_ensure(Encoder* e, size_t req)
  98. {
  99. size_t need = e->curr->size;
  100. while(req >= (need - e->i)) need <<= 1;
  101. if(need != e->curr->size) {
  102. if(!enif_realloc_binary(e->curr, need)) {
  103. return 0;
  104. }
  105. e->p = (char*) e->curr->data;
  106. e->u = (unsigned char*) e->curr->data;
  107. }
  108. return 1;
  109. }
  110. int
  111. enc_result(Encoder* e, ERL_NIF_TERM* value)
  112. {
  113. if(e->i != e->curr->size) {
  114. if(!enif_realloc_binary(e->curr, e->i)) {
  115. return 0;
  116. }
  117. }
  118. *value = enif_make_binary(e->env, e->curr);
  119. e->curr = NULL;
  120. return 1;
  121. }
  122. int
  123. enc_done(Encoder* e, ERL_NIF_TERM* value)
  124. {
  125. ERL_NIF_TERM last;
  126. if(e->iolen == 0) {
  127. return enc_result(e, value);
  128. }
  129. if(e->i > 0 ) {
  130. if(!enc_result(e, &last)) {
  131. return 0;
  132. }
  133. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  134. e->iolen++;
  135. }
  136. *value = e->iolist;
  137. return 1;
  138. }
  139. static inline int
  140. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  141. {
  142. ErlNifBinary* bin = e->curr;
  143. ERL_NIF_TERM curr;
  144. if(e->i > 0) {
  145. if(!enc_result(e, &curr)) {
  146. return 0;
  147. }
  148. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  149. e->iolen++;
  150. }
  151. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  152. e->iolen++;
  153. // Track the total number of bytes produced before
  154. // splitting our IO buffer. We add 16 to this value
  155. // as a rough estimate of the number of bytes that
  156. // a bignum might produce when encoded.
  157. e->iosize += e->i + 16;
  158. // Reinitialize our binary for the next buffer.
  159. e->curr = bin;
  160. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  161. return 0;
  162. }
  163. memset(e->curr->data, 0, e->curr->size);
  164. e->p = (char*) e->curr->data;
  165. e->u = (unsigned char*) e->curr->data;
  166. e->i = 0;
  167. return 1;
  168. }
  169. static inline int
  170. enc_literal(Encoder* e, const char* literal, size_t len)
  171. {
  172. if(!enc_ensure(e, len)) {
  173. return 0;
  174. }
  175. memcpy(&(e->p[e->i]), literal, len);
  176. e->i += len;
  177. e->count++;
  178. return 1;
  179. }
  180. static inline int
  181. enc_string(Encoder* e, ERL_NIF_TERM val)
  182. {
  183. ErlNifBinary bin;
  184. char atom[512];
  185. unsigned char* data;
  186. size_t size;
  187. int esc_extra = 0;
  188. int ulen;
  189. int uval;
  190. int i;
  191. if(enif_is_binary(e->env, val)) {
  192. if(!enif_inspect_binary(e->env, val, &bin)) {
  193. return 0;
  194. }
  195. data = bin.data;
  196. size = bin.size;
  197. } else if(enif_is_atom(e->env, val)) {
  198. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  199. return 0;
  200. }
  201. data = (unsigned char*) atom;
  202. size = strlen(atom);
  203. } else {
  204. return 0;
  205. }
  206. i = 0;
  207. while(i < size) {
  208. switch((char) data[i]) {
  209. case '\"':
  210. case '\\':
  211. case '\b':
  212. case '\f':
  213. case '\n':
  214. case '\r':
  215. case '\t':
  216. esc_extra += 1;
  217. i++;
  218. continue;
  219. default:
  220. if(data[i] < 0x20) {
  221. esc_extra += 5;
  222. i++;
  223. continue;
  224. } else if(data[i] < 0x80) {
  225. i++;
  226. continue;
  227. }
  228. ulen = utf8_validate(&(data[i]), size - i);
  229. if(ulen < 0) {
  230. return 0;
  231. }
  232. if(e->uescape) {
  233. uval = utf8_to_unicode(&(data[i]), ulen);
  234. if(uval < 0) {
  235. return 0;
  236. }
  237. esc_extra += utf8_esc_len(uval);
  238. if(ulen < 0) {
  239. return 0;
  240. }
  241. }
  242. i += ulen;
  243. }
  244. }
  245. if(!enc_ensure(e, size + esc_extra + 2)) {
  246. return 0;
  247. }
  248. e->p[e->i++] = '\"';
  249. i = 0;
  250. while(i < size) {
  251. switch((char) data[i]) {
  252. case '\"':
  253. case '\\':
  254. e->p[e->i++] = '\\';
  255. e->u[e->i++] = data[i];
  256. i++;
  257. continue;
  258. case '\b':
  259. e->p[e->i++] = '\\';
  260. e->p[e->i++] = 'b';
  261. i++;
  262. continue;
  263. case '\f':
  264. e->p[e->i++] = '\\';
  265. e->p[e->i++] = 'f';
  266. i++;
  267. continue;
  268. case '\n':
  269. e->p[e->i++] = '\\';
  270. e->p[e->i++] = 'n';
  271. i++;
  272. continue;
  273. case '\r':
  274. e->p[e->i++] = '\\';
  275. e->p[e->i++] = 'r';
  276. i++;
  277. continue;
  278. case '\t':
  279. e->p[e->i++] = '\\';
  280. e->p[e->i++] = 't';
  281. i++;
  282. continue;
  283. default:
  284. if(data[i] < 0x20) {
  285. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  286. if(ulen < 0) {
  287. return 0;
  288. }
  289. e->i += ulen;
  290. i++;
  291. } else if((data[i] & 0x80) && e->uescape) {
  292. uval = utf8_to_unicode(&(data[i]), size-i);
  293. if(uval < 0) {
  294. return 0;
  295. }
  296. ulen = unicode_uescape(uval, &(e->p[e->i]));
  297. if(ulen < 0) {
  298. return 0;
  299. }
  300. e->i += ulen;
  301. ulen = utf8_len(uval);
  302. if(ulen < 0) {
  303. return 0;
  304. }
  305. i += ulen;
  306. } else {
  307. e->u[e->i++] = data[i++];
  308. }
  309. }
  310. }
  311. e->p[e->i++] = '\"';
  312. e->count++;
  313. return 1;
  314. }
  315. static inline int
  316. enc_long(Encoder* e, ErlNifSInt64 val)
  317. {
  318. if(!enc_ensure(e, 32)) {
  319. return 0;
  320. }
  321. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  322. snprintf(&(e->p[e->i]), 32, "%ld", val);
  323. #elif SIZEOF_LONG == 8
  324. snprintf(&(e->p[e->i]), 32, "%ld", val);
  325. #else
  326. snprintf(&(e->p[e->i]), 32, "%lld", val);
  327. #endif
  328. e->i += strlen(&(e->p[e->i]));
  329. e->count++;
  330. return 1;
  331. }
  332. static inline int
  333. enc_double(Encoder* e, double val)
  334. {
  335. char* start;
  336. size_t len;
  337. if(!enc_ensure(e, 32)) {
  338. return 0;
  339. }
  340. start = &(e->p[e->i]);
  341. if(!double_to_shortest(start, e->curr->size, &len, val)) {
  342. return 0;
  343. }
  344. e->i += len;
  345. e->count++;
  346. return 1;
  347. }
  348. static inline int
  349. enc_char(Encoder* e, char c)
  350. {
  351. if(!enc_ensure(e, 1)) {
  352. return 0;
  353. }
  354. e->p[e->i++] = c;
  355. return 1;
  356. }
  357. static int
  358. enc_shift(Encoder* e) {
  359. int i;
  360. char* shift;
  361. assert(e->shiftcnt >= 0 && "Invalid shift count.");
  362. shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
  363. if(!enc_literal(e, shift + 1, *shift))
  364. return 0;
  365. // Finish the rest of this shift it's it bigger than
  366. // our largest predefined constant.
  367. for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
  368. if(!enc_literal(e, " ", 2))
  369. return 0;
  370. }
  371. return 1;
  372. }
  373. static inline int
  374. enc_start_object(Encoder* e)
  375. {
  376. e->count++;
  377. e->shiftcnt++;
  378. if(!enc_char(e, '{'))
  379. return 0;
  380. MAYBE_PRETTY(e);
  381. return 1;
  382. }
  383. static inline int
  384. enc_end_object(Encoder* e)
  385. {
  386. e->shiftcnt--;
  387. MAYBE_PRETTY(e);
  388. return enc_char(e, '}');
  389. }
  390. static inline int
  391. enc_start_array(Encoder* e)
  392. {
  393. e->count++;
  394. e->shiftcnt++;
  395. if(!enc_char(e, '['))
  396. return 0;
  397. MAYBE_PRETTY(e);
  398. return 1;
  399. }
  400. static inline int
  401. enc_end_array(Encoder* e)
  402. {
  403. e->shiftcnt--;
  404. MAYBE_PRETTY(e);
  405. return enc_char(e, ']');
  406. }
  407. static inline int
  408. enc_colon(Encoder* e)
  409. {
  410. if(e->pretty)
  411. return enc_literal(e, " : ", 3);
  412. return enc_char(e, ':');
  413. }
  414. static inline int
  415. enc_comma(Encoder* e)
  416. {
  417. if(!enc_char(e, ','))
  418. return 0;
  419. MAYBE_PRETTY(e);
  420. return 1;
  421. }
  422. ERL_NIF_TERM
  423. encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  424. {
  425. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  426. Encoder* e;
  427. ERL_NIF_TERM opts;
  428. ERL_NIF_TERM val;
  429. ERL_NIF_TERM tmp_argv[3];
  430. if(argc != 2) {
  431. return enif_make_badarg(env);
  432. }
  433. e = enc_new(env);
  434. if(e == NULL) {
  435. return make_error(st, env, "internal_error");
  436. }
  437. tmp_argv[0] = enif_make_resource(env, e);
  438. tmp_argv[1] = enif_make_list(env, 1, argv[0]);
  439. tmp_argv[2] = enif_make_list(env, 0);
  440. enif_release_resource(e);
  441. opts = argv[1];
  442. if(!enif_is_list(env, opts)) {
  443. return enif_make_badarg(env);
  444. }
  445. while(enif_get_list_cell(env, opts, &val, &opts)) {
  446. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  447. e->uescape = 1;
  448. } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
  449. e->pretty = 1;
  450. } else if(enif_compare(val, e->atoms->atom_force_utf8) == 0) {
  451. // Ignore, handled in Erlang
  452. } else if(get_bytes_per_iter(env, val, &(e->bytes_per_iter))) {
  453. continue;
  454. } else {
  455. return enif_make_badarg(env);
  456. }
  457. }
  458. return encode_iter(env, 3, tmp_argv);
  459. }
// NIF entry point that runs one pass of the encoder.
//
// argv[0] is the encoder resource, argv[1] the work stack (a list of
// terms still to be written) and argv[2] the iolist built so far.
// Terms are popped from the stack one at a time. Containers are
// unrolled by pushing, in order, the next value, a marker reference
// (ref_object / ref_array) and the list of remaining members, so that
// nesting is handled without recursion.
//
// Returns one of:
//   - badarg when the arguments have the wrong count or type;
//   - {iter, Encoder, Stack, IoList} when should_yield() asks for a
//     pause, so the caller can invoke this function again;
//   - the encoded binary when nothing was pushed onto the iolist;
//   - {partial, IoList} when the iolist holds terms that were left
//     unencoded by enc_unknown();
//   - an error term from enc_error() on failure.
ERL_NIF_TERM
encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    Encoder* e;
    jiffy_st* st = (jiffy_st*) enif_priv_data(env);

    ERL_NIF_TERM ret = 0;

    ERL_NIF_TERM stack;
    ERL_NIF_TERM curr;
    ERL_NIF_TERM item;
    const ERL_NIF_TERM* tuple;
    int arity;
    ErlNifSInt64 lval;
    double dval;

    size_t start;
    size_t processed;

    if(argc != 3) {
        return enif_make_badarg(env);
    } else if(!enif_get_resource(env, argv[0], st->res_enc, (void**) &e)) {
        return enif_make_badarg(env);
    } else if(!enif_is_list(env, argv[1])) {
        return enif_make_badarg(env);
    } else if(!enif_is_list(env, argv[2])) {
        return enif_make_badarg(env);
    }

    if(!enc_init(e, env)) {
        return enif_make_badarg(env);
    }

    stack = argv[1];
    e->iolist = argv[2];

    // Byte count at entry; used to measure the work done in this call.
    start = e->iosize + e->i;

    while(!enif_is_empty_list(env, stack)) {

        // Hand control back to the caller once enough bytes have been
        // produced in this call.
        processed = (e->iosize + e->i) - start;

        if(should_yield(processed, e->bytes_per_iter)) {
            consume_timeslice(env, processed, e->bytes_per_iter);
            return enif_make_tuple4(
                    env,
                    st->atom_iter,
                    argv[0],
                    stack,
                    e->iolist
                );
        }

        if(!enif_get_list_cell(env, stack, &curr, &stack)) {
            ret = enc_error(e, "internal_error");
            goto done;
        }
        if(enif_is_identical(curr, e->atoms->ref_object)) {
            // Object continuation: the next stack entry is the list
            // of members that still have to be written.
            if(!enif_get_list_cell(env, stack, &curr, &stack)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            if(enif_is_empty_list(env, curr)) {
                if(!enc_end_object(e)) {
                    ret = enc_error(e, "internal_error");
                    goto done;
                }
                continue;
            }
            if(!enif_get_list_cell(env, curr, &item, &curr)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            if(!enif_get_tuple(env, item, &arity, &tuple)) {
                ret = enc_error(e, "invalid_object_pair");
                goto done;
            }
            if(arity != 2) {
                ret = enc_error(e, "invalid_object_pair");
                goto done;
            }
            // Not the first member, so it is preceded by a comma.
            if(!enc_comma(e)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            if(!enc_string(e, tuple[0])) {
                ret = enc_error(e, "invalid_object_key");
                goto done;
            }
            if(!enc_colon(e)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            // Push: remaining members, object marker, then the value,
            // which therefore gets processed next.
            stack = enif_make_list_cell(env, curr, stack);
            stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
            stack = enif_make_list_cell(env, tuple[1], stack);
        } else if(enif_is_identical(curr, e->atoms->ref_array)) {
            // Array continuation: the next stack entry is the list of
            // elements that still have to be written.
            if(!enif_get_list_cell(env, stack, &curr, &stack)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            if(enif_is_empty_list(env, curr)) {
                if(!enc_end_array(e)) {
                    ret = enc_error(e, "internal_error");
                    goto done;
                }
                continue;
            }
            if(!enc_comma(e)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            if(!enif_get_list_cell(env, curr, &item, &curr)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            stack = enif_make_list_cell(env, curr, stack);
            stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
            stack = enif_make_list_cell(env, item, stack);
        } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
            if(!enc_literal(e, "null", 4)) {
                ret = enc_error(e, "null");
                goto done;
            }
        } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
            if(!enc_literal(e, "true", 4)) {
                ret = enc_error(e, "true");
                goto done;
            }
        } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
            if(!enc_literal(e, "false", 5)) {
                ret = enc_error(e, "false");
                goto done;
            }
        } else if(enif_is_binary(env, curr)) {
            if(!enc_string(e, curr)) {
                ret = enc_error(e, "invalid_string");
                goto done;
            }
        } else if(enif_is_atom(env, curr)) {
            // Any atom other than null/true/false is written as a string.
            if(!enc_string(e, curr)) {
                ret = enc_error(e, "invalid_string");
                goto done;
            }
        } else if(enif_get_int64(env, curr, &lval)) {
            if(!enc_long(e, lval)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
        } else if(enif_get_double(env, curr, &dval)) {
            if(!enc_double(e, dval)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
        } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
            // An object is a 1-tuple wrapping a list of {Key, Value}
            // tuples.
            if(arity != 1) {
                ret = enc_error(e, "invalid_ejson");
                goto done;
            }
            if(!enif_is_list(env, tuple[0])) {
                ret = enc_error(e, "invalid_object");
                goto done;
            }
            if(!enc_start_object(e)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            if(enif_is_empty_list(env, tuple[0])) {
                if(!enc_end_object(e)) {
                    ret = enc_error(e, "internal_error");
                    goto done;
                }
                continue;
            }
            // First member: written here without a leading comma.
            if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            if(!enif_get_tuple(env, item, &arity, &tuple)) {
                ret = enc_error(e, "invalid_object_member");
                goto done;
            }
            if(arity != 2) {
                ret = enc_error(e, "invalid_object_member_arity");
                goto done;
            }
            if(!enc_string(e, tuple[0])) {
                ret = enc_error(e, "invalid_object_member_key");
                goto done;
            }
            if(!enc_colon(e)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            stack = enif_make_list_cell(env, curr, stack);
            stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
            stack = enif_make_list_cell(env, tuple[1], stack);
        } else if(enif_is_list(env, curr)) {
            // A list is written as a JSON array.
            if(!enc_start_array(e)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            if(enif_is_empty_list(env, curr)) {
                if(!enc_end_array(e)) {
                    ret = enc_error(e, "internal_error");
                    goto done;
                }
                continue;
            }
            // First element: scheduled here without a leading comma.
            if(!enif_get_list_cell(env, curr, &item, &curr)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
            stack = enif_make_list_cell(env, curr, stack);
            stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
            stack = enif_make_list_cell(env, item, stack);
        } else {
            // Anything else is passed through unencoded on the iolist.
            if(!enc_unknown(e, curr)) {
                ret = enc_error(e, "internal_error");
                goto done;
            }
        }
    }

    if(!enc_done(e, &item)) {
        ret = enc_error(e, "internal_error");
        goto done;
    }

    // A plain binary when nothing was deferred, otherwise the iolist
    // tagged with `partial`.
    if(e->iolen == 0) {
        ret = item;
    } else {
        ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
    }

done:
    // Report the work done in this call, on success and on error.
    processed = (e->iosize + e->i) - start;
    consume_timeslice(env, processed, e->bytes_per_iter);

    return ret;
}