You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

718 lines
17 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  8. #define BIN_INC_SIZE 2048
  9. #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
  10. #define MAYBE_PRETTY(e) \
  11. do { \
  12. if(e->pretty) { \
  13. if(!enc_shift(e)) \
  14. return 0; \
  15. } \
  16. } while(0)
// State carried through a single encode() call and shared by all of the
// enc_* helpers in this file.
typedef struct {
    ErlNifEnv* env;         // NIF environment used to build result terms.
    jiffy_st* atoms;        // Module private data (enif_priv_data): atoms and marker refs.
    int uescape;            // Non-zero: emit non-ASCII code points as escapes.
    int pretty;             // Non-zero: emit newlines and indentation.
    int shiftcnt;           // Current nesting depth; selects the indentation width.
    int count;              // Bumped for each literal/string/number/container start written.
    int iolen;              // Number of entries pushed onto `iolist`.
    ERL_NIF_TERM iolist;    // Finished chunks and pass-through terms; entries are prepended.
    ErlNifBinary* curr;     // Buffer currently being filled; NULL once handed to a term.
    char* p;                // curr->data viewed as char*.
    unsigned char* u;       // curr->data viewed as unsigned char*.
    size_t i;               // Write offset: number of bytes used in `curr`.
} Encoder;
  31. // String constants for pretty printing.
  32. // Every string starts with its length.
  33. #define NUM_SHIFTS 8
  34. static char* shifts[NUM_SHIFTS] = {
  35. "\x01\n",
  36. "\x03\n ",
  37. "\x05\n ",
  38. "\x07\n ",
  39. "\x09\n ",
  40. "\x0b\n ",
  41. "\x0d\n ",
  42. "\x0f\n "
  43. };
  44. int
  45. enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts, ErlNifBinary* bin)
  46. {
  47. ERL_NIF_TERM val;
  48. e->env = env;
  49. e->atoms = enif_priv_data(env);
  50. e->uescape = 0;
  51. e->pretty = 0;
  52. e->shiftcnt = 0;
  53. e->count = 0;
  54. if(!enif_is_list(env, opts)) {
  55. return 0;
  56. }
  57. while(enif_get_list_cell(env, opts, &val, &opts)) {
  58. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  59. e->uescape = 1;
  60. } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
  61. e->pretty = 1;
  62. } else {
  63. return 0;
  64. }
  65. }
  66. e->iolen = 0;
  67. e->iolist = enif_make_list(env, 0);
  68. e->curr = bin;
  69. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  70. return 0;
  71. }
  72. memset(e->curr->data, 0, e->curr->size);
  73. e->p = (char*) e->curr->data;
  74. e->u = (unsigned char*) e->curr->data;
  75. e->i = 0;
  76. return 1;
  77. }
  78. void
  79. enc_destroy(Encoder* e)
  80. {
  81. if(e->curr != NULL) {
  82. enif_release_binary(e->curr);
  83. }
  84. }
  85. ERL_NIF_TERM
  86. enc_error(Encoder* e, const char* msg)
  87. {
  88. //assert(0 && msg);
  89. return make_error(e->atoms, e->env, msg);
  90. }
  91. static inline int
  92. enc_ensure(Encoder* e, size_t req)
  93. {
  94. size_t need = e->curr->size;
  95. while(req >= (need - e->i)) need <<= 1;
  96. if(need != e->curr->size) {
  97. if(!enif_realloc_binary(e->curr, need)) {
  98. return 0;
  99. }
  100. e->p = (char*) e->curr->data;
  101. e->u = (unsigned char*) e->curr->data;
  102. }
  103. return 1;
  104. }
  105. int
  106. enc_result(Encoder* e, ERL_NIF_TERM* value)
  107. {
  108. if(e->i != e->curr->size) {
  109. if(!enif_realloc_binary(e->curr, e->i)) {
  110. return 0;
  111. }
  112. }
  113. *value = enif_make_binary(e->env, e->curr);
  114. e->curr = NULL;
  115. return 1;
  116. }
  117. int
  118. enc_done(Encoder* e, ERL_NIF_TERM* value)
  119. {
  120. ERL_NIF_TERM last;
  121. if(e->iolen == 0) {
  122. return enc_result(e, value);
  123. }
  124. if(e->i > 0 ) {
  125. if(!enc_result(e, &last)) {
  126. return 0;
  127. }
  128. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  129. e->iolen++;
  130. }
  131. *value = e->iolist;
  132. return 1;
  133. }
  134. static inline int
  135. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  136. {
  137. ErlNifBinary* bin = e->curr;
  138. ERL_NIF_TERM curr;
  139. if(e->i > 0) {
  140. if(!enc_result(e, &curr)) {
  141. return 0;
  142. }
  143. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  144. e->iolen++;
  145. }
  146. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  147. e->iolen++;
  148. // Reinitialize our binary for the next buffer.
  149. e->curr = bin;
  150. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  151. return 0;
  152. }
  153. memset(e->curr->data, 0, e->curr->size);
  154. e->p = (char*) e->curr->data;
  155. e->u = (unsigned char*) e->curr->data;
  156. e->i = 0;
  157. return 1;
  158. }
  159. static inline int
  160. enc_literal(Encoder* e, const char* literal, size_t len)
  161. {
  162. if(!enc_ensure(e, len)) {
  163. return 0;
  164. }
  165. memcpy(&(e->p[e->i]), literal, len);
  166. e->i += len;
  167. e->count++;
  168. return 1;
  169. }
  170. static inline int
  171. enc_string(Encoder* e, ERL_NIF_TERM val)
  172. {
  173. ErlNifBinary bin;
  174. char atom[512];
  175. unsigned char* data;
  176. size_t size;
  177. int esc_extra = 0;
  178. int ulen;
  179. int uval;
  180. int i;
  181. if(enif_is_binary(e->env, val)) {
  182. if(!enif_inspect_binary(e->env, val, &bin)) {
  183. return 0;
  184. }
  185. data = bin.data;
  186. size = bin.size;
  187. } else if(enif_is_atom(e->env, val)) {
  188. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  189. return 0;
  190. }
  191. data = (unsigned char*) atom;
  192. size = strlen(atom);
  193. } else {
  194. return 0;
  195. }
  196. i = 0;
  197. while(i < size) {
  198. switch((char) data[i]) {
  199. case '\"':
  200. case '\\':
  201. case '/':
  202. case '\b':
  203. case '\f':
  204. case '\n':
  205. case '\r':
  206. case '\t':
  207. esc_extra += 1;
  208. i++;
  209. continue;
  210. default:
  211. if(data[i] < 0x20) {
  212. esc_extra += 5;
  213. i++;
  214. continue;
  215. } else if(data[i] < 0x80) {
  216. i++;
  217. continue;
  218. }
  219. ulen = utf8_validate(&(data[i]), size - i);
  220. if(ulen < 0) {
  221. return 0;
  222. }
  223. if(e->uescape) {
  224. uval = utf8_to_unicode(&(data[i]), ulen);
  225. if(uval < 0) {
  226. return 0;
  227. }
  228. esc_extra = utf8_esc_len(uval);
  229. if(ulen < 0) {
  230. return 0;
  231. }
  232. }
  233. i += ulen;
  234. }
  235. }
  236. if(!enc_ensure(e, size + esc_extra + 2)) {
  237. return 0;
  238. }
  239. e->p[e->i++] = '\"';
  240. i = 0;
  241. while(i < size) {
  242. switch((char) data[i]) {
  243. case '\"':
  244. case '\\':
  245. case '/':
  246. e->p[e->i++] = '\\';
  247. e->u[e->i++] = data[i];
  248. i++;
  249. continue;
  250. case '\b':
  251. e->p[e->i++] = '\\';
  252. e->p[e->i++] = 'b';
  253. i++;
  254. continue;
  255. case '\f':
  256. e->p[e->i++] = '\\';
  257. e->p[e->i++] = 'f';
  258. i++;
  259. continue;
  260. case '\n':
  261. e->p[e->i++] = '\\';
  262. e->p[e->i++] = 'n';
  263. i++;
  264. continue;
  265. case '\r':
  266. e->p[e->i++] = '\\';
  267. e->p[e->i++] = 'r';
  268. i++;
  269. continue;
  270. case '\t':
  271. e->p[e->i++] = '\\';
  272. e->p[e->i++] = 't';
  273. i++;
  274. continue;
  275. default:
  276. if(data[i] < 0x20) {
  277. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  278. if(ulen < 0) {
  279. return 0;
  280. }
  281. e->i += ulen;
  282. i++;
  283. } else if((data[i] & 0x80) && e->uescape) {
  284. uval = utf8_to_unicode(&(data[i]), size-i);
  285. if(uval < 0) {
  286. return 0;
  287. }
  288. ulen = unicode_uescape(uval, &(e->p[e->i]));
  289. if(ulen < 0) {
  290. return 0;
  291. }
  292. e->i += ulen;
  293. ulen = utf8_len(uval);
  294. if(ulen < 0) {
  295. return 0;
  296. }
  297. i += ulen;
  298. } else {
  299. e->u[e->i++] = data[i++];
  300. }
  301. }
  302. }
  303. e->p[e->i++] = '\"';
  304. e->count++;
  305. return 1;
  306. }
  307. static inline int
  308. enc_long(Encoder* e, ErlNifSInt64 val)
  309. {
  310. if(!enc_ensure(e, 32)) {
  311. return 0;
  312. }
  313. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  314. snprintf(&(e->p[e->i]), 32, "%ld", val);
  315. #elif SIZEOF_LONG == 8
  316. snprintf(&(e->p[e->i]), 32, "%ld", val);
  317. #else
  318. snprintf(&(e->p[e->i]), 32, "%lld", val);
  319. #endif
  320. e->i += strlen(&(e->p[e->i]));
  321. e->count++;
  322. return 1;
  323. }
  324. static inline int
  325. enc_double(Encoder* e, double val)
  326. {
  327. char* start;
  328. size_t len;
  329. size_t i;
  330. if(!enc_ensure(e, 32)) {
  331. return 0;
  332. }
  333. start = &(e->p[e->i]);
  334. sprintf(start, "%0.20g", val);
  335. len = strlen(start);
  336. // Check if we have a decimal point
  337. for(i = 0; i < len; i++) {
  338. if(start[i] == '.' || start[i] == 'e' || start[i] == 'E')
  339. goto done;
  340. }
  341. if(len > 29) return 0;
  342. // Force a decimal point
  343. start[len++] = '.';
  344. start[len++] = '0';
  345. done:
  346. e->i += len;
  347. e->count++;
  348. return 1;
  349. }
  350. static inline int
  351. enc_char(Encoder* e, char c)
  352. {
  353. if(!enc_ensure(e, 1)) {
  354. return 0;
  355. }
  356. e->p[e->i++] = c;
  357. return 1;
  358. }
  359. static int
  360. enc_shift(Encoder* e) {
  361. int i;
  362. char* shift;
  363. assert(e->shiftcnt >= 0 && "Invalid shift count.");
  364. shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
  365. if(!enc_literal(e, shift + 1, *shift))
  366. return 0;
  367. // Finish the rest of this shift it's it bigger than
  368. // our largest predefined constant.
  369. for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
  370. if(!enc_literal(e, " ", 2))
  371. return 0;
  372. }
  373. return 1;
  374. }
  375. static inline int
  376. enc_start_object(Encoder* e)
  377. {
  378. e->count++;
  379. e->shiftcnt++;
  380. if(!enc_char(e, '{'))
  381. return 0;
  382. MAYBE_PRETTY(e);
  383. return 1;
  384. }
  385. static inline int
  386. enc_end_object(Encoder* e)
  387. {
  388. e->shiftcnt--;
  389. MAYBE_PRETTY(e);
  390. return enc_char(e, '}');
  391. }
  392. static inline int
  393. enc_start_array(Encoder* e)
  394. {
  395. e->count++;
  396. e->shiftcnt++;
  397. if(!enc_char(e, '['))
  398. return 0;
  399. MAYBE_PRETTY(e);
  400. return 1;
  401. }
  402. static inline int
  403. enc_end_array(Encoder* e)
  404. {
  405. e->shiftcnt--;
  406. MAYBE_PRETTY(e);
  407. return enc_char(e, ']');
  408. }
  409. static inline int
  410. enc_colon(Encoder* e)
  411. {
  412. if(e->pretty)
  413. return enc_literal(e, " : ", 3);
  414. return enc_char(e, ':');
  415. }
  416. static inline int
  417. enc_comma(Encoder* e)
  418. {
  419. if(!enc_char(e, ','))
  420. return 0;
  421. MAYBE_PRETTY(e);
  422. return 1;
  423. }
  424. ERL_NIF_TERM
  425. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  426. {
  427. Encoder enc;
  428. Encoder* e = &enc;
  429. ErlNifBinary bin;
  430. ERL_NIF_TERM ret;
  431. ERL_NIF_TERM stack;
  432. ERL_NIF_TERM curr;
  433. ERL_NIF_TERM item;
  434. const ERL_NIF_TERM* tuple;
  435. int arity;
  436. ErlNifSInt64 lval;
  437. double dval;
  438. if(argc != 2) {
  439. return enif_make_badarg(env);
  440. }
  441. if(!enc_init(e, env, argv[1], &bin)) {
  442. return enif_make_badarg(env);
  443. }
  444. stack = enif_make_list(env, 1, argv[0]);
  445. while(!enif_is_empty_list(env, stack)) {
  446. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  447. ret = enc_error(e, "internal_error");
  448. goto done;
  449. }
  450. if(enif_is_identical(curr, e->atoms->ref_object)) {
  451. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  452. ret = enc_error(e, "internal_error");
  453. goto done;
  454. }
  455. if(enif_is_empty_list(env, curr)) {
  456. if(!enc_end_object(e)) {
  457. ret = enc_error(e, "internal_error");
  458. goto done;
  459. }
  460. continue;
  461. }
  462. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  463. ret = enc_error(e, "internal_error");
  464. goto done;
  465. }
  466. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  467. ret = enc_error(e, "invalid_object_pair");
  468. goto done;
  469. }
  470. if(arity != 2) {
  471. ret = enc_error(e, "invalid_object_pair");
  472. goto done;
  473. }
  474. if(!enc_comma(e)) {
  475. ret = enc_error(e, "internal_error");
  476. goto done;
  477. }
  478. if(!enc_string(e, tuple[0])) {
  479. ret = enc_error(e, "invalid_object_key");
  480. goto done;
  481. }
  482. if(!enc_colon(e)) {
  483. ret = enc_error(e, "internal_error");
  484. goto done;
  485. }
  486. stack = enif_make_list_cell(env, curr, stack);
  487. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  488. stack = enif_make_list_cell(env, tuple[1], stack);
  489. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  490. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  491. ret = enc_error(e, "internal_error");
  492. goto done;
  493. }
  494. if(enif_is_empty_list(env, curr)) {
  495. if(!enc_end_array(e)) {
  496. ret = enc_error(e, "internal_error");
  497. goto done;
  498. }
  499. continue;
  500. }
  501. if(!enc_comma(e)) {
  502. ret = enc_error(e, "internal_error");
  503. goto done;
  504. }
  505. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  506. ret = enc_error(e, "internal_error");
  507. goto done;
  508. }
  509. stack = enif_make_list_cell(env, curr, stack);
  510. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  511. stack = enif_make_list_cell(env, item, stack);
  512. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  513. if(!enc_literal(e, "null", 4)) {
  514. ret = enc_error(e, "null");
  515. goto done;
  516. }
  517. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  518. if(!enc_literal(e, "true", 4)) {
  519. ret = enc_error(e, "true");
  520. goto done;
  521. }
  522. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  523. if(!enc_literal(e, "false", 5)) {
  524. ret = enc_error(e, "false");
  525. goto done;
  526. }
  527. } else if(enif_is_binary(env, curr)) {
  528. if(!enc_string(e, curr)) {
  529. ret = enc_error(e, "invalid_string");
  530. goto done;
  531. }
  532. } else if(enif_is_atom(env, curr)) {
  533. if(!enc_string(e, curr)) {
  534. ret = enc_error(e, "invalid_string");
  535. goto done;
  536. }
  537. } else if(enif_get_int64(env, curr, &lval)) {
  538. if(!enc_long(e, lval)) {
  539. ret = enc_error(e, "internal_error");
  540. goto done;
  541. }
  542. } else if(enif_get_double(env, curr, &dval)) {
  543. if(!enc_double(e, dval)) {
  544. ret = enc_error(e, "internal_error");
  545. goto done;
  546. }
  547. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  548. if(arity != 1) {
  549. ret = enc_error(e, "invalid_ejson");
  550. goto done;
  551. }
  552. if(!enif_is_list(env, tuple[0])) {
  553. ret = enc_error(e, "invalid_object");
  554. goto done;
  555. }
  556. if(!enc_start_object(e)) {
  557. ret = enc_error(e, "internal_error");
  558. goto done;
  559. }
  560. if(enif_is_empty_list(env, tuple[0])) {
  561. if(!enc_end_object(e)) {
  562. ret = enc_error(e, "internal_error");
  563. goto done;
  564. }
  565. continue;
  566. }
  567. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  568. ret = enc_error(e, "internal_error");
  569. goto done;
  570. }
  571. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  572. ret = enc_error(e, "invalid_object_member");
  573. goto done;
  574. }
  575. if(arity != 2) {
  576. ret = enc_error(e, "invalid_object_member_arity");
  577. goto done;
  578. }
  579. if(!enc_string(e, tuple[0])) {
  580. ret = enc_error(e, "invalid_object_member_key");
  581. goto done;
  582. }
  583. if(!enc_colon(e)) {
  584. ret = enc_error(e, "internal_error");
  585. goto done;
  586. }
  587. stack = enif_make_list_cell(env, curr, stack);
  588. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  589. stack = enif_make_list_cell(env, tuple[1], stack);
  590. } else if(enif_is_list(env, curr)) {
  591. if(!enc_start_array(e)) {
  592. ret = enc_error(e, "internal_error");
  593. goto done;
  594. }
  595. if(enif_is_empty_list(env, curr)) {
  596. if(!enc_end_array(e)) {
  597. ret = enc_error(e, "internal_error");
  598. goto done;
  599. }
  600. continue;
  601. }
  602. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  603. ret = enc_error(e, "internal_error");
  604. goto done;
  605. }
  606. stack = enif_make_list_cell(env, curr, stack);
  607. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  608. stack = enif_make_list_cell(env, item, stack);
  609. } else {
  610. if(!enc_unknown(e, curr)) {
  611. ret = enc_error(e, "internal_error");
  612. goto done;
  613. }
  614. }
  615. } while(!enif_is_empty_list(env, stack));
  616. if(!enc_done(e, &item)) {
  617. ret = enc_error(e, "internal_error");
  618. goto done;
  619. }
  620. if(e->iolen == 0) {
  621. ret = item;
  622. } else {
  623. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  624. }
  625. done:
  626. enc_destroy(e);
  627. return ret;
  628. }