You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

732 lines
18 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include <float.h>
  7. #include "erl_nif.h"
  8. #include "jiffy.h"
  9. #define BIN_INC_SIZE 2048
  10. #define FLOAT_BUFLEN (LDBL_DIG*2)
  11. #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
  12. #define MAYBE_PRETTY(e) \
  13. do { \
  14. if(e->pretty) { \
  15. if(!enc_shift(e)) \
  16. return 0; \
  17. } \
  18. } while(0)
  19. #if WINDOWS || WIN32
  20. #define inline __inline
  21. #define snprintf _snprintf
  22. #endif
  23. typedef struct {
  24. ErlNifEnv* env;
  25. jiffy_st* atoms;
  26. int uescape;
  27. int pretty;
  28. int shiftcnt;
  29. int count;
  30. int iolen;
  31. ERL_NIF_TERM iolist;
  32. ErlNifBinary* curr;
  33. char* p;
  34. unsigned char* u;
  35. size_t i;
  36. } Encoder;
  37. // String constants for pretty printing.
  38. // Every string starts with its length.
  39. #define NUM_SHIFTS 8
  40. static char* shifts[NUM_SHIFTS] = {
  41. "\x01\n",
  42. "\x03\n ",
  43. "\x05\n ",
  44. "\x07\n ",
  45. "\x09\n ",
  46. "\x0b\n ",
  47. "\x0d\n ",
  48. "\x0f\n "
  49. };
  50. int
  51. enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts, ErlNifBinary* bin)
  52. {
  53. ERL_NIF_TERM val;
  54. e->env = env;
  55. e->atoms = enif_priv_data(env);
  56. e->uescape = 0;
  57. e->pretty = 0;
  58. e->shiftcnt = 0;
  59. e->count = 0;
  60. if(!enif_is_list(env, opts)) {
  61. return 0;
  62. }
  63. while(enif_get_list_cell(env, opts, &val, &opts)) {
  64. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  65. e->uescape = 1;
  66. } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
  67. e->pretty = 1;
  68. } else if(enif_compare(val, e->atoms->atom_force_utf8) == 0) {
  69. // Ignore, handled in Erlang
  70. } else {
  71. return 0;
  72. }
  73. }
  74. e->iolen = 0;
  75. e->iolist = enif_make_list(env, 0);
  76. e->curr = bin;
  77. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  78. return 0;
  79. }
  80. memset(e->curr->data, 0, e->curr->size);
  81. e->p = (char*) e->curr->data;
  82. e->u = (unsigned char*) e->curr->data;
  83. e->i = 0;
  84. return 1;
  85. }
  86. void
  87. enc_destroy(Encoder* e)
  88. {
  89. if(e->curr != NULL) {
  90. enif_release_binary(e->curr);
  91. }
  92. }
  93. ERL_NIF_TERM
  94. enc_error(Encoder* e, const char* msg)
  95. {
  96. //assert(0 && msg);
  97. return make_error(e->atoms, e->env, msg);
  98. }
  99. static inline int
  100. enc_ensure(Encoder* e, size_t req)
  101. {
  102. size_t need = e->curr->size;
  103. while(req >= (need - e->i)) need <<= 1;
  104. if(need != e->curr->size) {
  105. if(!enif_realloc_binary(e->curr, need)) {
  106. return 0;
  107. }
  108. e->p = (char*) e->curr->data;
  109. e->u = (unsigned char*) e->curr->data;
  110. }
  111. return 1;
  112. }
  113. int
  114. enc_result(Encoder* e, ERL_NIF_TERM* value)
  115. {
  116. if(e->i != e->curr->size) {
  117. if(!enif_realloc_binary(e->curr, e->i)) {
  118. return 0;
  119. }
  120. }
  121. *value = enif_make_binary(e->env, e->curr);
  122. e->curr = NULL;
  123. return 1;
  124. }
  125. int
  126. enc_done(Encoder* e, ERL_NIF_TERM* value)
  127. {
  128. ERL_NIF_TERM last;
  129. if(e->iolen == 0) {
  130. return enc_result(e, value);
  131. }
  132. if(e->i > 0 ) {
  133. if(!enc_result(e, &last)) {
  134. return 0;
  135. }
  136. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  137. e->iolen++;
  138. }
  139. *value = e->iolist;
  140. return 1;
  141. }
  142. static inline int
  143. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  144. {
  145. ErlNifBinary* bin = e->curr;
  146. ERL_NIF_TERM curr;
  147. if(e->i > 0) {
  148. if(!enc_result(e, &curr)) {
  149. return 0;
  150. }
  151. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  152. e->iolen++;
  153. }
  154. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  155. e->iolen++;
  156. // Reinitialize our binary for the next buffer.
  157. e->curr = bin;
  158. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  159. return 0;
  160. }
  161. memset(e->curr->data, 0, e->curr->size);
  162. e->p = (char*) e->curr->data;
  163. e->u = (unsigned char*) e->curr->data;
  164. e->i = 0;
  165. return 1;
  166. }
  167. static inline int
  168. enc_literal(Encoder* e, const char* literal, size_t len)
  169. {
  170. if(!enc_ensure(e, len)) {
  171. return 0;
  172. }
  173. memcpy(&(e->p[e->i]), literal, len);
  174. e->i += len;
  175. e->count++;
  176. return 1;
  177. }
  178. static inline int
  179. enc_string(Encoder* e, ERL_NIF_TERM val)
  180. {
  181. ErlNifBinary bin;
  182. char atom[512];
  183. unsigned char* data;
  184. size_t size;
  185. int esc_extra = 0;
  186. int ulen;
  187. int uval;
  188. int i;
  189. if(enif_is_binary(e->env, val)) {
  190. if(!enif_inspect_binary(e->env, val, &bin)) {
  191. return 0;
  192. }
  193. data = bin.data;
  194. size = bin.size;
  195. } else if(enif_is_atom(e->env, val)) {
  196. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  197. return 0;
  198. }
  199. data = (unsigned char*) atom;
  200. size = strlen(atom);
  201. } else {
  202. return 0;
  203. }
  204. i = 0;
  205. while(i < size) {
  206. switch((char) data[i]) {
  207. case '\"':
  208. case '\\':
  209. case '/':
  210. case '\b':
  211. case '\f':
  212. case '\n':
  213. case '\r':
  214. case '\t':
  215. esc_extra += 1;
  216. i++;
  217. continue;
  218. default:
  219. if(data[i] < 0x20) {
  220. esc_extra += 5;
  221. i++;
  222. continue;
  223. } else if(data[i] < 0x80) {
  224. i++;
  225. continue;
  226. }
  227. ulen = utf8_validate(&(data[i]), size - i);
  228. if(ulen < 0) {
  229. return 0;
  230. }
  231. if(e->uescape) {
  232. uval = utf8_to_unicode(&(data[i]), ulen);
  233. if(uval < 0) {
  234. return 0;
  235. }
  236. esc_extra += utf8_esc_len(uval);
  237. if(ulen < 0) {
  238. return 0;
  239. }
  240. }
  241. i += ulen;
  242. }
  243. }
  244. if(!enc_ensure(e, size + esc_extra + 2)) {
  245. return 0;
  246. }
  247. e->p[e->i++] = '\"';
  248. i = 0;
  249. while(i < size) {
  250. switch((char) data[i]) {
  251. case '\"':
  252. case '\\':
  253. case '/':
  254. e->p[e->i++] = '\\';
  255. e->u[e->i++] = data[i];
  256. i++;
  257. continue;
  258. case '\b':
  259. e->p[e->i++] = '\\';
  260. e->p[e->i++] = 'b';
  261. i++;
  262. continue;
  263. case '\f':
  264. e->p[e->i++] = '\\';
  265. e->p[e->i++] = 'f';
  266. i++;
  267. continue;
  268. case '\n':
  269. e->p[e->i++] = '\\';
  270. e->p[e->i++] = 'n';
  271. i++;
  272. continue;
  273. case '\r':
  274. e->p[e->i++] = '\\';
  275. e->p[e->i++] = 'r';
  276. i++;
  277. continue;
  278. case '\t':
  279. e->p[e->i++] = '\\';
  280. e->p[e->i++] = 't';
  281. i++;
  282. continue;
  283. default:
  284. if(data[i] < 0x20) {
  285. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  286. if(ulen < 0) {
  287. return 0;
  288. }
  289. e->i += ulen;
  290. i++;
  291. } else if((data[i] & 0x80) && e->uescape) {
  292. uval = utf8_to_unicode(&(data[i]), size-i);
  293. if(uval < 0) {
  294. return 0;
  295. }
  296. ulen = unicode_uescape(uval, &(e->p[e->i]));
  297. if(ulen < 0) {
  298. return 0;
  299. }
  300. e->i += ulen;
  301. ulen = utf8_len(uval);
  302. if(ulen < 0) {
  303. return 0;
  304. }
  305. i += ulen;
  306. } else {
  307. e->u[e->i++] = data[i++];
  308. }
  309. }
  310. }
  311. e->p[e->i++] = '\"';
  312. e->count++;
  313. return 1;
  314. }
  315. static inline int
  316. enc_long(Encoder* e, ErlNifSInt64 val)
  317. {
  318. if(!enc_ensure(e, 32)) {
  319. return 0;
  320. }
  321. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  322. snprintf(&(e->p[e->i]), 32, "%ld", val);
  323. #elif SIZEOF_LONG == 8
  324. snprintf(&(e->p[e->i]), 32, "%ld", val);
  325. #else
  326. snprintf(&(e->p[e->i]), 32, "%lld", val);
  327. #endif
  328. e->i += strlen(&(e->p[e->i]));
  329. e->count++;
  330. return 1;
  331. }
  332. static inline int
  333. enc_double(Encoder* e, double val)
  334. {
  335. char* start;
  336. size_t len;
  337. size_t i;
  338. if(!enc_ensure(e, FLOAT_BUFLEN)) {
  339. return 0;
  340. }
  341. start = &(e->p[e->i]);
  342. // try to encode doubles using the fewest digits possible...
  343. if (snprintf(start, FLOAT_BUFLEN, "%.*g", DBL_DIG, val) > FLT_DIG)
  344. {
  345. // ...fall back to full expansion to be safe
  346. snprintf(start, FLOAT_BUFLEN, "%.*g", LDBL_DIG, val);
  347. }
  348. len = strlen(start);
  349. // Check if we have a decimal point
  350. for(i = 0; i < len; i++) {
  351. if(start[i] == '.' || start[i] == 'e' || start[i] == 'E')
  352. goto done;
  353. }
  354. if(len >= FLOAT_BUFLEN-2) return 0;
  355. // Force a decimal point
  356. start[len++] = '.';
  357. start[len++] = '0';
  358. done:
  359. e->i += len;
  360. e->count++;
  361. return 1;
  362. }
  363. static inline int
  364. enc_char(Encoder* e, char c)
  365. {
  366. if(!enc_ensure(e, 1)) {
  367. return 0;
  368. }
  369. e->p[e->i++] = c;
  370. return 1;
  371. }
  372. static int
  373. enc_shift(Encoder* e) {
  374. int i;
  375. char* shift;
  376. assert(e->shiftcnt >= 0 && "Invalid shift count.");
  377. shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
  378. if(!enc_literal(e, shift + 1, *shift))
  379. return 0;
  380. // Finish the rest of this shift it's it bigger than
  381. // our largest predefined constant.
  382. for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
  383. if(!enc_literal(e, " ", 2))
  384. return 0;
  385. }
  386. return 1;
  387. }
  388. static inline int
  389. enc_start_object(Encoder* e)
  390. {
  391. e->count++;
  392. e->shiftcnt++;
  393. if(!enc_char(e, '{'))
  394. return 0;
  395. MAYBE_PRETTY(e);
  396. return 1;
  397. }
  398. static inline int
  399. enc_end_object(Encoder* e)
  400. {
  401. e->shiftcnt--;
  402. MAYBE_PRETTY(e);
  403. return enc_char(e, '}');
  404. }
  405. static inline int
  406. enc_start_array(Encoder* e)
  407. {
  408. e->count++;
  409. e->shiftcnt++;
  410. if(!enc_char(e, '['))
  411. return 0;
  412. MAYBE_PRETTY(e);
  413. return 1;
  414. }
  415. static inline int
  416. enc_end_array(Encoder* e)
  417. {
  418. e->shiftcnt--;
  419. MAYBE_PRETTY(e);
  420. return enc_char(e, ']');
  421. }
  422. static inline int
  423. enc_colon(Encoder* e)
  424. {
  425. if(e->pretty)
  426. return enc_literal(e, " : ", 3);
  427. return enc_char(e, ':');
  428. }
  429. static inline int
  430. enc_comma(Encoder* e)
  431. {
  432. if(!enc_char(e, ','))
  433. return 0;
  434. MAYBE_PRETTY(e);
  435. return 1;
  436. }
  437. ERL_NIF_TERM
  438. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  439. {
  440. Encoder enc;
  441. Encoder* e = &enc;
  442. ErlNifBinary bin;
  443. ERL_NIF_TERM ret;
  444. ERL_NIF_TERM stack;
  445. ERL_NIF_TERM curr;
  446. ERL_NIF_TERM item;
  447. const ERL_NIF_TERM* tuple;
  448. int arity;
  449. ErlNifSInt64 lval;
  450. double dval;
  451. if(argc != 2) {
  452. return enif_make_badarg(env);
  453. }
  454. if(!enc_init(e, env, argv[1], &bin)) {
  455. return enif_make_badarg(env);
  456. }
  457. stack = enif_make_list(env, 1, argv[0]);
  458. while(!enif_is_empty_list(env, stack)) {
  459. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  460. ret = enc_error(e, "internal_error");
  461. goto done;
  462. }
  463. if(enif_is_identical(curr, e->atoms->ref_object)) {
  464. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  465. ret = enc_error(e, "internal_error");
  466. goto done;
  467. }
  468. if(enif_is_empty_list(env, curr)) {
  469. if(!enc_end_object(e)) {
  470. ret = enc_error(e, "internal_error");
  471. goto done;
  472. }
  473. continue;
  474. }
  475. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  476. ret = enc_error(e, "internal_error");
  477. goto done;
  478. }
  479. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  480. ret = enc_error(e, "invalid_object_pair");
  481. goto done;
  482. }
  483. if(arity != 2) {
  484. ret = enc_error(e, "invalid_object_pair");
  485. goto done;
  486. }
  487. if(!enc_comma(e)) {
  488. ret = enc_error(e, "internal_error");
  489. goto done;
  490. }
  491. if(!enc_string(e, tuple[0])) {
  492. ret = enc_error(e, "invalid_object_key");
  493. goto done;
  494. }
  495. if(!enc_colon(e)) {
  496. ret = enc_error(e, "internal_error");
  497. goto done;
  498. }
  499. stack = enif_make_list_cell(env, curr, stack);
  500. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  501. stack = enif_make_list_cell(env, tuple[1], stack);
  502. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  503. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  504. ret = enc_error(e, "internal_error");
  505. goto done;
  506. }
  507. if(enif_is_empty_list(env, curr)) {
  508. if(!enc_end_array(e)) {
  509. ret = enc_error(e, "internal_error");
  510. goto done;
  511. }
  512. continue;
  513. }
  514. if(!enc_comma(e)) {
  515. ret = enc_error(e, "internal_error");
  516. goto done;
  517. }
  518. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  519. ret = enc_error(e, "internal_error");
  520. goto done;
  521. }
  522. stack = enif_make_list_cell(env, curr, stack);
  523. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  524. stack = enif_make_list_cell(env, item, stack);
  525. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  526. if(!enc_literal(e, "null", 4)) {
  527. ret = enc_error(e, "null");
  528. goto done;
  529. }
  530. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  531. if(!enc_literal(e, "true", 4)) {
  532. ret = enc_error(e, "true");
  533. goto done;
  534. }
  535. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  536. if(!enc_literal(e, "false", 5)) {
  537. ret = enc_error(e, "false");
  538. goto done;
  539. }
  540. } else if(enif_is_binary(env, curr)) {
  541. if(!enc_string(e, curr)) {
  542. ret = enc_error(e, "invalid_string");
  543. goto done;
  544. }
  545. } else if(enif_is_atom(env, curr)) {
  546. if(!enc_string(e, curr)) {
  547. ret = enc_error(e, "invalid_string");
  548. goto done;
  549. }
  550. } else if(enif_get_int64(env, curr, &lval)) {
  551. if(!enc_long(e, lval)) {
  552. ret = enc_error(e, "internal_error");
  553. goto done;
  554. }
  555. } else if(enif_get_double(env, curr, &dval)) {
  556. if(!enc_double(e, dval)) {
  557. ret = enc_error(e, "internal_error");
  558. goto done;
  559. }
  560. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  561. if(arity != 1) {
  562. ret = enc_error(e, "invalid_ejson");
  563. goto done;
  564. }
  565. if(!enif_is_list(env, tuple[0])) {
  566. ret = enc_error(e, "invalid_object");
  567. goto done;
  568. }
  569. if(!enc_start_object(e)) {
  570. ret = enc_error(e, "internal_error");
  571. goto done;
  572. }
  573. if(enif_is_empty_list(env, tuple[0])) {
  574. if(!enc_end_object(e)) {
  575. ret = enc_error(e, "internal_error");
  576. goto done;
  577. }
  578. continue;
  579. }
  580. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  581. ret = enc_error(e, "internal_error");
  582. goto done;
  583. }
  584. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  585. ret = enc_error(e, "invalid_object_member");
  586. goto done;
  587. }
  588. if(arity != 2) {
  589. ret = enc_error(e, "invalid_object_member_arity");
  590. goto done;
  591. }
  592. if(!enc_string(e, tuple[0])) {
  593. ret = enc_error(e, "invalid_object_member_key");
  594. goto done;
  595. }
  596. if(!enc_colon(e)) {
  597. ret = enc_error(e, "internal_error");
  598. goto done;
  599. }
  600. stack = enif_make_list_cell(env, curr, stack);
  601. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  602. stack = enif_make_list_cell(env, tuple[1], stack);
  603. } else if(enif_is_list(env, curr)) {
  604. if(!enc_start_array(e)) {
  605. ret = enc_error(e, "internal_error");
  606. goto done;
  607. }
  608. if(enif_is_empty_list(env, curr)) {
  609. if(!enc_end_array(e)) {
  610. ret = enc_error(e, "internal_error");
  611. goto done;
  612. }
  613. continue;
  614. }
  615. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  616. ret = enc_error(e, "internal_error");
  617. goto done;
  618. }
  619. stack = enif_make_list_cell(env, curr, stack);
  620. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  621. stack = enif_make_list_cell(env, item, stack);
  622. } else {
  623. if(!enc_unknown(e, curr)) {
  624. ret = enc_error(e, "internal_error");
  625. goto done;
  626. }
  627. }
  628. } while(!enif_is_empty_list(env, stack));
  629. if(!enc_done(e, &item)) {
  630. ret = enc_error(e, "internal_error");
  631. goto done;
  632. }
  633. if(e->iolen == 0) {
  634. ret = item;
  635. } else {
  636. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  637. }
  638. done:
  639. enc_destroy(e);
  640. return ret;
  641. }