Du kannst nicht mehr als 25 Themen auswählen Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

724 Zeilen
18 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  8. #define BIN_INC_SIZE 2048
  9. #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
  10. #define MAYBE_PRETTY(e) \
  11. do { \
  12. if(e->pretty) { \
  13. if(!enc_shift(e)) \
  14. return 0; \
  15. } \
  16. } while(0)
  17. #if WINDOWS || WIN32
  18. #define inline __inline
  19. #define snprintf _snprintf
  20. #endif
  21. typedef struct {
  22. ErlNifEnv* env;
  23. jiffy_st* atoms;
  24. int uescape;
  25. int pretty;
  26. int shiftcnt;
  27. int count;
  28. int iolen;
  29. ERL_NIF_TERM iolist;
  30. ErlNifBinary* curr;
  31. char* p;
  32. unsigned char* u;
  33. size_t i;
  34. } Encoder;
  35. // String constants for pretty printing.
  36. // Every string starts with its length.
  37. #define NUM_SHIFTS 8
  38. static char* shifts[NUM_SHIFTS] = {
  39. "\x01\n",
  40. "\x03\n ",
  41. "\x05\n ",
  42. "\x07\n ",
  43. "\x09\n ",
  44. "\x0b\n ",
  45. "\x0d\n ",
  46. "\x0f\n "
  47. };
  48. int
  49. enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts, ErlNifBinary* bin)
  50. {
  51. ERL_NIF_TERM val;
  52. e->env = env;
  53. e->atoms = enif_priv_data(env);
  54. e->uescape = 0;
  55. e->pretty = 0;
  56. e->shiftcnt = 0;
  57. e->count = 0;
  58. if(!enif_is_list(env, opts)) {
  59. return 0;
  60. }
  61. while(enif_get_list_cell(env, opts, &val, &opts)) {
  62. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  63. e->uescape = 1;
  64. } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
  65. e->pretty = 1;
  66. } else if(enif_compare(val, e->atoms->atom_force_utf8) == 0) {
  67. // Ignore, handled in Erlang
  68. } else {
  69. return 0;
  70. }
  71. }
  72. e->iolen = 0;
  73. e->iolist = enif_make_list(env, 0);
  74. e->curr = bin;
  75. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  76. return 0;
  77. }
  78. memset(e->curr->data, 0, e->curr->size);
  79. e->p = (char*) e->curr->data;
  80. e->u = (unsigned char*) e->curr->data;
  81. e->i = 0;
  82. return 1;
  83. }
  84. void
  85. enc_destroy(Encoder* e)
  86. {
  87. if(e->curr != NULL) {
  88. enif_release_binary(e->curr);
  89. }
  90. }
  91. ERL_NIF_TERM
  92. enc_error(Encoder* e, const char* msg)
  93. {
  94. //assert(0 && msg);
  95. return make_error(e->atoms, e->env, msg);
  96. }
  97. static inline int
  98. enc_ensure(Encoder* e, size_t req)
  99. {
  100. size_t need = e->curr->size;
  101. while(req >= (need - e->i)) need <<= 1;
  102. if(need != e->curr->size) {
  103. if(!enif_realloc_binary(e->curr, need)) {
  104. return 0;
  105. }
  106. e->p = (char*) e->curr->data;
  107. e->u = (unsigned char*) e->curr->data;
  108. }
  109. return 1;
  110. }
  111. int
  112. enc_result(Encoder* e, ERL_NIF_TERM* value)
  113. {
  114. if(e->i != e->curr->size) {
  115. if(!enif_realloc_binary(e->curr, e->i)) {
  116. return 0;
  117. }
  118. }
  119. *value = enif_make_binary(e->env, e->curr);
  120. e->curr = NULL;
  121. return 1;
  122. }
  123. int
  124. enc_done(Encoder* e, ERL_NIF_TERM* value)
  125. {
  126. ERL_NIF_TERM last;
  127. if(e->iolen == 0) {
  128. return enc_result(e, value);
  129. }
  130. if(e->i > 0 ) {
  131. if(!enc_result(e, &last)) {
  132. return 0;
  133. }
  134. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  135. e->iolen++;
  136. }
  137. *value = e->iolist;
  138. return 1;
  139. }
  140. static inline int
  141. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  142. {
  143. ErlNifBinary* bin = e->curr;
  144. ERL_NIF_TERM curr;
  145. if(e->i > 0) {
  146. if(!enc_result(e, &curr)) {
  147. return 0;
  148. }
  149. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  150. e->iolen++;
  151. }
  152. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  153. e->iolen++;
  154. // Reinitialize our binary for the next buffer.
  155. e->curr = bin;
  156. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  157. return 0;
  158. }
  159. memset(e->curr->data, 0, e->curr->size);
  160. e->p = (char*) e->curr->data;
  161. e->u = (unsigned char*) e->curr->data;
  162. e->i = 0;
  163. return 1;
  164. }
  165. static inline int
  166. enc_literal(Encoder* e, const char* literal, size_t len)
  167. {
  168. if(!enc_ensure(e, len)) {
  169. return 0;
  170. }
  171. memcpy(&(e->p[e->i]), literal, len);
  172. e->i += len;
  173. e->count++;
  174. return 1;
  175. }
  176. static inline int
  177. enc_string(Encoder* e, ERL_NIF_TERM val)
  178. {
  179. ErlNifBinary bin;
  180. char atom[512];
  181. unsigned char* data;
  182. size_t size;
  183. int esc_extra = 0;
  184. int ulen;
  185. int uval;
  186. int i;
  187. if(enif_is_binary(e->env, val)) {
  188. if(!enif_inspect_binary(e->env, val, &bin)) {
  189. return 0;
  190. }
  191. data = bin.data;
  192. size = bin.size;
  193. } else if(enif_is_atom(e->env, val)) {
  194. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  195. return 0;
  196. }
  197. data = (unsigned char*) atom;
  198. size = strlen(atom);
  199. } else {
  200. return 0;
  201. }
  202. i = 0;
  203. while(i < size) {
  204. switch((char) data[i]) {
  205. case '\"':
  206. case '\\':
  207. case '/':
  208. case '\b':
  209. case '\f':
  210. case '\n':
  211. case '\r':
  212. case '\t':
  213. esc_extra += 1;
  214. i++;
  215. continue;
  216. default:
  217. if(data[i] < 0x20) {
  218. esc_extra += 5;
  219. i++;
  220. continue;
  221. } else if(data[i] < 0x80) {
  222. i++;
  223. continue;
  224. }
  225. ulen = utf8_validate(&(data[i]), size - i);
  226. if(ulen < 0) {
  227. return 0;
  228. }
  229. if(e->uescape) {
  230. uval = utf8_to_unicode(&(data[i]), ulen);
  231. if(uval < 0) {
  232. return 0;
  233. }
  234. esc_extra += utf8_esc_len(uval);
  235. if(ulen < 0) {
  236. return 0;
  237. }
  238. }
  239. i += ulen;
  240. }
  241. }
  242. if(!enc_ensure(e, size + esc_extra + 2)) {
  243. return 0;
  244. }
  245. e->p[e->i++] = '\"';
  246. i = 0;
  247. while(i < size) {
  248. switch((char) data[i]) {
  249. case '\"':
  250. case '\\':
  251. case '/':
  252. e->p[e->i++] = '\\';
  253. e->u[e->i++] = data[i];
  254. i++;
  255. continue;
  256. case '\b':
  257. e->p[e->i++] = '\\';
  258. e->p[e->i++] = 'b';
  259. i++;
  260. continue;
  261. case '\f':
  262. e->p[e->i++] = '\\';
  263. e->p[e->i++] = 'f';
  264. i++;
  265. continue;
  266. case '\n':
  267. e->p[e->i++] = '\\';
  268. e->p[e->i++] = 'n';
  269. i++;
  270. continue;
  271. case '\r':
  272. e->p[e->i++] = '\\';
  273. e->p[e->i++] = 'r';
  274. i++;
  275. continue;
  276. case '\t':
  277. e->p[e->i++] = '\\';
  278. e->p[e->i++] = 't';
  279. i++;
  280. continue;
  281. default:
  282. if(data[i] < 0x20) {
  283. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  284. if(ulen < 0) {
  285. return 0;
  286. }
  287. e->i += ulen;
  288. i++;
  289. } else if((data[i] & 0x80) && e->uescape) {
  290. uval = utf8_to_unicode(&(data[i]), size-i);
  291. if(uval < 0) {
  292. return 0;
  293. }
  294. ulen = unicode_uescape(uval, &(e->p[e->i]));
  295. if(ulen < 0) {
  296. return 0;
  297. }
  298. e->i += ulen;
  299. ulen = utf8_len(uval);
  300. if(ulen < 0) {
  301. return 0;
  302. }
  303. i += ulen;
  304. } else {
  305. e->u[e->i++] = data[i++];
  306. }
  307. }
  308. }
  309. e->p[e->i++] = '\"';
  310. e->count++;
  311. return 1;
  312. }
  313. static inline int
  314. enc_long(Encoder* e, ErlNifSInt64 val)
  315. {
  316. if(!enc_ensure(e, 32)) {
  317. return 0;
  318. }
  319. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  320. snprintf(&(e->p[e->i]), 32, "%ld", val);
  321. #elif SIZEOF_LONG == 8
  322. snprintf(&(e->p[e->i]), 32, "%ld", val);
  323. #else
  324. snprintf(&(e->p[e->i]), 32, "%lld", val);
  325. #endif
  326. e->i += strlen(&(e->p[e->i]));
  327. e->count++;
  328. return 1;
  329. }
  330. static inline int
  331. enc_double(Encoder* e, double val)
  332. {
  333. char* start;
  334. size_t len;
  335. size_t i;
  336. if(!enc_ensure(e, 32)) {
  337. return 0;
  338. }
  339. start = &(e->p[e->i]);
  340. sprintf(start, "%0.20g", val);
  341. len = strlen(start);
  342. // Check if we have a decimal point
  343. for(i = 0; i < len; i++) {
  344. if(start[i] == '.' || start[i] == 'e' || start[i] == 'E')
  345. goto done;
  346. }
  347. if(len > 29) return 0;
  348. // Force a decimal point
  349. start[len++] = '.';
  350. start[len++] = '0';
  351. done:
  352. e->i += len;
  353. e->count++;
  354. return 1;
  355. }
  356. static inline int
  357. enc_char(Encoder* e, char c)
  358. {
  359. if(!enc_ensure(e, 1)) {
  360. return 0;
  361. }
  362. e->p[e->i++] = c;
  363. return 1;
  364. }
  365. static int
  366. enc_shift(Encoder* e) {
  367. int i;
  368. char* shift;
  369. assert(e->shiftcnt >= 0 && "Invalid shift count.");
  370. shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
  371. if(!enc_literal(e, shift + 1, *shift))
  372. return 0;
  373. // Finish the rest of this shift it's it bigger than
  374. // our largest predefined constant.
  375. for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
  376. if(!enc_literal(e, " ", 2))
  377. return 0;
  378. }
  379. return 1;
  380. }
  381. static inline int
  382. enc_start_object(Encoder* e)
  383. {
  384. e->count++;
  385. e->shiftcnt++;
  386. if(!enc_char(e, '{'))
  387. return 0;
  388. MAYBE_PRETTY(e);
  389. return 1;
  390. }
  391. static inline int
  392. enc_end_object(Encoder* e)
  393. {
  394. e->shiftcnt--;
  395. MAYBE_PRETTY(e);
  396. return enc_char(e, '}');
  397. }
  398. static inline int
  399. enc_start_array(Encoder* e)
  400. {
  401. e->count++;
  402. e->shiftcnt++;
  403. if(!enc_char(e, '['))
  404. return 0;
  405. MAYBE_PRETTY(e);
  406. return 1;
  407. }
  408. static inline int
  409. enc_end_array(Encoder* e)
  410. {
  411. e->shiftcnt--;
  412. MAYBE_PRETTY(e);
  413. return enc_char(e, ']');
  414. }
  415. static inline int
  416. enc_colon(Encoder* e)
  417. {
  418. if(e->pretty)
  419. return enc_literal(e, " : ", 3);
  420. return enc_char(e, ':');
  421. }
  422. static inline int
  423. enc_comma(Encoder* e)
  424. {
  425. if(!enc_char(e, ','))
  426. return 0;
  427. MAYBE_PRETTY(e);
  428. return 1;
  429. }
  430. ERL_NIF_TERM
  431. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  432. {
  433. Encoder enc;
  434. Encoder* e = &enc;
  435. ErlNifBinary bin;
  436. ERL_NIF_TERM ret;
  437. ERL_NIF_TERM stack;
  438. ERL_NIF_TERM curr;
  439. ERL_NIF_TERM item;
  440. const ERL_NIF_TERM* tuple;
  441. int arity;
  442. ErlNifSInt64 lval;
  443. double dval;
  444. if(argc != 2) {
  445. return enif_make_badarg(env);
  446. }
  447. if(!enc_init(e, env, argv[1], &bin)) {
  448. return enif_make_badarg(env);
  449. }
  450. stack = enif_make_list(env, 1, argv[0]);
  451. while(!enif_is_empty_list(env, stack)) {
  452. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  453. ret = enc_error(e, "internal_error");
  454. goto done;
  455. }
  456. if(enif_is_identical(curr, e->atoms->ref_object)) {
  457. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  458. ret = enc_error(e, "internal_error");
  459. goto done;
  460. }
  461. if(enif_is_empty_list(env, curr)) {
  462. if(!enc_end_object(e)) {
  463. ret = enc_error(e, "internal_error");
  464. goto done;
  465. }
  466. continue;
  467. }
  468. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  469. ret = enc_error(e, "internal_error");
  470. goto done;
  471. }
  472. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  473. ret = enc_error(e, "invalid_object_pair");
  474. goto done;
  475. }
  476. if(arity != 2) {
  477. ret = enc_error(e, "invalid_object_pair");
  478. goto done;
  479. }
  480. if(!enc_comma(e)) {
  481. ret = enc_error(e, "internal_error");
  482. goto done;
  483. }
  484. if(!enc_string(e, tuple[0])) {
  485. ret = enc_error(e, "invalid_object_key");
  486. goto done;
  487. }
  488. if(!enc_colon(e)) {
  489. ret = enc_error(e, "internal_error");
  490. goto done;
  491. }
  492. stack = enif_make_list_cell(env, curr, stack);
  493. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  494. stack = enif_make_list_cell(env, tuple[1], stack);
  495. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  496. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  497. ret = enc_error(e, "internal_error");
  498. goto done;
  499. }
  500. if(enif_is_empty_list(env, curr)) {
  501. if(!enc_end_array(e)) {
  502. ret = enc_error(e, "internal_error");
  503. goto done;
  504. }
  505. continue;
  506. }
  507. if(!enc_comma(e)) {
  508. ret = enc_error(e, "internal_error");
  509. goto done;
  510. }
  511. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  512. ret = enc_error(e, "internal_error");
  513. goto done;
  514. }
  515. stack = enif_make_list_cell(env, curr, stack);
  516. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  517. stack = enif_make_list_cell(env, item, stack);
  518. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  519. if(!enc_literal(e, "null", 4)) {
  520. ret = enc_error(e, "null");
  521. goto done;
  522. }
  523. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  524. if(!enc_literal(e, "true", 4)) {
  525. ret = enc_error(e, "true");
  526. goto done;
  527. }
  528. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  529. if(!enc_literal(e, "false", 5)) {
  530. ret = enc_error(e, "false");
  531. goto done;
  532. }
  533. } else if(enif_is_binary(env, curr)) {
  534. if(!enc_string(e, curr)) {
  535. ret = enc_error(e, "invalid_string");
  536. goto done;
  537. }
  538. } else if(enif_is_atom(env, curr)) {
  539. if(!enc_string(e, curr)) {
  540. ret = enc_error(e, "invalid_string");
  541. goto done;
  542. }
  543. } else if(enif_get_int64(env, curr, &lval)) {
  544. if(!enc_long(e, lval)) {
  545. ret = enc_error(e, "internal_error");
  546. goto done;
  547. }
  548. } else if(enif_get_double(env, curr, &dval)) {
  549. if(!enc_double(e, dval)) {
  550. ret = enc_error(e, "internal_error");
  551. goto done;
  552. }
  553. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  554. if(arity != 1) {
  555. ret = enc_error(e, "invalid_ejson");
  556. goto done;
  557. }
  558. if(!enif_is_list(env, tuple[0])) {
  559. ret = enc_error(e, "invalid_object");
  560. goto done;
  561. }
  562. if(!enc_start_object(e)) {
  563. ret = enc_error(e, "internal_error");
  564. goto done;
  565. }
  566. if(enif_is_empty_list(env, tuple[0])) {
  567. if(!enc_end_object(e)) {
  568. ret = enc_error(e, "internal_error");
  569. goto done;
  570. }
  571. continue;
  572. }
  573. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  574. ret = enc_error(e, "internal_error");
  575. goto done;
  576. }
  577. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  578. ret = enc_error(e, "invalid_object_member");
  579. goto done;
  580. }
  581. if(arity != 2) {
  582. ret = enc_error(e, "invalid_object_member_arity");
  583. goto done;
  584. }
  585. if(!enc_string(e, tuple[0])) {
  586. ret = enc_error(e, "invalid_object_member_key");
  587. goto done;
  588. }
  589. if(!enc_colon(e)) {
  590. ret = enc_error(e, "internal_error");
  591. goto done;
  592. }
  593. stack = enif_make_list_cell(env, curr, stack);
  594. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  595. stack = enif_make_list_cell(env, tuple[1], stack);
  596. } else if(enif_is_list(env, curr)) {
  597. if(!enc_start_array(e)) {
  598. ret = enc_error(e, "internal_error");
  599. goto done;
  600. }
  601. if(enif_is_empty_list(env, curr)) {
  602. if(!enc_end_array(e)) {
  603. ret = enc_error(e, "internal_error");
  604. goto done;
  605. }
  606. continue;
  607. }
  608. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  609. ret = enc_error(e, "internal_error");
  610. goto done;
  611. }
  612. stack = enif_make_list_cell(env, curr, stack);
  613. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  614. stack = enif_make_list_cell(env, item, stack);
  615. } else {
  616. if(!enc_unknown(e, curr)) {
  617. ret = enc_error(e, "internal_error");
  618. goto done;
  619. }
  620. }
  621. }
  622. if(!enc_done(e, &item)) {
  623. ret = enc_error(e, "internal_error");
  624. goto done;
  625. }
  626. if(e->iolen == 0) {
  627. ret = item;
  628. } else {
  629. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  630. }
  631. done:
  632. enc_destroy(e);
  633. return ret;
  634. }