您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

752 行
19 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  8. #define BIN_INC_SIZE 2048
  9. #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
  10. #define MAYBE_PRETTY(e) \
  11. do { \
  12. if(e->pretty) { \
  13. if(!enc_shift(e)) \
  14. return 0; \
  15. } \
  16. } while(0)
  17. #if WINDOWS || WIN32
  18. #define inline __inline
  19. #define snprintf _snprintf
  20. #endif
  21. typedef struct {
  22. ErlNifEnv* env;
  23. jiffy_st* atoms;
  24. int uescape;
  25. int pretty;
  26. int shiftcnt;
  27. int count;
  28. int iolen;
  29. ERL_NIF_TERM iolist;
  30. size_t iosize;
  31. ErlNifBinary* curr;
  32. char* p;
  33. unsigned char* u;
  34. size_t i;
  35. int is_resource;
  36. size_t reds;
  37. } Encoder;
  38. // String constants for pretty printing.
  39. // Every string starts with its length.
  40. #define NUM_SHIFTS 8
  41. static char* shifts[NUM_SHIFTS] = {
  42. "\x01\n",
  43. "\x03\n ",
  44. "\x05\n ",
  45. "\x07\n ",
  46. "\x09\n ",
  47. "\x0b\n ",
  48. "\x0d\n ",
  49. "\x0f\n "
  50. };
  51. int
  52. enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts)
  53. {
  54. ERL_NIF_TERM val;
  55. e->env = env;
  56. e->atoms = enif_priv_data(env);
  57. e->uescape = 0;
  58. e->pretty = 0;
  59. e->shiftcnt = 0;
  60. e->count = 0;
  61. e->reds = REDUCTIONS;
  62. if(!enif_is_list(env, opts)) {
  63. return 0;
  64. }
  65. while(enif_get_list_cell(env, opts, &val, &opts)) {
  66. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  67. e->uescape = 1;
  68. } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
  69. e->pretty = 1;
  70. } else if(enif_compare(val, e->atoms->atom_force_utf8) == 0) {
  71. // Ignore, handled in Erlang
  72. } else if(!get_reductions(env, val, e->atoms, &e->reds)) {
  73. return 0;
  74. }
  75. }
  76. e->iolen = 0;
  77. e->iolist = enif_make_list(env, 0);
  78. e->iosize = 0;
  79. e->curr = enif_alloc(sizeof(ErlNifBinary));
  80. if(!e->curr || !enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  81. return 0;
  82. }
  83. memset(e->curr->data, 0, e->curr->size);
  84. e->p = (char*) e->curr->data;
  85. e->u = (unsigned char*) e->curr->data;
  86. e->i = 0;
  87. e->is_resource = 0;
  88. return 1;
  89. }
  90. void
  91. enc_destroy(ErlNifEnv* env, void* enc)
  92. {
  93. Encoder *e = enc;
  94. if(e->curr != NULL) {
  95. enif_release_binary(e->curr);
  96. }
  97. enif_free(e->curr);
  98. }
  99. ERL_NIF_TERM
  100. enc_error(Encoder* e, const char* msg)
  101. {
  102. //assert(0 && msg);
  103. return make_error(e->atoms, e->env, msg);
  104. }
  105. static inline int
  106. enc_ensure(Encoder* e, size_t req)
  107. {
  108. size_t need = e->curr->size;
  109. while(req >= (need - e->i)) need <<= 1;
  110. if(need != e->curr->size) {
  111. if(!enif_realloc_binary(e->curr, need)) {
  112. return 0;
  113. }
  114. e->p = (char*) e->curr->data;
  115. e->u = (unsigned char*) e->curr->data;
  116. }
  117. return 1;
  118. }
  119. int
  120. enc_result(Encoder* e, ERL_NIF_TERM* value)
  121. {
  122. if(e->i != e->curr->size) {
  123. if(!enif_realloc_binary(e->curr, e->i)) {
  124. return 0;
  125. }
  126. }
  127. *value = enif_make_binary(e->env, e->curr);
  128. e->curr = NULL;
  129. return 1;
  130. }
  131. int
  132. enc_done(Encoder* e, ERL_NIF_TERM* value)
  133. {
  134. ERL_NIF_TERM last;
  135. if(e->iolen == 0) {
  136. return enc_result(e, value);
  137. }
  138. if(e->i > 0 ) {
  139. if(!enc_result(e, &last)) {
  140. return 0;
  141. }
  142. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  143. e->iolen++;
  144. }
  145. *value = e->iolist;
  146. return 1;
  147. }
  148. static inline int
  149. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  150. {
  151. ErlNifBinary* bin = e->curr;
  152. ERL_NIF_TERM curr;
  153. if(e->i > 0) {
  154. if(!enc_result(e, &curr)) {
  155. return 0;
  156. }
  157. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  158. e->iolen++;
  159. }
  160. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  161. e->iolen++;
  162. e->iosize += e->i;
  163. // Reinitialize our binary for the next buffer.
  164. e->curr = bin;
  165. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  166. return 0;
  167. }
  168. memset(e->curr->data, 0, e->curr->size);
  169. e->p = (char*) e->curr->data;
  170. e->u = (unsigned char*) e->curr->data;
  171. e->i = 0;
  172. return 1;
  173. }
  174. static inline int
  175. enc_literal(Encoder* e, const char* literal, size_t len)
  176. {
  177. if(!enc_ensure(e, len)) {
  178. return 0;
  179. }
  180. memcpy(&(e->p[e->i]), literal, len);
  181. e->i += len;
  182. e->count++;
  183. return 1;
  184. }
  185. static inline int
  186. enc_string(Encoder* e, ERL_NIF_TERM val)
  187. {
  188. ErlNifBinary bin;
  189. char atom[512];
  190. unsigned char* data;
  191. size_t size;
  192. int esc_extra = 0;
  193. int ulen;
  194. int uval;
  195. int i;
  196. if(enif_is_binary(e->env, val)) {
  197. if(!enif_inspect_binary(e->env, val, &bin)) {
  198. return 0;
  199. }
  200. data = bin.data;
  201. size = bin.size;
  202. } else if(enif_is_atom(e->env, val)) {
  203. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  204. return 0;
  205. }
  206. data = (unsigned char*) atom;
  207. size = strlen(atom);
  208. } else {
  209. return 0;
  210. }
  211. i = 0;
  212. while(i < size) {
  213. switch((char) data[i]) {
  214. case '\"':
  215. case '\\':
  216. case '\b':
  217. case '\f':
  218. case '\n':
  219. case '\r':
  220. case '\t':
  221. esc_extra += 1;
  222. i++;
  223. continue;
  224. default:
  225. if(data[i] < 0x20) {
  226. esc_extra += 5;
  227. i++;
  228. continue;
  229. } else if(data[i] < 0x80) {
  230. i++;
  231. continue;
  232. }
  233. ulen = utf8_validate(&(data[i]), size - i);
  234. if(ulen < 0) {
  235. return 0;
  236. }
  237. if(e->uescape) {
  238. uval = utf8_to_unicode(&(data[i]), ulen);
  239. if(uval < 0) {
  240. return 0;
  241. }
  242. esc_extra += utf8_esc_len(uval);
  243. if(ulen < 0) {
  244. return 0;
  245. }
  246. }
  247. i += ulen;
  248. }
  249. }
  250. if(!enc_ensure(e, size + esc_extra + 2)) {
  251. return 0;
  252. }
  253. e->p[e->i++] = '\"';
  254. i = 0;
  255. while(i < size) {
  256. switch((char) data[i]) {
  257. case '\"':
  258. case '\\':
  259. e->p[e->i++] = '\\';
  260. e->u[e->i++] = data[i];
  261. i++;
  262. continue;
  263. case '\b':
  264. e->p[e->i++] = '\\';
  265. e->p[e->i++] = 'b';
  266. i++;
  267. continue;
  268. case '\f':
  269. e->p[e->i++] = '\\';
  270. e->p[e->i++] = 'f';
  271. i++;
  272. continue;
  273. case '\n':
  274. e->p[e->i++] = '\\';
  275. e->p[e->i++] = 'n';
  276. i++;
  277. continue;
  278. case '\r':
  279. e->p[e->i++] = '\\';
  280. e->p[e->i++] = 'r';
  281. i++;
  282. continue;
  283. case '\t':
  284. e->p[e->i++] = '\\';
  285. e->p[e->i++] = 't';
  286. i++;
  287. continue;
  288. default:
  289. if(data[i] < 0x20) {
  290. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  291. if(ulen < 0) {
  292. return 0;
  293. }
  294. e->i += ulen;
  295. i++;
  296. } else if((data[i] & 0x80) && e->uescape) {
  297. uval = utf8_to_unicode(&(data[i]), size-i);
  298. if(uval < 0) {
  299. return 0;
  300. }
  301. ulen = unicode_uescape(uval, &(e->p[e->i]));
  302. if(ulen < 0) {
  303. return 0;
  304. }
  305. e->i += ulen;
  306. ulen = utf8_len(uval);
  307. if(ulen < 0) {
  308. return 0;
  309. }
  310. i += ulen;
  311. } else {
  312. e->u[e->i++] = data[i++];
  313. }
  314. }
  315. }
  316. e->p[e->i++] = '\"';
  317. e->count++;
  318. return 1;
  319. }
  320. static inline int
  321. enc_long(Encoder* e, ErlNifSInt64 val)
  322. {
  323. if(!enc_ensure(e, 32)) {
  324. return 0;
  325. }
  326. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  327. snprintf(&(e->p[e->i]), 32, "%ld", val);
  328. #elif SIZEOF_LONG == 8
  329. snprintf(&(e->p[e->i]), 32, "%ld", val);
  330. #else
  331. snprintf(&(e->p[e->i]), 32, "%lld", val);
  332. #endif
  333. e->i += strlen(&(e->p[e->i]));
  334. e->count++;
  335. return 1;
  336. }
  337. static inline int
  338. enc_double(Encoder* e, double val)
  339. {
  340. char* start;
  341. size_t len;
  342. if(!enc_ensure(e, 32)) {
  343. return 0;
  344. }
  345. start = &(e->p[e->i]);
  346. if(!double_to_shortest(start, e->curr->size, &len, val)) {
  347. return 0;
  348. }
  349. e->i += len;
  350. e->count++;
  351. return 1;
  352. }
  353. static inline int
  354. enc_char(Encoder* e, char c)
  355. {
  356. if(!enc_ensure(e, 1)) {
  357. return 0;
  358. }
  359. e->p[e->i++] = c;
  360. return 1;
  361. }
  362. static int
  363. enc_shift(Encoder* e) {
  364. int i;
  365. char* shift;
  366. assert(e->shiftcnt >= 0 && "Invalid shift count.");
  367. shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
  368. if(!enc_literal(e, shift + 1, *shift))
  369. return 0;
  370. // Finish the rest of this shift it's it bigger than
  371. // our largest predefined constant.
  372. for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
  373. if(!enc_literal(e, " ", 2))
  374. return 0;
  375. }
  376. return 1;
  377. }
  378. static inline int
  379. enc_start_object(Encoder* e)
  380. {
  381. e->count++;
  382. e->shiftcnt++;
  383. if(!enc_char(e, '{'))
  384. return 0;
  385. MAYBE_PRETTY(e);
  386. return 1;
  387. }
  388. static inline int
  389. enc_end_object(Encoder* e)
  390. {
  391. e->shiftcnt--;
  392. MAYBE_PRETTY(e);
  393. return enc_char(e, '}');
  394. }
  395. static inline int
  396. enc_start_array(Encoder* e)
  397. {
  398. e->count++;
  399. e->shiftcnt++;
  400. if(!enc_char(e, '['))
  401. return 0;
  402. MAYBE_PRETTY(e);
  403. return 1;
  404. }
  405. static inline int
  406. enc_end_array(Encoder* e)
  407. {
  408. e->shiftcnt--;
  409. MAYBE_PRETTY(e);
  410. return enc_char(e, ']');
  411. }
  412. static inline int
  413. enc_colon(Encoder* e)
  414. {
  415. if(e->pretty)
  416. return enc_literal(e, " : ", 3);
  417. return enc_char(e, ':');
  418. }
  419. static inline int
  420. enc_comma(Encoder* e)
  421. {
  422. if(!enc_char(e, ','))
  423. return 0;
  424. MAYBE_PRETTY(e);
  425. return 1;
  426. }
  427. static ERL_NIF_TERM
  428. enc_yield(Encoder* e, ERL_NIF_TERM stack)
  429. {
  430. Encoder* enc = e;
  431. if(!e->is_resource) {
  432. enc = enif_alloc_resource(e->atoms->res_encoder, sizeof(Encoder));
  433. *enc = *e;
  434. enc->is_resource = 1;
  435. }
  436. ERL_NIF_TERM val = enif_make_resource(e->env, enc);
  437. return enif_make_tuple4(e->env, e->atoms->atom_partial, val, stack, e->iolist);
  438. }
  439. ERL_NIF_TERM
  440. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  441. {
  442. Encoder enc;
  443. Encoder* e = &enc;
  444. ERL_NIF_TERM ret;
  445. ERL_NIF_TERM stack;
  446. ERL_NIF_TERM curr;
  447. ERL_NIF_TERM item;
  448. const ERL_NIF_TERM* tuple;
  449. int arity;
  450. ErlNifSInt64 lval;
  451. double dval;
  452. if(argc != 2) {
  453. return enif_make_badarg(env);
  454. }
  455. jiffy_st *priv = enif_priv_data(env);
  456. if(!enif_get_resource(env, argv[0], priv->res_encoder, (void **) &e)) {
  457. if(!enc_init(e, env, argv[1])) {
  458. return enif_make_badarg(env);
  459. }
  460. stack = enif_make_list(env, 1, argv[0]);
  461. } else {
  462. int arity;
  463. ERL_NIF_TERM* args;
  464. if(!enif_get_tuple(env, argv[1], &arity, (const ERL_NIF_TERM **) &args)) {
  465. return enif_make_badarg(env);
  466. } else if(arity != 2) {
  467. return enif_make_badarg(env);
  468. }
  469. stack = args[0];
  470. e->iolist = args[1];
  471. e->env = env;
  472. }
  473. size_t processed = e->iosize + e->i;
  474. while(!enif_is_empty_list(env, stack)) {
  475. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  476. ret = enc_error(e, "internal_error");
  477. goto done;
  478. }
  479. if(enif_is_identical(curr, e->atoms->ref_object)) {
  480. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  481. ret = enc_error(e, "internal_error");
  482. goto done;
  483. }
  484. if(enif_is_empty_list(env, curr)) {
  485. if(!enc_end_object(e)) {
  486. ret = enc_error(e, "internal_error");
  487. goto done;
  488. }
  489. continue;
  490. }
  491. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  492. ret = enc_error(e, "internal_error");
  493. goto done;
  494. }
  495. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  496. ret = enc_error(e, "invalid_object_pair");
  497. goto done;
  498. }
  499. if(arity != 2) {
  500. ret = enc_error(e, "invalid_object_pair");
  501. goto done;
  502. }
  503. if(!enc_comma(e)) {
  504. ret = enc_error(e, "internal_error");
  505. goto done;
  506. }
  507. if(!enc_string(e, tuple[0])) {
  508. ret = enc_error(e, "invalid_object_key");
  509. goto done;
  510. }
  511. if(!enc_colon(e)) {
  512. ret = enc_error(e, "internal_error");
  513. goto done;
  514. }
  515. stack = enif_make_list_cell(env, curr, stack);
  516. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  517. stack = enif_make_list_cell(env, tuple[1], stack);
  518. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  519. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  520. ret = enc_error(e, "internal_error");
  521. goto done;
  522. }
  523. if(enif_is_empty_list(env, curr)) {
  524. if(!enc_end_array(e)) {
  525. ret = enc_error(e, "internal_error");
  526. goto done;
  527. }
  528. continue;
  529. }
  530. if(!enc_comma(e)) {
  531. ret = enc_error(e, "internal_error");
  532. goto done;
  533. }
  534. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  535. ret = enc_error(e, "internal_error");
  536. goto done;
  537. }
  538. stack = enif_make_list_cell(env, curr, stack);
  539. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  540. stack = enif_make_list_cell(env, item, stack);
  541. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  542. if(!enc_literal(e, "null", 4)) {
  543. ret = enc_error(e, "null");
  544. goto done;
  545. }
  546. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  547. if(!enc_literal(e, "true", 4)) {
  548. ret = enc_error(e, "true");
  549. goto done;
  550. }
  551. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  552. if(!enc_literal(e, "false", 5)) {
  553. ret = enc_error(e, "false");
  554. goto done;
  555. }
  556. } else if(enif_is_binary(env, curr)) {
  557. if(!enc_string(e, curr)) {
  558. ret = enc_error(e, "invalid_string");
  559. goto done;
  560. }
  561. } else if(enif_is_atom(env, curr)) {
  562. if(!enc_string(e, curr)) {
  563. ret = enc_error(e, "invalid_string");
  564. goto done;
  565. }
  566. } else if(enif_get_int64(env, curr, &lval)) {
  567. if(!enc_long(e, lval)) {
  568. ret = enc_error(e, "internal_error");
  569. goto done;
  570. }
  571. } else if(enif_get_double(env, curr, &dval)) {
  572. if(!enc_double(e, dval)) {
  573. ret = enc_error(e, "internal_error");
  574. goto done;
  575. }
  576. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  577. if(arity != 1) {
  578. ret = enc_error(e, "invalid_ejson");
  579. goto done;
  580. }
  581. if(!enif_is_list(env, tuple[0])) {
  582. ret = enc_error(e, "invalid_object");
  583. goto done;
  584. }
  585. if(!enc_start_object(e)) {
  586. ret = enc_error(e, "internal_error");
  587. goto done;
  588. }
  589. if(enif_is_empty_list(env, tuple[0])) {
  590. if(!enc_end_object(e)) {
  591. ret = enc_error(e, "internal_error");
  592. goto done;
  593. }
  594. continue;
  595. }
  596. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  597. ret = enc_error(e, "internal_error");
  598. goto done;
  599. }
  600. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  601. ret = enc_error(e, "invalid_object_member");
  602. goto done;
  603. }
  604. if(arity != 2) {
  605. ret = enc_error(e, "invalid_object_member_arity");
  606. goto done;
  607. }
  608. if(!enc_string(e, tuple[0])) {
  609. ret = enc_error(e, "invalid_object_member_key");
  610. goto done;
  611. }
  612. if(!enc_colon(e)) {
  613. ret = enc_error(e, "internal_error");
  614. goto done;
  615. }
  616. stack = enif_make_list_cell(env, curr, stack);
  617. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  618. stack = enif_make_list_cell(env, tuple[1], stack);
  619. } else if(enif_is_list(env, curr)) {
  620. if(!enc_start_array(e)) {
  621. ret = enc_error(e, "internal_error");
  622. goto done;
  623. }
  624. if(enif_is_empty_list(env, curr)) {
  625. if(!enc_end_array(e)) {
  626. ret = enc_error(e, "internal_error");
  627. goto done;
  628. }
  629. continue;
  630. }
  631. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  632. ret = enc_error(e, "internal_error");
  633. goto done;
  634. }
  635. stack = enif_make_list_cell(env, curr, stack);
  636. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  637. stack = enif_make_list_cell(env, item, stack);
  638. } else {
  639. if(!enc_unknown(e, curr)) {
  640. ret = enc_error(e, "internal_error");
  641. goto done;
  642. }
  643. }
  644. if(jiffy_consume_timeslice(env, e->reds, e->iosize + e->i, &processed)) {
  645. return enc_yield(e, stack);
  646. }
  647. }
  648. if(!enc_done(e, &item)) {
  649. ret = enc_error(e, "internal_error");
  650. goto done;
  651. }
  652. if(e->iolen == 0) {
  653. ret = item;
  654. } else {
  655. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  656. }
  657. done:
  658. jiffy_consume_timeslice(env, e->reds, e->i, &processed);
  659. enc_destroy(env, e);
  660. return ret;
  661. }