Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.

752 строки
19 KiB

13 лет назад
13 лет назад
13 лет назад
13 лет назад
13 лет назад
13 лет назад
13 лет назад
13 лет назад
13 лет назад
13 лет назад
13 лет назад
  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  8. #define BIN_INC_SIZE 2048
  9. #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
  10. #define MAYBE_PRETTY(e) \
  11. do { \
  12. if(e->pretty) { \
  13. if(!enc_shift(e)) \
  14. return 0; \
  15. } \
  16. } while(0)
  17. #if WINDOWS || WIN32
  18. #define inline __inline
  19. #define snprintf _snprintf
  20. #endif
  21. typedef struct {
  22. ErlNifEnv* env;
  23. jiffy_st* atoms;
  24. int uescape;
  25. int pretty;
  26. int shiftcnt;
  27. int count;
  28. int iolen;
  29. ERL_NIF_TERM iolist;
  30. size_t iosize;
  31. ErlNifBinary* curr;
  32. char* p;
  33. unsigned char* u;
  34. size_t i;
  35. int is_resource;
  36. size_t reds;
  37. } Encoder;
  38. // String constants for pretty printing.
  39. // Every string starts with its length.
  40. #define NUM_SHIFTS 8
  41. static char* shifts[NUM_SHIFTS] = {
  42. "\x01\n",
  43. "\x03\n ",
  44. "\x05\n ",
  45. "\x07\n ",
  46. "\x09\n ",
  47. "\x0b\n ",
  48. "\x0d\n ",
  49. "\x0f\n "
  50. };
  51. int
  52. enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts)
  53. {
  54. ERL_NIF_TERM val;
  55. e->env = env;
  56. e->atoms = enif_priv_data(env);
  57. e->uescape = 0;
  58. e->pretty = 0;
  59. e->shiftcnt = 0;
  60. e->count = 0;
  61. e->reds = REDUCTIONS;
  62. if(!enif_is_list(env, opts)) {
  63. return 0;
  64. }
  65. while(enif_get_list_cell(env, opts, &val, &opts)) {
  66. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  67. e->uescape = 1;
  68. } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
  69. e->pretty = 1;
  70. } else if(enif_compare(val, e->atoms->atom_force_utf8) == 0) {
  71. // Ignore, handled in Erlang
  72. } else if(!get_reductions(env, val, e->atoms, &e->reds)) {
  73. return 0;
  74. }
  75. }
  76. e->iolen = 0;
  77. e->iolist = enif_make_list(env, 0);
  78. e->iosize = 0;
  79. e->curr = enif_alloc(sizeof(ErlNifBinary));
  80. if(!e->curr || !enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  81. return 0;
  82. }
  83. memset(e->curr->data, 0, e->curr->size);
  84. e->p = (char*) e->curr->data;
  85. e->u = (unsigned char*) e->curr->data;
  86. e->i = 0;
  87. e->is_resource = 0;
  88. return 1;
  89. }
  90. void
  91. enc_destroy(ErlNifEnv* env, void* enc)
  92. {
  93. Encoder *e = enc;
  94. if(e->curr != NULL) {
  95. enif_release_binary(e->curr);
  96. }
  97. enif_free(e->curr);
  98. }
  99. ERL_NIF_TERM
  100. enc_error(Encoder* e, const char* msg)
  101. {
  102. //assert(0 && msg);
  103. return make_error(e->atoms, e->env, msg);
  104. }
  105. static inline int
  106. enc_ensure(Encoder* e, size_t req)
  107. {
  108. size_t need = e->curr->size;
  109. while(req >= (need - e->i)) need <<= 1;
  110. if(need != e->curr->size) {
  111. if(!enif_realloc_binary(e->curr, need)) {
  112. return 0;
  113. }
  114. e->p = (char*) e->curr->data;
  115. e->u = (unsigned char*) e->curr->data;
  116. }
  117. return 1;
  118. }
  119. int
  120. enc_result(Encoder* e, ERL_NIF_TERM* value)
  121. {
  122. if(e->i != e->curr->size) {
  123. if(!enif_realloc_binary(e->curr, e->i)) {
  124. return 0;
  125. }
  126. }
  127. *value = enif_make_binary(e->env, e->curr);
  128. e->curr = NULL;
  129. return 1;
  130. }
  131. int
  132. enc_done(Encoder* e, ERL_NIF_TERM* value)
  133. {
  134. ERL_NIF_TERM last;
  135. if(e->iolen == 0) {
  136. return enc_result(e, value);
  137. }
  138. if(e->i > 0 ) {
  139. if(!enc_result(e, &last)) {
  140. return 0;
  141. }
  142. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  143. e->iolen++;
  144. }
  145. *value = e->iolist;
  146. return 1;
  147. }
  148. static inline int
  149. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  150. {
  151. ErlNifBinary* bin = e->curr;
  152. ERL_NIF_TERM curr;
  153. if(e->i > 0) {
  154. if(!enc_result(e, &curr)) {
  155. return 0;
  156. }
  157. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  158. e->iolen++;
  159. }
  160. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  161. e->iolen++;
  162. e->iosize += e->i;
  163. // Reinitialize our binary for the next buffer.
  164. e->curr = bin;
  165. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  166. return 0;
  167. }
  168. memset(e->curr->data, 0, e->curr->size);
  169. e->p = (char*) e->curr->data;
  170. e->u = (unsigned char*) e->curr->data;
  171. e->i = 0;
  172. return 1;
  173. }
  174. static inline int
  175. enc_literal(Encoder* e, const char* literal, size_t len)
  176. {
  177. if(!enc_ensure(e, len)) {
  178. return 0;
  179. }
  180. memcpy(&(e->p[e->i]), literal, len);
  181. e->i += len;
  182. e->count++;
  183. return 1;
  184. }
  185. static inline int
  186. enc_string(Encoder* e, ERL_NIF_TERM val)
  187. {
  188. ErlNifBinary bin;
  189. char atom[512];
  190. unsigned char* data;
  191. size_t size;
  192. int esc_extra = 0;
  193. int ulen;
  194. int uval;
  195. int i;
  196. if(enif_is_binary(e->env, val)) {
  197. if(!enif_inspect_binary(e->env, val, &bin)) {
  198. return 0;
  199. }
  200. data = bin.data;
  201. size = bin.size;
  202. } else if(enif_is_atom(e->env, val)) {
  203. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  204. return 0;
  205. }
  206. data = (unsigned char*) atom;
  207. size = strlen(atom);
  208. } else {
  209. return 0;
  210. }
  211. i = 0;
  212. while(i < size) {
  213. switch((char) data[i]) {
  214. case '\"':
  215. case '\\':
  216. case '\b':
  217. case '\f':
  218. case '\n':
  219. case '\r':
  220. case '\t':
  221. esc_extra += 1;
  222. i++;
  223. continue;
  224. default:
  225. if(data[i] < 0x20) {
  226. esc_extra += 5;
  227. i++;
  228. continue;
  229. } else if(data[i] < 0x80) {
  230. i++;
  231. continue;
  232. }
  233. ulen = utf8_validate(&(data[i]), size - i);
  234. if(ulen < 0) {
  235. return 0;
  236. }
  237. if(e->uescape) {
  238. uval = utf8_to_unicode(&(data[i]), ulen);
  239. if(uval < 0) {
  240. return 0;
  241. }
  242. esc_extra += utf8_esc_len(uval);
  243. if(ulen < 0) {
  244. return 0;
  245. }
  246. }
  247. i += ulen;
  248. }
  249. }
  250. if(!enc_ensure(e, size + esc_extra + 2)) {
  251. return 0;
  252. }
  253. e->p[e->i++] = '\"';
  254. i = 0;
  255. while(i < size) {
  256. switch((char) data[i]) {
  257. case '\"':
  258. case '\\':
  259. e->p[e->i++] = '\\';
  260. e->u[e->i++] = data[i];
  261. i++;
  262. continue;
  263. case '\b':
  264. e->p[e->i++] = '\\';
  265. e->p[e->i++] = 'b';
  266. i++;
  267. continue;
  268. case '\f':
  269. e->p[e->i++] = '\\';
  270. e->p[e->i++] = 'f';
  271. i++;
  272. continue;
  273. case '\n':
  274. e->p[e->i++] = '\\';
  275. e->p[e->i++] = 'n';
  276. i++;
  277. continue;
  278. case '\r':
  279. e->p[e->i++] = '\\';
  280. e->p[e->i++] = 'r';
  281. i++;
  282. continue;
  283. case '\t':
  284. e->p[e->i++] = '\\';
  285. e->p[e->i++] = 't';
  286. i++;
  287. continue;
  288. default:
  289. if(data[i] < 0x20) {
  290. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  291. if(ulen < 0) {
  292. return 0;
  293. }
  294. e->i += ulen;
  295. i++;
  296. } else if((data[i] & 0x80) && e->uescape) {
  297. uval = utf8_to_unicode(&(data[i]), size-i);
  298. if(uval < 0) {
  299. return 0;
  300. }
  301. ulen = unicode_uescape(uval, &(e->p[e->i]));
  302. if(ulen < 0) {
  303. return 0;
  304. }
  305. e->i += ulen;
  306. ulen = utf8_len(uval);
  307. if(ulen < 0) {
  308. return 0;
  309. }
  310. i += ulen;
  311. } else {
  312. e->u[e->i++] = data[i++];
  313. }
  314. }
  315. }
  316. e->p[e->i++] = '\"';
  317. e->count++;
  318. return 1;
  319. }
  320. static inline int
  321. enc_long(Encoder* e, ErlNifSInt64 val)
  322. {
  323. if(!enc_ensure(e, 32)) {
  324. return 0;
  325. }
  326. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  327. snprintf(&(e->p[e->i]), 32, "%ld", val);
  328. #elif SIZEOF_LONG == 8
  329. snprintf(&(e->p[e->i]), 32, "%ld", val);
  330. #else
  331. snprintf(&(e->p[e->i]), 32, "%lld", val);
  332. #endif
  333. e->i += strlen(&(e->p[e->i]));
  334. e->count++;
  335. return 1;
  336. }
  337. static inline int
  338. enc_double(Encoder* e, double val)
  339. {
  340. char* start;
  341. size_t len;
  342. if(!enc_ensure(e, 32)) {
  343. return 0;
  344. }
  345. start = &(e->p[e->i]);
  346. if(!double_to_shortest(start, e->curr->size, &len, val)) {
  347. return 0;
  348. }
  349. e->i += len;
  350. e->count++;
  351. return 1;
  352. }
  353. static inline int
  354. enc_char(Encoder* e, char c)
  355. {
  356. if(!enc_ensure(e, 1)) {
  357. return 0;
  358. }
  359. e->p[e->i++] = c;
  360. return 1;
  361. }
  362. static int
  363. enc_shift(Encoder* e) {
  364. int i;
  365. char* shift;
  366. assert(e->shiftcnt >= 0 && "Invalid shift count.");
  367. shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
  368. if(!enc_literal(e, shift + 1, *shift))
  369. return 0;
  370. // Finish the rest of this shift it's it bigger than
  371. // our largest predefined constant.
  372. for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
  373. if(!enc_literal(e, " ", 2))
  374. return 0;
  375. }
  376. return 1;
  377. }
  378. static inline int
  379. enc_start_object(Encoder* e)
  380. {
  381. e->count++;
  382. e->shiftcnt++;
  383. if(!enc_char(e, '{'))
  384. return 0;
  385. MAYBE_PRETTY(e);
  386. return 1;
  387. }
  388. static inline int
  389. enc_end_object(Encoder* e)
  390. {
  391. e->shiftcnt--;
  392. MAYBE_PRETTY(e);
  393. return enc_char(e, '}');
  394. }
  395. static inline int
  396. enc_start_array(Encoder* e)
  397. {
  398. e->count++;
  399. e->shiftcnt++;
  400. if(!enc_char(e, '['))
  401. return 0;
  402. MAYBE_PRETTY(e);
  403. return 1;
  404. }
  405. static inline int
  406. enc_end_array(Encoder* e)
  407. {
  408. e->shiftcnt--;
  409. MAYBE_PRETTY(e);
  410. return enc_char(e, ']');
  411. }
  412. static inline int
  413. enc_colon(Encoder* e)
  414. {
  415. if(e->pretty)
  416. return enc_literal(e, " : ", 3);
  417. return enc_char(e, ':');
  418. }
  419. static inline int
  420. enc_comma(Encoder* e)
  421. {
  422. if(!enc_char(e, ','))
  423. return 0;
  424. MAYBE_PRETTY(e);
  425. return 1;
  426. }
  427. static ERL_NIF_TERM
  428. enc_yield(Encoder* e, ERL_NIF_TERM stack)
  429. {
  430. Encoder* enc = e;
  431. if(!e->is_resource) {
  432. enc = enif_alloc_resource(e->atoms->res_encoder, sizeof(Encoder));
  433. *enc = *e;
  434. enc->is_resource = 1;
  435. }
  436. ERL_NIF_TERM val = enif_make_resource(e->env, enc);
  437. return enif_make_tuple4(e->env, e->atoms->atom_partial, val, stack, e->iolist);
  438. }
  439. ERL_NIF_TERM
  440. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  441. {
  442. Encoder enc;
  443. Encoder* e = &enc;
  444. ERL_NIF_TERM ret;
  445. ERL_NIF_TERM stack;
  446. ERL_NIF_TERM curr;
  447. ERL_NIF_TERM item;
  448. const ERL_NIF_TERM* tuple;
  449. int arity;
  450. ErlNifSInt64 lval;
  451. double dval;
  452. if(argc != 2) {
  453. return enif_make_badarg(env);
  454. }
  455. jiffy_st *priv = enif_priv_data(env);
  456. if(!enif_get_resource(env, argv[0], priv->res_encoder, (void **) &e)) {
  457. if(!enc_init(e, env, argv[1])) {
  458. return enif_make_badarg(env);
  459. }
  460. stack = enif_make_list(env, 1, argv[0]);
  461. } else {
  462. int arity;
  463. ERL_NIF_TERM* args;
  464. if(!enif_get_tuple(env, argv[1], &arity, (const ERL_NIF_TERM **) &args)) {
  465. return enif_make_badarg(env);
  466. } else if(arity != 2) {
  467. return enif_make_badarg(env);
  468. }
  469. stack = args[0];
  470. e->iolist = args[1];
  471. e->env = env;
  472. }
  473. size_t processed = e->iosize + e->i;
  474. while(!enif_is_empty_list(env, stack)) {
  475. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  476. ret = enc_error(e, "internal_error");
  477. goto done;
  478. }
  479. if(enif_is_identical(curr, e->atoms->ref_object)) {
  480. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  481. ret = enc_error(e, "internal_error");
  482. goto done;
  483. }
  484. if(enif_is_empty_list(env, curr)) {
  485. if(!enc_end_object(e)) {
  486. ret = enc_error(e, "internal_error");
  487. goto done;
  488. }
  489. continue;
  490. }
  491. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  492. ret = enc_error(e, "internal_error");
  493. goto done;
  494. }
  495. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  496. ret = enc_error(e, "invalid_object_pair");
  497. goto done;
  498. }
  499. if(arity != 2) {
  500. ret = enc_error(e, "invalid_object_pair");
  501. goto done;
  502. }
  503. if(!enc_comma(e)) {
  504. ret = enc_error(e, "internal_error");
  505. goto done;
  506. }
  507. if(!enc_string(e, tuple[0])) {
  508. ret = enc_error(e, "invalid_object_key");
  509. goto done;
  510. }
  511. if(!enc_colon(e)) {
  512. ret = enc_error(e, "internal_error");
  513. goto done;
  514. }
  515. stack = enif_make_list_cell(env, curr, stack);
  516. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  517. stack = enif_make_list_cell(env, tuple[1], stack);
  518. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  519. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  520. ret = enc_error(e, "internal_error");
  521. goto done;
  522. }
  523. if(enif_is_empty_list(env, curr)) {
  524. if(!enc_end_array(e)) {
  525. ret = enc_error(e, "internal_error");
  526. goto done;
  527. }
  528. continue;
  529. }
  530. if(!enc_comma(e)) {
  531. ret = enc_error(e, "internal_error");
  532. goto done;
  533. }
  534. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  535. ret = enc_error(e, "internal_error");
  536. goto done;
  537. }
  538. stack = enif_make_list_cell(env, curr, stack);
  539. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  540. stack = enif_make_list_cell(env, item, stack);
  541. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  542. if(!enc_literal(e, "null", 4)) {
  543. ret = enc_error(e, "null");
  544. goto done;
  545. }
  546. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  547. if(!enc_literal(e, "true", 4)) {
  548. ret = enc_error(e, "true");
  549. goto done;
  550. }
  551. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  552. if(!enc_literal(e, "false", 5)) {
  553. ret = enc_error(e, "false");
  554. goto done;
  555. }
  556. } else if(enif_is_binary(env, curr)) {
  557. if(!enc_string(e, curr)) {
  558. ret = enc_error(e, "invalid_string");
  559. goto done;
  560. }
  561. } else if(enif_is_atom(env, curr)) {
  562. if(!enc_string(e, curr)) {
  563. ret = enc_error(e, "invalid_string");
  564. goto done;
  565. }
  566. } else if(enif_get_int64(env, curr, &lval)) {
  567. if(!enc_long(e, lval)) {
  568. ret = enc_error(e, "internal_error");
  569. goto done;
  570. }
  571. } else if(enif_get_double(env, curr, &dval)) {
  572. if(!enc_double(e, dval)) {
  573. ret = enc_error(e, "internal_error");
  574. goto done;
  575. }
  576. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  577. if(arity != 1) {
  578. ret = enc_error(e, "invalid_ejson");
  579. goto done;
  580. }
  581. if(!enif_is_list(env, tuple[0])) {
  582. ret = enc_error(e, "invalid_object");
  583. goto done;
  584. }
  585. if(!enc_start_object(e)) {
  586. ret = enc_error(e, "internal_error");
  587. goto done;
  588. }
  589. if(enif_is_empty_list(env, tuple[0])) {
  590. if(!enc_end_object(e)) {
  591. ret = enc_error(e, "internal_error");
  592. goto done;
  593. }
  594. continue;
  595. }
  596. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  597. ret = enc_error(e, "internal_error");
  598. goto done;
  599. }
  600. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  601. ret = enc_error(e, "invalid_object_member");
  602. goto done;
  603. }
  604. if(arity != 2) {
  605. ret = enc_error(e, "invalid_object_member_arity");
  606. goto done;
  607. }
  608. if(!enc_string(e, tuple[0])) {
  609. ret = enc_error(e, "invalid_object_member_key");
  610. goto done;
  611. }
  612. if(!enc_colon(e)) {
  613. ret = enc_error(e, "internal_error");
  614. goto done;
  615. }
  616. stack = enif_make_list_cell(env, curr, stack);
  617. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  618. stack = enif_make_list_cell(env, tuple[1], stack);
  619. } else if(enif_is_list(env, curr)) {
  620. if(!enc_start_array(e)) {
  621. ret = enc_error(e, "internal_error");
  622. goto done;
  623. }
  624. if(enif_is_empty_list(env, curr)) {
  625. if(!enc_end_array(e)) {
  626. ret = enc_error(e, "internal_error");
  627. goto done;
  628. }
  629. continue;
  630. }
  631. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  632. ret = enc_error(e, "internal_error");
  633. goto done;
  634. }
  635. stack = enif_make_list_cell(env, curr, stack);
  636. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  637. stack = enif_make_list_cell(env, item, stack);
  638. } else {
  639. if(!enc_unknown(e, curr)) {
  640. ret = enc_error(e, "internal_error");
  641. goto done;
  642. }
  643. }
  644. if(jiffy_consume_timeslice(env, e->reds, e->iosize + e->i, &processed)) {
  645. return enc_yield(e, stack);
  646. }
  647. }
  648. if(!enc_done(e, &item)) {
  649. ret = enc_error(e, "internal_error");
  650. goto done;
  651. }
  652. if(e->iolen == 0) {
  653. ret = item;
  654. } else {
  655. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  656. }
  657. done:
  658. jiffy_consume_timeslice(env, e->reds, e->i, &processed);
  659. enc_destroy(env, e);
  660. return ret;
  661. }