Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

789 рядки
19 KiB

13 роки тому
13 роки тому
13 роки тому
13 роки тому
13 роки тому
13 роки тому
13 роки тому
13 роки тому
13 роки тому
13 роки тому
  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  // Size, in bytes, of every output buffer the encoder allocates up front.
  #define BIN_INC_SIZE 2048

  // Smaller of two values. Each argument may be evaluated twice, so do not
  // pass expressions with side effects.
  #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))

  // If pretty printing is enabled on encoder `e`, emit the line break and
  // indentation for the current nesting depth. On a failed write this
  // expands to `return 0` from the *enclosing* function, so it may only be
  // used inside functions that return int with 0 meaning failure.
  // The argument is parenthesised so any pointer expression can be passed.
  #define MAYBE_PRETTY(e)             \
      do {                            \
          if((e)->pretty) {           \
              if(!enc_shift(e))       \
                  return 0;           \
          }                           \
      } while(0)

  // On Windows builds, map the C99 spellings onto the MSVC ones.
  #if WINDOWS || WIN32
  #define inline __inline
  #define snprintf _snprintf
  #endif
  21. typedef struct {
  22. ErlNifEnv* env;
  23. jiffy_st* atoms;
  24. size_t bytes_per_iter;
  25. int uescape;
  26. int pretty;
  27. int shiftcnt;
  28. int count;
  29. size_t iolen;
  30. size_t iosize;
  31. ERL_NIF_TERM iolist;
  32. ErlNifBinary bin;
  33. ErlNifBinary* curr;
  34. char* p;
  35. unsigned char* u;
  36. size_t i;
  37. } Encoder;
  // Indentation strings for pretty printing, indexed by nesting depth.
  // Byte 0 of every entry holds the length of the text that follows it:
  // one newline plus two spaces per nesting level. enc_shift copies exactly
  // that many bytes, so the length byte and the literal must always agree;
  // a shorter literal would leak its NUL terminator (or bytes past it) into
  // the generated JSON.
  #define NUM_SHIFTS 8
  static char* shifts[NUM_SHIFTS] = {
      "\x01\n",
      "\x03\n  ",
      "\x05\n    ",
      "\x07\n      ",
      "\x09\n        ",
      "\x0b\n          ",
      "\x0d\n            ",
      "\x0f\n              "
  };
  51. Encoder*
  52. enc_new(ErlNifEnv* env)
  53. {
  54. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  55. Encoder* e = enif_alloc_resource(st->res_enc, sizeof(Encoder));
  56. e->atoms = st;
  57. e->bytes_per_iter = DEFAULT_BYTES_PER_ITER;
  58. e->uescape = 0;
  59. e->pretty = 0;
  60. e->shiftcnt = 0;
  61. e->count = 0;
  62. e->iolen = 0;
  63. e->iosize = 0;
  64. e->curr = &(e->bin);
  65. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  66. e->curr = NULL;
  67. enif_release_resource(e);
  68. return NULL;
  69. }
  70. memset(e->curr->data, 0, e->curr->size);
  71. e->p = (char*) e->curr->data;
  72. e->u = (unsigned char*) e->curr->data;
  73. e->i = 0;
  74. return e;
  75. }
  76. int
  77. enc_init(Encoder* e, ErlNifEnv* env)
  78. {
  79. e->env = env;
  80. return 1;
  81. }
  82. void
  83. enc_destroy(ErlNifEnv* env, void* obj)
  84. {
  85. Encoder* e = (Encoder*) obj;
  86. if(e->curr != NULL) {
  87. enif_release_binary(e->curr);
  88. }
  89. }
  90. ERL_NIF_TERM
  91. enc_error(Encoder* e, const char* msg)
  92. {
  93. //assert(0 && msg);
  94. return make_error(e->atoms, e->env, msg);
  95. }
  96. static inline int
  97. enc_ensure(Encoder* e, size_t req)
  98. {
  99. size_t need = e->curr->size;
  100. while(req >= (need - e->i)) need <<= 1;
  101. if(need != e->curr->size) {
  102. if(!enif_realloc_binary(e->curr, need)) {
  103. return 0;
  104. }
  105. e->p = (char*) e->curr->data;
  106. e->u = (unsigned char*) e->curr->data;
  107. }
  108. return 1;
  109. }
  110. int
  111. enc_result(Encoder* e, ERL_NIF_TERM* value)
  112. {
  113. if(e->i != e->curr->size) {
  114. if(!enif_realloc_binary(e->curr, e->i)) {
  115. return 0;
  116. }
  117. }
  118. *value = enif_make_binary(e->env, e->curr);
  119. e->curr = NULL;
  120. return 1;
  121. }
  122. int
  123. enc_done(Encoder* e, ERL_NIF_TERM* value)
  124. {
  125. ERL_NIF_TERM last;
  126. if(e->iolen == 0) {
  127. return enc_result(e, value);
  128. }
  129. if(e->i > 0 ) {
  130. if(!enc_result(e, &last)) {
  131. return 0;
  132. }
  133. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  134. e->iolen++;
  135. }
  136. *value = e->iolist;
  137. return 1;
  138. }
  139. static inline int
  140. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  141. {
  142. ErlNifBinary* bin = e->curr;
  143. ERL_NIF_TERM curr;
  144. if(e->i > 0) {
  145. if(!enc_result(e, &curr)) {
  146. return 0;
  147. }
  148. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  149. e->iolen++;
  150. }
  151. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  152. e->iolen++;
  153. // Track the total number of bytes produced before
  154. // splitting our IO buffer. We add 16 to this value
  155. // as a rough estimate of the number of bytes that
  156. // a bignum might produce when encoded.
  157. e->iosize += e->i + 16;
  158. // Reinitialize our binary for the next buffer.
  159. e->curr = bin;
  160. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  161. return 0;
  162. }
  163. memset(e->curr->data, 0, e->curr->size);
  164. e->p = (char*) e->curr->data;
  165. e->u = (unsigned char*) e->curr->data;
  166. e->i = 0;
  167. return 1;
  168. }
  169. static inline int
  170. enc_literal(Encoder* e, const char* literal, size_t len)
  171. {
  172. if(!enc_ensure(e, len)) {
  173. return 0;
  174. }
  175. memcpy(&(e->p[e->i]), literal, len);
  176. e->i += len;
  177. e->count++;
  178. return 1;
  179. }
  180. static inline int
  181. enc_string(Encoder* e, ERL_NIF_TERM val)
  182. {
  183. ErlNifBinary bin;
  184. char atom[512];
  185. unsigned char* data;
  186. size_t size;
  187. int esc_extra = 0;
  188. int ulen;
  189. int uval;
  190. int i;
  191. if(enif_is_binary(e->env, val)) {
  192. if(!enif_inspect_binary(e->env, val, &bin)) {
  193. return 0;
  194. }
  195. data = bin.data;
  196. size = bin.size;
  197. } else if(enif_is_atom(e->env, val)) {
  198. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  199. return 0;
  200. }
  201. data = (unsigned char*) atom;
  202. size = strlen(atom);
  203. } else {
  204. return 0;
  205. }
  206. i = 0;
  207. while(i < size) {
  208. switch((char) data[i]) {
  209. case '\"':
  210. case '\\':
  211. case '\b':
  212. case '\f':
  213. case '\n':
  214. case '\r':
  215. case '\t':
  216. esc_extra += 1;
  217. i++;
  218. continue;
  219. default:
  220. if(data[i] < 0x20) {
  221. esc_extra += 5;
  222. i++;
  223. continue;
  224. } else if(data[i] < 0x80) {
  225. i++;
  226. continue;
  227. }
  228. ulen = utf8_validate(&(data[i]), size - i);
  229. if(ulen < 0) {
  230. return 0;
  231. }
  232. if(e->uescape) {
  233. uval = utf8_to_unicode(&(data[i]), ulen);
  234. if(uval < 0) {
  235. return 0;
  236. }
  237. esc_extra += utf8_esc_len(uval);
  238. if(ulen < 0) {
  239. return 0;
  240. }
  241. }
  242. i += ulen;
  243. }
  244. }
  245. if(!enc_ensure(e, size + esc_extra + 2)) {
  246. return 0;
  247. }
  248. e->p[e->i++] = '\"';
  249. i = 0;
  250. while(i < size) {
  251. switch((char) data[i]) {
  252. case '\"':
  253. case '\\':
  254. e->p[e->i++] = '\\';
  255. e->u[e->i++] = data[i];
  256. i++;
  257. continue;
  258. case '\b':
  259. e->p[e->i++] = '\\';
  260. e->p[e->i++] = 'b';
  261. i++;
  262. continue;
  263. case '\f':
  264. e->p[e->i++] = '\\';
  265. e->p[e->i++] = 'f';
  266. i++;
  267. continue;
  268. case '\n':
  269. e->p[e->i++] = '\\';
  270. e->p[e->i++] = 'n';
  271. i++;
  272. continue;
  273. case '\r':
  274. e->p[e->i++] = '\\';
  275. e->p[e->i++] = 'r';
  276. i++;
  277. continue;
  278. case '\t':
  279. e->p[e->i++] = '\\';
  280. e->p[e->i++] = 't';
  281. i++;
  282. continue;
  283. default:
  284. if(data[i] < 0x20) {
  285. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  286. if(ulen < 0) {
  287. return 0;
  288. }
  289. e->i += ulen;
  290. i++;
  291. } else if((data[i] & 0x80) && e->uescape) {
  292. uval = utf8_to_unicode(&(data[i]), size-i);
  293. if(uval < 0) {
  294. return 0;
  295. }
  296. ulen = unicode_uescape(uval, &(e->p[e->i]));
  297. if(ulen < 0) {
  298. return 0;
  299. }
  300. e->i += ulen;
  301. ulen = utf8_len(uval);
  302. if(ulen < 0) {
  303. return 0;
  304. }
  305. i += ulen;
  306. } else {
  307. e->u[e->i++] = data[i++];
  308. }
  309. }
  310. }
  311. e->p[e->i++] = '\"';
  312. e->count++;
  313. return 1;
  314. }
  315. static inline int
  316. enc_long(Encoder* e, ErlNifSInt64 val)
  317. {
  318. if(!enc_ensure(e, 32)) {
  319. return 0;
  320. }
  321. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  322. snprintf(&(e->p[e->i]), 32, "%ld", val);
  323. #elif SIZEOF_LONG == 8
  324. snprintf(&(e->p[e->i]), 32, "%ld", val);
  325. #else
  326. snprintf(&(e->p[e->i]), 32, "%lld", val);
  327. #endif
  328. e->i += strlen(&(e->p[e->i]));
  329. e->count++;
  330. return 1;
  331. }
  332. static inline int
  333. enc_double(Encoder* e, double val)
  334. {
  335. char* start;
  336. size_t len;
  337. if(!enc_ensure(e, 32)) {
  338. return 0;
  339. }
  340. start = &(e->p[e->i]);
  341. if(!double_to_shortest(start, e->curr->size, &len, val)) {
  342. return 0;
  343. }
  344. e->i += len;
  345. e->count++;
  346. return 1;
  347. }
  348. static inline int
  349. enc_char(Encoder* e, char c)
  350. {
  351. if(!enc_ensure(e, 1)) {
  352. return 0;
  353. }
  354. e->p[e->i++] = c;
  355. return 1;
  356. }
  357. static int
  358. enc_shift(Encoder* e) {
  359. int i;
  360. char* shift;
  361. assert(e->shiftcnt >= 0 && "Invalid shift count.");
  362. shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
  363. if(!enc_literal(e, shift + 1, *shift))
  364. return 0;
  365. // Finish the rest of this shift it's it bigger than
  366. // our largest predefined constant.
  367. for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
  368. if(!enc_literal(e, " ", 2))
  369. return 0;
  370. }
  371. return 1;
  372. }
  373. static inline int
  374. enc_start_object(Encoder* e)
  375. {
  376. e->count++;
  377. e->shiftcnt++;
  378. if(!enc_char(e, '{'))
  379. return 0;
  380. MAYBE_PRETTY(e);
  381. return 1;
  382. }
  383. static inline int
  384. enc_end_object(Encoder* e)
  385. {
  386. e->shiftcnt--;
  387. MAYBE_PRETTY(e);
  388. return enc_char(e, '}');
  389. }
  390. static inline int
  391. enc_start_array(Encoder* e)
  392. {
  393. e->count++;
  394. e->shiftcnt++;
  395. if(!enc_char(e, '['))
  396. return 0;
  397. MAYBE_PRETTY(e);
  398. return 1;
  399. }
  400. static inline int
  401. enc_end_array(Encoder* e)
  402. {
  403. e->shiftcnt--;
  404. MAYBE_PRETTY(e);
  405. return enc_char(e, ']');
  406. }
  407. static inline int
  408. enc_colon(Encoder* e)
  409. {
  410. if(e->pretty)
  411. return enc_literal(e, " : ", 3);
  412. return enc_char(e, ':');
  413. }
  414. static inline int
  415. enc_comma(Encoder* e)
  416. {
  417. if(!enc_char(e, ','))
  418. return 0;
  419. MAYBE_PRETTY(e);
  420. return 1;
  421. }
  422. ERL_NIF_TERM
  423. encode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  424. {
  425. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  426. Encoder* e;
  427. ERL_NIF_TERM opts;
  428. ERL_NIF_TERM val;
  429. ERL_NIF_TERM tmp_argv[3];
  430. if(argc != 2) {
  431. return enif_make_badarg(env);
  432. }
  433. e = enc_new(env);
  434. if(e == NULL) {
  435. return make_error(st, env, "internal_error");
  436. }
  437. tmp_argv[0] = enif_make_resource(env, e);
  438. tmp_argv[1] = enif_make_list(env, 1, argv[0]);
  439. tmp_argv[2] = enif_make_list(env, 0);
  440. enif_release_resource(e);
  441. opts = argv[1];
  442. if(!enif_is_list(env, opts)) {
  443. return enif_make_badarg(env);
  444. }
  445. while(enif_get_list_cell(env, opts, &val, &opts)) {
  446. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  447. e->uescape = 1;
  448. } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
  449. e->pretty = 1;
  450. } else if(enif_compare(val, e->atoms->atom_force_utf8) == 0) {
  451. // Ignore, handled in Erlang
  452. } else if(get_bytes_per_iter(env, val, &(e->bytes_per_iter))) {
  453. continue;
  454. } else {
  455. return enif_make_badarg(env);
  456. }
  457. }
  458. return encode_iter(env, 3, tmp_argv);
  459. }
  // NIF entry point that performs one slice of encoding work.
  //
  //   argv[0]  encoder resource created by encode_init
  //   argv[1]  work stack (a list) of terms still to be encoded
  //   argv[2]  iolist accumulated so far
  //
  // The work stack is processed until it is empty or should_yield() says
  // the slice has produced enough output. Containers are unrolled onto the
  // stack as three cells - next value, a marker (ref_object / ref_array)
  // and the list of remaining members - so that after the value has been
  // written the marker tells the loop how to continue.
  //
  // Returns one of:
  //   - a 4-tuple {iter atom, argv[0], Stack, IoList} when yielding;
  //   - the term produced by enc_done when nothing was pushed onto the
  //     iolist;
  //   - {partial atom, IoList} when the iolist is non-empty;
  //   - an error term built by enc_error;
  //   - badarg for malformed arguments.
  ERL_NIF_TERM
  encode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  {
      Encoder* e;
      jiffy_st* st = (jiffy_st*) enif_priv_data(env);

      ERL_NIF_TERM ret = 0;

      ERL_NIF_TERM stack;
      ERL_NIF_TERM curr;
      ERL_NIF_TERM item;
      const ERL_NIF_TERM* tuple;
      int arity;
      ErlNifSInt64 lval;
      double dval;

      size_t start;
      size_t processed;

      if(argc != 3) {
          return enif_make_badarg(env);
      } else if(!enif_get_resource(env, argv[0], st->res_enc, (void**) &e)) {
          return enif_make_badarg(env);
      } else if(!enif_is_list(env, argv[1])) {
          return enif_make_badarg(env);
      } else if(!enif_is_list(env, argv[2])) {
          return enif_make_badarg(env);
      }

      if(!enc_init(e, env)) {
          return enif_make_badarg(env);
      }

      stack = argv[1];
      e->iolist = argv[2];

      // Output position at the start of this slice; used to measure how
      // many bytes this call has produced.
      start = e->iosize + e->i;

      while(!enif_is_empty_list(env, stack)) {

          // Yield back to the caller once this slice has produced enough.
          processed = (e->iosize + e->i) - start;

          if(should_yield(processed, e->bytes_per_iter)) {
              consume_timeslice(env, processed, e->bytes_per_iter);
              return enif_make_tuple4(
                      env,
                      st->atom_iter,
                      argv[0],
                      stack,
                      e->iolist
                  );
          }

          if(!enif_get_list_cell(env, stack, &curr, &stack)) {
              ret = enc_error(e, "internal_error");
              goto done;
          }
          if(enif_is_identical(curr, e->atoms->ref_object)) {
              // Continuing an object: the next stack cell is the list of
              // members that have not been written yet.
              if(!enif_get_list_cell(env, stack, &curr, &stack)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              if(enif_is_empty_list(env, curr)) {
                  if(!enc_end_object(e)) {
                      ret = enc_error(e, "internal_error");
                      goto done;
                  }
                  continue;
              }
              if(!enif_get_list_cell(env, curr, &item, &curr)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              if(!enif_get_tuple(env, item, &arity, &tuple)) {
                  ret = enc_error(e, "invalid_object_pair");
                  goto done;
              }
              if(arity != 2) {
                  ret = enc_error(e, "invalid_object_pair");
                  goto done;
              }
              if(!enc_comma(e)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              if(!enc_string(e, tuple[0])) {
                  ret = enc_error(e, "invalid_object_key");
                  goto done;
              }
              if(!enc_colon(e)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              // Push: remaining members, object marker, then the value so
              // the value is encoded next.
              stack = enif_make_list_cell(env, curr, stack);
              stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
              stack = enif_make_list_cell(env, tuple[1], stack);
          } else if(enif_is_identical(curr, e->atoms->ref_array)) {
              // Continuing an array: the next stack cell is the list of
              // elements that have not been written yet.
              if(!enif_get_list_cell(env, stack, &curr, &stack)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              if(enif_is_empty_list(env, curr)) {
                  if(!enc_end_array(e)) {
                      ret = enc_error(e, "internal_error");
                      goto done;
                  }
                  continue;
              }
              if(!enc_comma(e)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              if(!enif_get_list_cell(env, curr, &item, &curr)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              // Push: remaining elements, array marker, then the element.
              stack = enif_make_list_cell(env, curr, stack);
              stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
              stack = enif_make_list_cell(env, item, stack);
          } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
              if(!enc_literal(e, "null", 4)) {
                  ret = enc_error(e, "null");
                  goto done;
              }
          } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
              if(!enc_literal(e, "true", 4)) {
                  ret = enc_error(e, "true");
                  goto done;
              }
          } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
              if(!enc_literal(e, "false", 5)) {
                  ret = enc_error(e, "false");
                  goto done;
              }
          } else if(enif_is_binary(env, curr)) {
              if(!enc_string(e, curr)) {
                  ret = enc_error(e, "invalid_string");
                  goto done;
              }
          } else if(enif_is_atom(env, curr)) {
              // Any atom other than null/true/false is written as a string.
              if(!enc_string(e, curr)) {
                  ret = enc_error(e, "invalid_string");
                  goto done;
              }
          } else if(enif_get_int64(env, curr, &lval)) {
              if(!enc_long(e, lval)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
          } else if(enif_get_double(env, curr, &dval)) {
              if(!enc_double(e, dval)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
          } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
              // A 1-tuple wrapping a list of 2-tuples starts a new object.
              if(arity != 1) {
                  ret = enc_error(e, "invalid_ejson");
                  goto done;
              }
              if(!enif_is_list(env, tuple[0])) {
                  ret = enc_error(e, "invalid_object");
                  goto done;
              }
              if(!enc_start_object(e)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              if(enif_is_empty_list(env, tuple[0])) {
                  if(!enc_end_object(e)) {
                      ret = enc_error(e, "internal_error");
                      goto done;
                  }
                  continue;
              }
              // Write the first member's key here (no leading comma); later
              // members are handled by the ref_object branch above.
              if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              if(!enif_get_tuple(env, item, &arity, &tuple)) {
                  ret = enc_error(e, "invalid_object_member");
                  goto done;
              }
              if(arity != 2) {
                  ret = enc_error(e, "invalid_object_member_arity");
                  goto done;
              }
              if(!enc_string(e, tuple[0])) {
                  ret = enc_error(e, "invalid_object_member_key");
                  goto done;
              }
              if(!enc_colon(e)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              stack = enif_make_list_cell(env, curr, stack);
              stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
              stack = enif_make_list_cell(env, tuple[1], stack);
          } else if(enif_is_list(env, curr)) {
              // A list starts a new array; the first element is pushed
              // without a leading comma.
              if(!enc_start_array(e)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              if(enif_is_empty_list(env, curr)) {
                  if(!enc_end_array(e)) {
                      ret = enc_error(e, "internal_error");
                      goto done;
                  }
                  continue;
              }
              if(!enif_get_list_cell(env, curr, &item, &curr)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
              stack = enif_make_list_cell(env, curr, stack);
              stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
              stack = enif_make_list_cell(env, item, stack);
          } else {
              // Nothing above matched: push the raw term onto the iolist
              // and let the caller deal with it (see enc_unknown).
              if(!enc_unknown(e, curr)) {
                  ret = enc_error(e, "internal_error");
                  goto done;
              }
          }
      }

      if(!enc_done(e, &item)) {
          ret = enc_error(e, "internal_error");
          goto done;
      }

      // A non-empty iolist means the output is not a single finished
      // binary, so it is tagged with the partial atom.
      if(e->iolen == 0) {
          ret = item;
      } else {
          ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
      }

  done:
      // Report the work done in this slice on both success and error.
      processed = (e->iosize + e->i) - start;
      consume_timeslice(env, processed, e->bytes_per_iter);

      return ret;
  }