Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

732 рядки
18 KiB

13 років тому
13 років тому
13 років тому
13 років тому
13 років тому
13 років тому
13 років тому
13 років тому
13 років тому
13 років тому
13 років тому
  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include <float.h>
  7. #include "erl_nif.h"
  8. #include "jiffy.h"
  // Size in bytes of each freshly allocated output buffer.
  #define BIN_INC_SIZE 2048

  // Space reserved in the output buffer before formatting a double.
  #define FLOAT_BUFLEN (LDBL_DIG*2)

  // Smaller of two values. Each argument may be evaluated twice, so do not
  // pass expressions with side effects.
  #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))

  // When pretty printing is enabled on encoder `e`, emit a newline plus
  // indentation through enc_shift(). Expands to `return 0` in the calling
  // function when enc_shift() fails, so it may only be used inside functions
  // that report failure by returning 0.
  #define MAYBE_PRETTY(e) \
  do { \
      if((e)->pretty) { \
          if(!enc_shift(e)) \
              return 0; \
      } \
  } while(0)

  // On Windows builds, alias `inline` and `snprintf` to `__inline` and
  // `_snprintf`.
  #if WINDOWS || WIN32
  #define inline __inline
  #define snprintf _snprintf
  #endif
  23. typedef struct {
  24. ErlNifEnv* env;
  25. jiffy_st* atoms;
  26. int uescape;
  27. int pretty;
  28. int shiftcnt;
  29. int count;
  30. int iolen;
  31. ERL_NIF_TERM iolist;
  32. ErlNifBinary* curr;
  33. char* p;
  34. unsigned char* u;
  35. size_t i;
  36. } Encoder;
  // String constants for pretty printing.
  // Every string starts with its length: byte 0 holds the number of bytes
  // that follow it (a newline plus two spaces per nesting level). The length
  // byte must match the literal exactly, because enc_shift() copies that many
  // bytes without looking for a terminator.
  #define NUM_SHIFTS 8
  static char* shifts[NUM_SHIFTS] = {
      "\x01\n",
      "\x03\n  ",
      "\x05\n    ",
      "\x07\n      ",
      "\x09\n        ",
      "\x0b\n          ",
      "\x0d\n            ",
      "\x0f\n              "
  };
  50. int
  51. enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts, ErlNifBinary* bin)
  52. {
  53. ERL_NIF_TERM val;
  54. e->env = env;
  55. e->atoms = enif_priv_data(env);
  56. e->uescape = 0;
  57. e->pretty = 0;
  58. e->shiftcnt = 0;
  59. e->count = 0;
  60. if(!enif_is_list(env, opts)) {
  61. return 0;
  62. }
  63. while(enif_get_list_cell(env, opts, &val, &opts)) {
  64. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  65. e->uescape = 1;
  66. } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
  67. e->pretty = 1;
  68. } else if(enif_compare(val, e->atoms->atom_force_utf8) == 0) {
  69. // Ignore, handled in Erlang
  70. } else {
  71. return 0;
  72. }
  73. }
  74. e->iolen = 0;
  75. e->iolist = enif_make_list(env, 0);
  76. e->curr = bin;
  77. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  78. return 0;
  79. }
  80. memset(e->curr->data, 0, e->curr->size);
  81. e->p = (char*) e->curr->data;
  82. e->u = (unsigned char*) e->curr->data;
  83. e->i = 0;
  84. return 1;
  85. }
  86. void
  87. enc_destroy(Encoder* e)
  88. {
  89. if(e->curr != NULL) {
  90. enif_release_binary(e->curr);
  91. }
  92. }
  93. ERL_NIF_TERM
  94. enc_error(Encoder* e, const char* msg)
  95. {
  96. //assert(0 && msg);
  97. return make_error(e->atoms, e->env, msg);
  98. }
  99. static inline int
  100. enc_ensure(Encoder* e, size_t req)
  101. {
  102. size_t need = e->curr->size;
  103. while(req >= (need - e->i)) need <<= 1;
  104. if(need != e->curr->size) {
  105. if(!enif_realloc_binary(e->curr, need)) {
  106. return 0;
  107. }
  108. e->p = (char*) e->curr->data;
  109. e->u = (unsigned char*) e->curr->data;
  110. }
  111. return 1;
  112. }
  113. int
  114. enc_result(Encoder* e, ERL_NIF_TERM* value)
  115. {
  116. if(e->i != e->curr->size) {
  117. if(!enif_realloc_binary(e->curr, e->i)) {
  118. return 0;
  119. }
  120. }
  121. *value = enif_make_binary(e->env, e->curr);
  122. e->curr = NULL;
  123. return 1;
  124. }
  125. int
  126. enc_done(Encoder* e, ERL_NIF_TERM* value)
  127. {
  128. ERL_NIF_TERM last;
  129. if(e->iolen == 0) {
  130. return enc_result(e, value);
  131. }
  132. if(e->i > 0 ) {
  133. if(!enc_result(e, &last)) {
  134. return 0;
  135. }
  136. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  137. e->iolen++;
  138. }
  139. *value = e->iolist;
  140. return 1;
  141. }
  142. static inline int
  143. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  144. {
  145. ErlNifBinary* bin = e->curr;
  146. ERL_NIF_TERM curr;
  147. if(e->i > 0) {
  148. if(!enc_result(e, &curr)) {
  149. return 0;
  150. }
  151. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  152. e->iolen++;
  153. }
  154. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  155. e->iolen++;
  156. // Reinitialize our binary for the next buffer.
  157. e->curr = bin;
  158. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  159. return 0;
  160. }
  161. memset(e->curr->data, 0, e->curr->size);
  162. e->p = (char*) e->curr->data;
  163. e->u = (unsigned char*) e->curr->data;
  164. e->i = 0;
  165. return 1;
  166. }
  167. static inline int
  168. enc_literal(Encoder* e, const char* literal, size_t len)
  169. {
  170. if(!enc_ensure(e, len)) {
  171. return 0;
  172. }
  173. memcpy(&(e->p[e->i]), literal, len);
  174. e->i += len;
  175. e->count++;
  176. return 1;
  177. }
  178. static inline int
  179. enc_string(Encoder* e, ERL_NIF_TERM val)
  180. {
  181. ErlNifBinary bin;
  182. char atom[512];
  183. unsigned char* data;
  184. size_t size;
  185. int esc_extra = 0;
  186. int ulen;
  187. int uval;
  188. int i;
  189. if(enif_is_binary(e->env, val)) {
  190. if(!enif_inspect_binary(e->env, val, &bin)) {
  191. return 0;
  192. }
  193. data = bin.data;
  194. size = bin.size;
  195. } else if(enif_is_atom(e->env, val)) {
  196. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  197. return 0;
  198. }
  199. data = (unsigned char*) atom;
  200. size = strlen(atom);
  201. } else {
  202. return 0;
  203. }
  204. i = 0;
  205. while(i < size) {
  206. switch((char) data[i]) {
  207. case '\"':
  208. case '\\':
  209. case '/':
  210. case '\b':
  211. case '\f':
  212. case '\n':
  213. case '\r':
  214. case '\t':
  215. esc_extra += 1;
  216. i++;
  217. continue;
  218. default:
  219. if(data[i] < 0x20) {
  220. esc_extra += 5;
  221. i++;
  222. continue;
  223. } else if(data[i] < 0x80) {
  224. i++;
  225. continue;
  226. }
  227. ulen = utf8_validate(&(data[i]), size - i);
  228. if(ulen < 0) {
  229. return 0;
  230. }
  231. if(e->uescape) {
  232. uval = utf8_to_unicode(&(data[i]), ulen);
  233. if(uval < 0) {
  234. return 0;
  235. }
  236. esc_extra += utf8_esc_len(uval);
  237. if(ulen < 0) {
  238. return 0;
  239. }
  240. }
  241. i += ulen;
  242. }
  243. }
  244. if(!enc_ensure(e, size + esc_extra + 2)) {
  245. return 0;
  246. }
  247. e->p[e->i++] = '\"';
  248. i = 0;
  249. while(i < size) {
  250. switch((char) data[i]) {
  251. case '\"':
  252. case '\\':
  253. case '/':
  254. e->p[e->i++] = '\\';
  255. e->u[e->i++] = data[i];
  256. i++;
  257. continue;
  258. case '\b':
  259. e->p[e->i++] = '\\';
  260. e->p[e->i++] = 'b';
  261. i++;
  262. continue;
  263. case '\f':
  264. e->p[e->i++] = '\\';
  265. e->p[e->i++] = 'f';
  266. i++;
  267. continue;
  268. case '\n':
  269. e->p[e->i++] = '\\';
  270. e->p[e->i++] = 'n';
  271. i++;
  272. continue;
  273. case '\r':
  274. e->p[e->i++] = '\\';
  275. e->p[e->i++] = 'r';
  276. i++;
  277. continue;
  278. case '\t':
  279. e->p[e->i++] = '\\';
  280. e->p[e->i++] = 't';
  281. i++;
  282. continue;
  283. default:
  284. if(data[i] < 0x20) {
  285. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  286. if(ulen < 0) {
  287. return 0;
  288. }
  289. e->i += ulen;
  290. i++;
  291. } else if((data[i] & 0x80) && e->uescape) {
  292. uval = utf8_to_unicode(&(data[i]), size-i);
  293. if(uval < 0) {
  294. return 0;
  295. }
  296. ulen = unicode_uescape(uval, &(e->p[e->i]));
  297. if(ulen < 0) {
  298. return 0;
  299. }
  300. e->i += ulen;
  301. ulen = utf8_len(uval);
  302. if(ulen < 0) {
  303. return 0;
  304. }
  305. i += ulen;
  306. } else {
  307. e->u[e->i++] = data[i++];
  308. }
  309. }
  310. }
  311. e->p[e->i++] = '\"';
  312. e->count++;
  313. return 1;
  314. }
  315. static inline int
  316. enc_long(Encoder* e, ErlNifSInt64 val)
  317. {
  318. if(!enc_ensure(e, 32)) {
  319. return 0;
  320. }
  321. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  322. snprintf(&(e->p[e->i]), 32, "%ld", val);
  323. #elif SIZEOF_LONG == 8
  324. snprintf(&(e->p[e->i]), 32, "%ld", val);
  325. #else
  326. snprintf(&(e->p[e->i]), 32, "%lld", val);
  327. #endif
  328. e->i += strlen(&(e->p[e->i]));
  329. e->count++;
  330. return 1;
  331. }
  332. static inline int
  333. enc_double(Encoder* e, double val)
  334. {
  335. char* start;
  336. size_t len;
  337. size_t i;
  338. if(!enc_ensure(e, FLOAT_BUFLEN)) {
  339. return 0;
  340. }
  341. start = &(e->p[e->i]);
  342. // try to encode doubles using the fewest digits possible...
  343. if (snprintf(start, FLOAT_BUFLEN, "%.*g", DBL_DIG, val) > FLT_DIG)
  344. {
  345. // ...fall back to full expansion to be safe
  346. snprintf(start, FLOAT_BUFLEN, "%.*g", LDBL_DIG, val);
  347. }
  348. len = strlen(start);
  349. // Check if we have a decimal point
  350. for(i = 0; i < len; i++) {
  351. if(start[i] == '.' || start[i] == 'e' || start[i] == 'E')
  352. goto done;
  353. }
  354. if(len >= FLOAT_BUFLEN-2) return 0;
  355. // Force a decimal point
  356. start[len++] = '.';
  357. start[len++] = '0';
  358. done:
  359. e->i += len;
  360. e->count++;
  361. return 1;
  362. }
  363. static inline int
  364. enc_char(Encoder* e, char c)
  365. {
  366. if(!enc_ensure(e, 1)) {
  367. return 0;
  368. }
  369. e->p[e->i++] = c;
  370. return 1;
  371. }
  372. static int
  373. enc_shift(Encoder* e) {
  374. int i;
  375. char* shift;
  376. assert(e->shiftcnt >= 0 && "Invalid shift count.");
  377. shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
  378. if(!enc_literal(e, shift + 1, *shift))
  379. return 0;
  380. // Finish the rest of this shift it's it bigger than
  381. // our largest predefined constant.
  382. for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
  383. if(!enc_literal(e, " ", 2))
  384. return 0;
  385. }
  386. return 1;
  387. }
  388. static inline int
  389. enc_start_object(Encoder* e)
  390. {
  391. e->count++;
  392. e->shiftcnt++;
  393. if(!enc_char(e, '{'))
  394. return 0;
  395. MAYBE_PRETTY(e);
  396. return 1;
  397. }
  398. static inline int
  399. enc_end_object(Encoder* e)
  400. {
  401. e->shiftcnt--;
  402. MAYBE_PRETTY(e);
  403. return enc_char(e, '}');
  404. }
  405. static inline int
  406. enc_start_array(Encoder* e)
  407. {
  408. e->count++;
  409. e->shiftcnt++;
  410. if(!enc_char(e, '['))
  411. return 0;
  412. MAYBE_PRETTY(e);
  413. return 1;
  414. }
  415. static inline int
  416. enc_end_array(Encoder* e)
  417. {
  418. e->shiftcnt--;
  419. MAYBE_PRETTY(e);
  420. return enc_char(e, ']');
  421. }
  422. static inline int
  423. enc_colon(Encoder* e)
  424. {
  425. if(e->pretty)
  426. return enc_literal(e, " : ", 3);
  427. return enc_char(e, ':');
  428. }
  429. static inline int
  430. enc_comma(Encoder* e)
  431. {
  432. if(!enc_char(e, ','))
  433. return 0;
  434. MAYBE_PRETTY(e);
  435. return 1;
  436. }
  437. ERL_NIF_TERM
  438. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  439. {
  440. Encoder enc;
  441. Encoder* e = &enc;
  442. ErlNifBinary bin;
  443. ERL_NIF_TERM ret;
  444. ERL_NIF_TERM stack;
  445. ERL_NIF_TERM curr;
  446. ERL_NIF_TERM item;
  447. const ERL_NIF_TERM* tuple;
  448. int arity;
  449. ErlNifSInt64 lval;
  450. double dval;
  451. if(argc != 2) {
  452. return enif_make_badarg(env);
  453. }
  454. if(!enc_init(e, env, argv[1], &bin)) {
  455. return enif_make_badarg(env);
  456. }
  457. stack = enif_make_list(env, 1, argv[0]);
  458. while(!enif_is_empty_list(env, stack)) {
  459. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  460. ret = enc_error(e, "internal_error");
  461. goto done;
  462. }
  463. if(enif_is_identical(curr, e->atoms->ref_object)) {
  464. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  465. ret = enc_error(e, "internal_error");
  466. goto done;
  467. }
  468. if(enif_is_empty_list(env, curr)) {
  469. if(!enc_end_object(e)) {
  470. ret = enc_error(e, "internal_error");
  471. goto done;
  472. }
  473. continue;
  474. }
  475. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  476. ret = enc_error(e, "internal_error");
  477. goto done;
  478. }
  479. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  480. ret = enc_error(e, "invalid_object_pair");
  481. goto done;
  482. }
  483. if(arity != 2) {
  484. ret = enc_error(e, "invalid_object_pair");
  485. goto done;
  486. }
  487. if(!enc_comma(e)) {
  488. ret = enc_error(e, "internal_error");
  489. goto done;
  490. }
  491. if(!enc_string(e, tuple[0])) {
  492. ret = enc_error(e, "invalid_object_key");
  493. goto done;
  494. }
  495. if(!enc_colon(e)) {
  496. ret = enc_error(e, "internal_error");
  497. goto done;
  498. }
  499. stack = enif_make_list_cell(env, curr, stack);
  500. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  501. stack = enif_make_list_cell(env, tuple[1], stack);
  502. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  503. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  504. ret = enc_error(e, "internal_error");
  505. goto done;
  506. }
  507. if(enif_is_empty_list(env, curr)) {
  508. if(!enc_end_array(e)) {
  509. ret = enc_error(e, "internal_error");
  510. goto done;
  511. }
  512. continue;
  513. }
  514. if(!enc_comma(e)) {
  515. ret = enc_error(e, "internal_error");
  516. goto done;
  517. }
  518. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  519. ret = enc_error(e, "internal_error");
  520. goto done;
  521. }
  522. stack = enif_make_list_cell(env, curr, stack);
  523. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  524. stack = enif_make_list_cell(env, item, stack);
  525. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  526. if(!enc_literal(e, "null", 4)) {
  527. ret = enc_error(e, "null");
  528. goto done;
  529. }
  530. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  531. if(!enc_literal(e, "true", 4)) {
  532. ret = enc_error(e, "true");
  533. goto done;
  534. }
  535. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  536. if(!enc_literal(e, "false", 5)) {
  537. ret = enc_error(e, "false");
  538. goto done;
  539. }
  540. } else if(enif_is_binary(env, curr)) {
  541. if(!enc_string(e, curr)) {
  542. ret = enc_error(e, "invalid_string");
  543. goto done;
  544. }
  545. } else if(enif_is_atom(env, curr)) {
  546. if(!enc_string(e, curr)) {
  547. ret = enc_error(e, "invalid_string");
  548. goto done;
  549. }
  550. } else if(enif_get_int64(env, curr, &lval)) {
  551. if(!enc_long(e, lval)) {
  552. ret = enc_error(e, "internal_error");
  553. goto done;
  554. }
  555. } else if(enif_get_double(env, curr, &dval)) {
  556. if(!enc_double(e, dval)) {
  557. ret = enc_error(e, "internal_error");
  558. goto done;
  559. }
  560. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  561. if(arity != 1) {
  562. ret = enc_error(e, "invalid_ejson");
  563. goto done;
  564. }
  565. if(!enif_is_list(env, tuple[0])) {
  566. ret = enc_error(e, "invalid_object");
  567. goto done;
  568. }
  569. if(!enc_start_object(e)) {
  570. ret = enc_error(e, "internal_error");
  571. goto done;
  572. }
  573. if(enif_is_empty_list(env, tuple[0])) {
  574. if(!enc_end_object(e)) {
  575. ret = enc_error(e, "internal_error");
  576. goto done;
  577. }
  578. continue;
  579. }
  580. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  581. ret = enc_error(e, "internal_error");
  582. goto done;
  583. }
  584. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  585. ret = enc_error(e, "invalid_object_member");
  586. goto done;
  587. }
  588. if(arity != 2) {
  589. ret = enc_error(e, "invalid_object_member_arity");
  590. goto done;
  591. }
  592. if(!enc_string(e, tuple[0])) {
  593. ret = enc_error(e, "invalid_object_member_key");
  594. goto done;
  595. }
  596. if(!enc_colon(e)) {
  597. ret = enc_error(e, "internal_error");
  598. goto done;
  599. }
  600. stack = enif_make_list_cell(env, curr, stack);
  601. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  602. stack = enif_make_list_cell(env, tuple[1], stack);
  603. } else if(enif_is_list(env, curr)) {
  604. if(!enc_start_array(e)) {
  605. ret = enc_error(e, "internal_error");
  606. goto done;
  607. }
  608. if(enif_is_empty_list(env, curr)) {
  609. if(!enc_end_array(e)) {
  610. ret = enc_error(e, "internal_error");
  611. goto done;
  612. }
  613. continue;
  614. }
  615. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  616. ret = enc_error(e, "internal_error");
  617. goto done;
  618. }
  619. stack = enif_make_list_cell(env, curr, stack);
  620. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  621. stack = enif_make_list_cell(env, item, stack);
  622. } else {
  623. if(!enc_unknown(e, curr)) {
  624. ret = enc_error(e, "internal_error");
  625. goto done;
  626. }
  627. }
  628. } while(!enif_is_empty_list(env, stack));
  629. if(!enc_done(e, &item)) {
  630. ret = enc_error(e, "internal_error");
  631. goto done;
  632. }
  633. if(e->iolen == 0) {
  634. ret = item;
  635. } else {
  636. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  637. }
  638. done:
  639. enc_destroy(e);
  640. return ret;
  641. }