Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

722 rindas
18 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  // Size of every freshly allocated output binary; enc_ensure() grows a
  // binary by doubling its current size.
  #define BIN_INC_SIZE 2048

  // Smaller of two values. An argument may be evaluated twice, so do not
  // pass expressions with side effects.
  #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))

  // When pretty printing is enabled, emit a newline plus indentation via
  // enc_shift(). On failure this returns 0 from the *calling* function.
  #define MAYBE_PRETTY(e)             \
  do {                                \
      if((e)->pretty) {               \
          if(!enc_shift(e))           \
              return 0;               \
      }                               \
  } while(0)

  // #ifdef accepts exactly one identifier, so the former
  // "#ifdef WINDOWS || WIN32" only ever tested WINDOWS. Use defined() to
  // test both macros.
  #if defined(WINDOWS) || defined(WIN32)
  #define inline __inline
  #define snprintf _snprintf
  #endif
  21. typedef struct {
  22. ErlNifEnv* env;
  23. jiffy_st* atoms;
  24. int uescape;
  25. int pretty;
  26. int shiftcnt;
  27. int count;
  28. int iolen;
  29. ERL_NIF_TERM iolist;
  30. ErlNifBinary* curr;
  31. char* p;
  32. unsigned char* u;
  33. size_t i;
  34. } Encoder;
  // String constants for pretty printing: a newline followed by two spaces
  // of indentation per nesting level.
  // Every string starts with one byte holding the length of the text after
  // it. enc_shift() copies exactly that many bytes, so the prefix must match
  // the literal or the terminating NUL (or bytes beyond it) would be emitted.
  #define NUM_SHIFTS 8
  static char* shifts[NUM_SHIFTS] = {
      "\x01\n",
      "\x03\n  ",
      "\x05\n    ",
      "\x07\n      ",
      "\x09\n        ",
      "\x0b\n          ",
      "\x0d\n            ",
      "\x0f\n              "
  };
  48. int
  49. enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts, ErlNifBinary* bin)
  50. {
  51. ERL_NIF_TERM val;
  52. e->env = env;
  53. e->atoms = enif_priv_data(env);
  54. e->uescape = 0;
  55. e->pretty = 0;
  56. e->shiftcnt = 0;
  57. e->count = 0;
  58. if(!enif_is_list(env, opts)) {
  59. return 0;
  60. }
  61. while(enif_get_list_cell(env, opts, &val, &opts)) {
  62. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  63. e->uescape = 1;
  64. } else if(enif_compare(val, e->atoms->atom_pretty) == 0) {
  65. e->pretty = 1;
  66. } else {
  67. return 0;
  68. }
  69. }
  70. e->iolen = 0;
  71. e->iolist = enif_make_list(env, 0);
  72. e->curr = bin;
  73. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  74. return 0;
  75. }
  76. memset(e->curr->data, 0, e->curr->size);
  77. e->p = (char*) e->curr->data;
  78. e->u = (unsigned char*) e->curr->data;
  79. e->i = 0;
  80. return 1;
  81. }
  82. void
  83. enc_destroy(Encoder* e)
  84. {
  85. if(e->curr != NULL) {
  86. enif_release_binary(e->curr);
  87. }
  88. }
  89. ERL_NIF_TERM
  90. enc_error(Encoder* e, const char* msg)
  91. {
  92. //assert(0 && msg);
  93. return make_error(e->atoms, e->env, msg);
  94. }
  95. static inline int
  96. enc_ensure(Encoder* e, size_t req)
  97. {
  98. size_t need = e->curr->size;
  99. while(req >= (need - e->i)) need <<= 1;
  100. if(need != e->curr->size) {
  101. if(!enif_realloc_binary(e->curr, need)) {
  102. return 0;
  103. }
  104. e->p = (char*) e->curr->data;
  105. e->u = (unsigned char*) e->curr->data;
  106. }
  107. return 1;
  108. }
  109. int
  110. enc_result(Encoder* e, ERL_NIF_TERM* value)
  111. {
  112. if(e->i != e->curr->size) {
  113. if(!enif_realloc_binary(e->curr, e->i)) {
  114. return 0;
  115. }
  116. }
  117. *value = enif_make_binary(e->env, e->curr);
  118. e->curr = NULL;
  119. return 1;
  120. }
  121. int
  122. enc_done(Encoder* e, ERL_NIF_TERM* value)
  123. {
  124. ERL_NIF_TERM last;
  125. if(e->iolen == 0) {
  126. return enc_result(e, value);
  127. }
  128. if(e->i > 0 ) {
  129. if(!enc_result(e, &last)) {
  130. return 0;
  131. }
  132. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  133. e->iolen++;
  134. }
  135. *value = e->iolist;
  136. return 1;
  137. }
  138. static inline int
  139. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  140. {
  141. ErlNifBinary* bin = e->curr;
  142. ERL_NIF_TERM curr;
  143. if(e->i > 0) {
  144. if(!enc_result(e, &curr)) {
  145. return 0;
  146. }
  147. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  148. e->iolen++;
  149. }
  150. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  151. e->iolen++;
  152. // Reinitialize our binary for the next buffer.
  153. e->curr = bin;
  154. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  155. return 0;
  156. }
  157. memset(e->curr->data, 0, e->curr->size);
  158. e->p = (char*) e->curr->data;
  159. e->u = (unsigned char*) e->curr->data;
  160. e->i = 0;
  161. return 1;
  162. }
  163. static inline int
  164. enc_literal(Encoder* e, const char* literal, size_t len)
  165. {
  166. if(!enc_ensure(e, len)) {
  167. return 0;
  168. }
  169. memcpy(&(e->p[e->i]), literal, len);
  170. e->i += len;
  171. e->count++;
  172. return 1;
  173. }
  174. static inline int
  175. enc_string(Encoder* e, ERL_NIF_TERM val)
  176. {
  177. ErlNifBinary bin;
  178. char atom[512];
  179. unsigned char* data;
  180. size_t size;
  181. int esc_extra = 0;
  182. int ulen;
  183. int uval;
  184. int i;
  185. if(enif_is_binary(e->env, val)) {
  186. if(!enif_inspect_binary(e->env, val, &bin)) {
  187. return 0;
  188. }
  189. data = bin.data;
  190. size = bin.size;
  191. } else if(enif_is_atom(e->env, val)) {
  192. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  193. return 0;
  194. }
  195. data = (unsigned char*) atom;
  196. size = strlen(atom);
  197. } else {
  198. return 0;
  199. }
  200. i = 0;
  201. while(i < size) {
  202. switch((char) data[i]) {
  203. case '\"':
  204. case '\\':
  205. case '/':
  206. case '\b':
  207. case '\f':
  208. case '\n':
  209. case '\r':
  210. case '\t':
  211. esc_extra += 1;
  212. i++;
  213. continue;
  214. default:
  215. if(data[i] < 0x20) {
  216. esc_extra += 5;
  217. i++;
  218. continue;
  219. } else if(data[i] < 0x80) {
  220. i++;
  221. continue;
  222. }
  223. ulen = utf8_validate(&(data[i]), size - i);
  224. if(ulen < 0) {
  225. return 0;
  226. }
  227. if(e->uescape) {
  228. uval = utf8_to_unicode(&(data[i]), ulen);
  229. if(uval < 0) {
  230. return 0;
  231. }
  232. esc_extra += utf8_esc_len(uval);
  233. if(ulen < 0) {
  234. return 0;
  235. }
  236. }
  237. i += ulen;
  238. }
  239. }
  240. if(!enc_ensure(e, size + esc_extra + 2)) {
  241. return 0;
  242. }
  243. e->p[e->i++] = '\"';
  244. i = 0;
  245. while(i < size) {
  246. switch((char) data[i]) {
  247. case '\"':
  248. case '\\':
  249. case '/':
  250. e->p[e->i++] = '\\';
  251. e->u[e->i++] = data[i];
  252. i++;
  253. continue;
  254. case '\b':
  255. e->p[e->i++] = '\\';
  256. e->p[e->i++] = 'b';
  257. i++;
  258. continue;
  259. case '\f':
  260. e->p[e->i++] = '\\';
  261. e->p[e->i++] = 'f';
  262. i++;
  263. continue;
  264. case '\n':
  265. e->p[e->i++] = '\\';
  266. e->p[e->i++] = 'n';
  267. i++;
  268. continue;
  269. case '\r':
  270. e->p[e->i++] = '\\';
  271. e->p[e->i++] = 'r';
  272. i++;
  273. continue;
  274. case '\t':
  275. e->p[e->i++] = '\\';
  276. e->p[e->i++] = 't';
  277. i++;
  278. continue;
  279. default:
  280. if(data[i] < 0x20) {
  281. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  282. if(ulen < 0) {
  283. return 0;
  284. }
  285. e->i += ulen;
  286. i++;
  287. } else if((data[i] & 0x80) && e->uescape) {
  288. uval = utf8_to_unicode(&(data[i]), size-i);
  289. if(uval < 0) {
  290. return 0;
  291. }
  292. ulen = unicode_uescape(uval, &(e->p[e->i]));
  293. if(ulen < 0) {
  294. return 0;
  295. }
  296. e->i += ulen;
  297. ulen = utf8_len(uval);
  298. if(ulen < 0) {
  299. return 0;
  300. }
  301. i += ulen;
  302. } else {
  303. e->u[e->i++] = data[i++];
  304. }
  305. }
  306. }
  307. e->p[e->i++] = '\"';
  308. e->count++;
  309. return 1;
  310. }
  311. static inline int
  312. enc_long(Encoder* e, ErlNifSInt64 val)
  313. {
  314. if(!enc_ensure(e, 32)) {
  315. return 0;
  316. }
  317. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  318. snprintf(&(e->p[e->i]), 32, "%ld", val);
  319. #elif SIZEOF_LONG == 8
  320. snprintf(&(e->p[e->i]), 32, "%ld", val);
  321. #else
  322. snprintf(&(e->p[e->i]), 32, "%lld", val);
  323. #endif
  324. e->i += strlen(&(e->p[e->i]));
  325. e->count++;
  326. return 1;
  327. }
  328. static inline int
  329. enc_double(Encoder* e, double val)
  330. {
  331. char* start;
  332. size_t len;
  333. size_t i;
  334. if(!enc_ensure(e, 32)) {
  335. return 0;
  336. }
  337. start = &(e->p[e->i]);
  338. sprintf(start, "%0.20g", val);
  339. len = strlen(start);
  340. // Check if we have a decimal point
  341. for(i = 0; i < len; i++) {
  342. if(start[i] == '.' || start[i] == 'e' || start[i] == 'E')
  343. goto done;
  344. }
  345. if(len > 29) return 0;
  346. // Force a decimal point
  347. start[len++] = '.';
  348. start[len++] = '0';
  349. done:
  350. e->i += len;
  351. e->count++;
  352. return 1;
  353. }
  354. static inline int
  355. enc_char(Encoder* e, char c)
  356. {
  357. if(!enc_ensure(e, 1)) {
  358. return 0;
  359. }
  360. e->p[e->i++] = c;
  361. return 1;
  362. }
  363. static int
  364. enc_shift(Encoder* e) {
  365. int i;
  366. char* shift;
  367. assert(e->shiftcnt >= 0 && "Invalid shift count.");
  368. shift = shifts[MIN(e->shiftcnt, NUM_SHIFTS-1)];
  369. if(!enc_literal(e, shift + 1, *shift))
  370. return 0;
  371. // Finish the rest of this shift it's it bigger than
  372. // our largest predefined constant.
  373. for(i = NUM_SHIFTS - 1; i < e->shiftcnt; i++) {
  374. if(!enc_literal(e, " ", 2))
  375. return 0;
  376. }
  377. return 1;
  378. }
  379. static inline int
  380. enc_start_object(Encoder* e)
  381. {
  382. e->count++;
  383. e->shiftcnt++;
  384. if(!enc_char(e, '{'))
  385. return 0;
  386. MAYBE_PRETTY(e);
  387. return 1;
  388. }
  389. static inline int
  390. enc_end_object(Encoder* e)
  391. {
  392. e->shiftcnt--;
  393. MAYBE_PRETTY(e);
  394. return enc_char(e, '}');
  395. }
  396. static inline int
  397. enc_start_array(Encoder* e)
  398. {
  399. e->count++;
  400. e->shiftcnt++;
  401. if(!enc_char(e, '['))
  402. return 0;
  403. MAYBE_PRETTY(e);
  404. return 1;
  405. }
  406. static inline int
  407. enc_end_array(Encoder* e)
  408. {
  409. e->shiftcnt--;
  410. MAYBE_PRETTY(e);
  411. return enc_char(e, ']');
  412. }
  413. static inline int
  414. enc_colon(Encoder* e)
  415. {
  416. if(e->pretty)
  417. return enc_literal(e, " : ", 3);
  418. return enc_char(e, ':');
  419. }
  420. static inline int
  421. enc_comma(Encoder* e)
  422. {
  423. if(!enc_char(e, ','))
  424. return 0;
  425. MAYBE_PRETTY(e);
  426. return 1;
  427. }
  428. ERL_NIF_TERM
  429. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  430. {
  431. Encoder enc;
  432. Encoder* e = &enc;
  433. ErlNifBinary bin;
  434. ERL_NIF_TERM ret;
  435. ERL_NIF_TERM stack;
  436. ERL_NIF_TERM curr;
  437. ERL_NIF_TERM item;
  438. const ERL_NIF_TERM* tuple;
  439. int arity;
  440. ErlNifSInt64 lval;
  441. double dval;
  442. if(argc != 2) {
  443. return enif_make_badarg(env);
  444. }
  445. if(!enc_init(e, env, argv[1], &bin)) {
  446. return enif_make_badarg(env);
  447. }
  448. stack = enif_make_list(env, 1, argv[0]);
  449. while(!enif_is_empty_list(env, stack)) {
  450. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  451. ret = enc_error(e, "internal_error");
  452. goto done;
  453. }
  454. if(enif_is_identical(curr, e->atoms->ref_object)) {
  455. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  456. ret = enc_error(e, "internal_error");
  457. goto done;
  458. }
  459. if(enif_is_empty_list(env, curr)) {
  460. if(!enc_end_object(e)) {
  461. ret = enc_error(e, "internal_error");
  462. goto done;
  463. }
  464. continue;
  465. }
  466. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  467. ret = enc_error(e, "internal_error");
  468. goto done;
  469. }
  470. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  471. ret = enc_error(e, "invalid_object_pair");
  472. goto done;
  473. }
  474. if(arity != 2) {
  475. ret = enc_error(e, "invalid_object_pair");
  476. goto done;
  477. }
  478. if(!enc_comma(e)) {
  479. ret = enc_error(e, "internal_error");
  480. goto done;
  481. }
  482. if(!enc_string(e, tuple[0])) {
  483. ret = enc_error(e, "invalid_object_key");
  484. goto done;
  485. }
  486. if(!enc_colon(e)) {
  487. ret = enc_error(e, "internal_error");
  488. goto done;
  489. }
  490. stack = enif_make_list_cell(env, curr, stack);
  491. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  492. stack = enif_make_list_cell(env, tuple[1], stack);
  493. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  494. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  495. ret = enc_error(e, "internal_error");
  496. goto done;
  497. }
  498. if(enif_is_empty_list(env, curr)) {
  499. if(!enc_end_array(e)) {
  500. ret = enc_error(e, "internal_error");
  501. goto done;
  502. }
  503. continue;
  504. }
  505. if(!enc_comma(e)) {
  506. ret = enc_error(e, "internal_error");
  507. goto done;
  508. }
  509. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  510. ret = enc_error(e, "internal_error");
  511. goto done;
  512. }
  513. stack = enif_make_list_cell(env, curr, stack);
  514. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  515. stack = enif_make_list_cell(env, item, stack);
  516. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  517. if(!enc_literal(e, "null", 4)) {
  518. ret = enc_error(e, "null");
  519. goto done;
  520. }
  521. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  522. if(!enc_literal(e, "true", 4)) {
  523. ret = enc_error(e, "true");
  524. goto done;
  525. }
  526. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  527. if(!enc_literal(e, "false", 5)) {
  528. ret = enc_error(e, "false");
  529. goto done;
  530. }
  531. } else if(enif_is_binary(env, curr)) {
  532. if(!enc_string(e, curr)) {
  533. ret = enc_error(e, "invalid_string");
  534. goto done;
  535. }
  536. } else if(enif_is_atom(env, curr)) {
  537. if(!enc_string(e, curr)) {
  538. ret = enc_error(e, "invalid_string");
  539. goto done;
  540. }
  541. } else if(enif_get_int64(env, curr, &lval)) {
  542. if(!enc_long(e, lval)) {
  543. ret = enc_error(e, "internal_error");
  544. goto done;
  545. }
  546. } else if(enif_get_double(env, curr, &dval)) {
  547. if(!enc_double(e, dval)) {
  548. ret = enc_error(e, "internal_error");
  549. goto done;
  550. }
  551. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  552. if(arity != 1) {
  553. ret = enc_error(e, "invalid_ejson");
  554. goto done;
  555. }
  556. if(!enif_is_list(env, tuple[0])) {
  557. ret = enc_error(e, "invalid_object");
  558. goto done;
  559. }
  560. if(!enc_start_object(e)) {
  561. ret = enc_error(e, "internal_error");
  562. goto done;
  563. }
  564. if(enif_is_empty_list(env, tuple[0])) {
  565. if(!enc_end_object(e)) {
  566. ret = enc_error(e, "internal_error");
  567. goto done;
  568. }
  569. continue;
  570. }
  571. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  572. ret = enc_error(e, "internal_error");
  573. goto done;
  574. }
  575. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  576. ret = enc_error(e, "invalid_object_member");
  577. goto done;
  578. }
  579. if(arity != 2) {
  580. ret = enc_error(e, "invalid_object_member_arity");
  581. goto done;
  582. }
  583. if(!enc_string(e, tuple[0])) {
  584. ret = enc_error(e, "invalid_object_member_key");
  585. goto done;
  586. }
  587. if(!enc_colon(e)) {
  588. ret = enc_error(e, "internal_error");
  589. goto done;
  590. }
  591. stack = enif_make_list_cell(env, curr, stack);
  592. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  593. stack = enif_make_list_cell(env, tuple[1], stack);
  594. } else if(enif_is_list(env, curr)) {
  595. if(!enc_start_array(e)) {
  596. ret = enc_error(e, "internal_error");
  597. goto done;
  598. }
  599. if(enif_is_empty_list(env, curr)) {
  600. if(!enc_end_array(e)) {
  601. ret = enc_error(e, "internal_error");
  602. goto done;
  603. }
  604. continue;
  605. }
  606. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  607. ret = enc_error(e, "internal_error");
  608. goto done;
  609. }
  610. stack = enif_make_list_cell(env, curr, stack);
  611. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  612. stack = enif_make_list_cell(env, item, stack);
  613. } else {
  614. if(!enc_unknown(e, curr)) {
  615. ret = enc_error(e, "internal_error");
  616. goto done;
  617. }
  618. }
  619. } while(!enif_is_empty_list(env, stack));
  620. if(!enc_done(e, &item)) {
  621. ret = enc_error(e, "internal_error");
  622. goto done;
  623. }
  624. if(e->iolen == 0) {
  625. ret = item;
  626. } else {
  627. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  628. }
  629. done:
  630. enc_destroy(e);
  631. return ret;
  632. }