You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1187 lines
33 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <errno.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <string.h>
  8. #include "erl_nif.h"
  9. #include "jiffy.h"
// Cast a character value to unsigned char.
#define U(c) ((unsigned char) (c))

// Build an error term through make_error(). The expansion relies on
// variables named `st` and `env` being in scope at the point of use; the
// first macro argument is accepted but not used.
#define ERROR(i, msg) make_error(st, env, msg)

// Initial size of the decoder state stack, and the number of bytes it
// grows by every time it fills up.
#define STACK_SIZE_INC 64

// Size of the on-stack buffer into which a number's text is copied (and
// NUL terminated) before it is handed to strtod()/strtol().
#define NUM_BUF_LEN 32

// When WINDOWS or WIN32 evaluates to non-zero, route snprintf to _snprintf.
#if WINDOWS || WIN32
#define snprintf _snprintf
#endif
  17. enum {
  18. st_value=0,
  19. st_object,
  20. st_array,
  21. st_key,
  22. st_colon,
  23. st_comma,
  24. st_done,
  25. st_invalid
  26. } JsonState;
  27. enum {
  28. nst_init=0,
  29. nst_sign,
  30. nst_mantissa,
  31. nst_frac0,
  32. nst_frac1,
  33. nst_frac,
  34. nst_esign,
  35. nst_edigit
  36. } JsonNumState;
// Decoder state. Instances are allocated as NIF resources by dec_new() and
// are re-attached to the current call's environment and input binary by
// dec_init() every time decode_iter() runs.
typedef struct {
    ErlNifEnv*      env;            // environment of the current NIF call (set by dec_init)
    jiffy_st*       atoms;          // module private data; source of the atom_* terms

    ERL_NIF_TERM    arg;            // term of the input binary; sub-binaries are cut from it
    ErlNifBinary    bin;            // not referenced by the decoder functions in this section

    size_t          bytes_per_red;  // passed to should_yield() / bump_used_reds()
    int             is_partial;     // set when a number is returned as a tagged sub-binary
    int             return_maps;    // option flag: passed to make_object()/make_empty_object()
    int             return_trailer; // option flag: return trailing bytes instead of failing
    int             dedupe_keys;    // option flag: passed to make_object()
    int             copy_strings;   // option flag: copy escape-free strings into new binaries
    ERL_NIF_TERM    null_term;      // term produced for the JSON literal null

    unsigned char*  p;              // input bytes
    int             i;              // current read offset into p
    int             len;            // number of input bytes (-1 until dec_init runs)

    char*           st_data;        // state stack storage
    int             st_size;        // allocated size of st_data in bytes
    int             st_top;         // number of entries currently on the stack

    unsigned int    current_depth;  // container nesting depth; only tracked when max_levels != 0
    unsigned int    max_levels;     // depth limit for building terms; 0 means no limit
    unsigned int    level_start;    // offset where the container crossing max_levels started
    unsigned int    empty_element;  // 1 right after '{' or '['; cleared once a value or closer is seen
} Decoder;
  60. Decoder*
  61. dec_new(ErlNifEnv* env)
  62. {
  63. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  64. Decoder* d = enif_alloc_resource(st->res_dec, sizeof(Decoder));
  65. int i;
  66. if(d == NULL) {
  67. return NULL;
  68. }
  69. d->atoms = st;
  70. d->bytes_per_red = DEFAULT_BYTES_PER_REDUCTION;
  71. d->is_partial = 0;
  72. d->return_maps = 0;
  73. d->return_trailer = 0;
  74. d->dedupe_keys = 0;
  75. d->copy_strings = 0;
  76. d->null_term = d->atoms->atom_null;
  77. d->p = NULL;
  78. d->len = -1;
  79. d->i = 0;
  80. d->st_data = (char*) enif_alloc(STACK_SIZE_INC);
  81. d->st_size = STACK_SIZE_INC;
  82. d->st_top = 0;
  83. for(i = 0; i < d->st_size; i++) {
  84. d->st_data[i] = st_invalid;
  85. }
  86. d->current_depth = 0;
  87. d->max_levels = 0;
  88. d->level_start = 0;
  89. d->empty_element = 1;
  90. d->st_data[0] = st_value;
  91. d->st_top++;
  92. return d;
  93. }
  94. void
  95. dec_init(Decoder* d, ErlNifEnv* env, ERL_NIF_TERM arg, ErlNifBinary* bin)
  96. {
  97. d->env = env;
  98. d->arg = arg;
  99. d->p = bin->data;
  100. d->len = bin->size;
  101. }
  102. void
  103. dec_destroy(ErlNifEnv* env, void* obj)
  104. {
  105. Decoder* d = (Decoder*) obj;
  106. if(d->st_data != NULL) {
  107. enif_free(d->st_data);
  108. }
  109. }
  110. ERL_NIF_TERM
  111. dec_error(Decoder* d, const char* atom)
  112. {
  113. ERL_NIF_TERM pos = enif_make_int(d->env, d->i+1);
  114. ERL_NIF_TERM msg = make_atom(d->env, atom);
  115. ERL_NIF_TERM ret = enif_make_tuple2(d->env, pos, msg);
  116. return enif_make_tuple2(d->env, d->atoms->atom_error, ret);
  117. }
  118. char
  119. dec_curr(Decoder* d)
  120. {
  121. assert(d->st_top > 0);
  122. return d->st_data[d->st_top - 1];
  123. }
  124. int
  125. dec_top(Decoder* d)
  126. {
  127. return d->st_top;
  128. }
  129. void
  130. dec_push(Decoder* d, char val)
  131. {
  132. int new_sz;
  133. int i;
  134. if(d->st_top == d->st_size) {
  135. new_sz = d->st_size + STACK_SIZE_INC;
  136. d->st_data = (char*) enif_realloc(d->st_data, new_sz);
  137. d->st_size = new_sz;
  138. for(i = d->st_top; i < d->st_size; i++) {
  139. d->st_data[i] = st_invalid;
  140. }
  141. }
  142. assert(d->st_top < d->st_size);
  143. d->st_data[d->st_top++] = val;
  144. }
  145. char
  146. dec_pop(Decoder* d) {
  147. char current = st_invalid;
  148. if (d->st_top > 0) {
  149. current = d->st_data[d->st_top - 1];
  150. d->st_data[d->st_top - 1] = st_invalid;
  151. d->st_top--;
  152. }
  153. return current;
  154. }
  155. void
  156. dec_pop_assert(Decoder* d, char val)
  157. {
  158. char current = dec_pop(d);
  159. assert(current == val && "popped invalid state.");
  160. (void)current;
  161. }
  162. static void inline
  163. level_increase(Decoder* d) {
  164. if(d->max_levels && (d->max_levels == d->current_depth++)) {
  165. d->level_start = d->i;
  166. }
  167. }
  168. static int inline
  169. level_decrease(Decoder* d, ERL_NIF_TERM* value) {
  170. if (d->max_levels && d->max_levels == --d->current_depth) {
  171. // Only builds term in threshold
  172. *value = wrap_enif_make_sub_binary(d->env, d->arg, d->level_start, (d->i - d->level_start + 1));
  173. return 1;
  174. }
  175. return 0;
  176. }
  177. static int inline
  178. level_allows_terms(Decoder* d) {
  179. return (!d->max_levels) || (d->max_levels >= d->current_depth);
  180. }
  181. int
  182. dec_string(Decoder* d, ERL_NIF_TERM* value)
  183. {
  184. int has_escape = 0;
  185. int num_escapes = 0;
  186. int st;
  187. int ulen;
  188. int ui;
  189. int hi;
  190. int lo;
  191. char* chrbuf = NULL;
  192. int chrpos;
  193. if(d->p[d->i] != '\"') {
  194. return 0;
  195. }
  196. d->i++;
  197. st = d->i;
  198. while(d->i < d->len) {
  199. if(d->p[d->i] < 0x20) {
  200. return 0;
  201. } else if(d->p[d->i] == '\"') {
  202. d->i++;
  203. goto parse;
  204. } else if(d->p[d->i] == '\\') {
  205. if(d->i+1 >= d->len) {
  206. return 0;
  207. }
  208. has_escape = 1;
  209. num_escapes += 1;
  210. d->i++;
  211. switch(d->p[d->i]) {
  212. case '\"':
  213. case '\\':
  214. case '/':
  215. case 'b':
  216. case 'f':
  217. case 'n':
  218. case 'r':
  219. case 't':
  220. d->i++;
  221. break;
  222. case 'u':
  223. hi = 0;
  224. lo = 0;
  225. d->i++;
  226. if(d->i + 4 >= d->len) {
  227. return 0;
  228. }
  229. hi = int_from_hex(&(d->p[d->i]));
  230. if(hi < 0) {
  231. return 0;
  232. }
  233. d->i += 4;
  234. if(hi >= 0xD800 && hi < 0xDC00) {
  235. if(d->i + 6 >= d->len) {
  236. return 0;
  237. }
  238. if(d->p[d->i++] != '\\') {
  239. return 0;
  240. } else if(d->p[d->i++] != 'u') {
  241. return 0;
  242. }
  243. lo = int_from_hex(&(d->p[d->i]));
  244. if(lo < 0) {
  245. return 0;
  246. }
  247. hi = unicode_from_pair(hi, lo);
  248. if(hi < 0) {
  249. return 0;
  250. }
  251. }
  252. hi = utf8_len(hi);
  253. if(hi < 0) {
  254. return 0;
  255. }
  256. if(lo == 0) {
  257. num_escapes += 5 - hi;
  258. } else {
  259. num_escapes += 11 - hi;
  260. }
  261. break;
  262. default:
  263. return 0;
  264. }
  265. } else if(d->p[d->i] < 0x80) {
  266. d->i++;
  267. } else {
  268. ulen = utf8_validate(&(d->p[d->i]), d->len - d->i);
  269. if(ulen < 0) {
  270. return 0;
  271. }
  272. d->i += ulen;
  273. }
  274. }
  275. // The goto above ensures that we only
  276. // hit this when a string is not terminated
  277. // correctly.
  278. return 0;
  279. parse:
  280. if(!has_escape && !level_allows_terms(d)) {
  281. // If has_escape, the binary is still constructed as a side effect of
  282. // the escape validation, although it's ignored by the caller
  283. return 1;
  284. } else if(!has_escape && !d->copy_strings) {
  285. *value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1));
  286. return 1;
  287. } else if(!has_escape) {
  288. ulen = d->i - 1 - st;
  289. chrbuf = (char*) enif_make_new_binary(d->env, ulen, value),
  290. memcpy(chrbuf, &(d->p[st]), ulen);
  291. return 1;
  292. }
  293. hi = 0;
  294. lo = 0;
  295. ulen = (d->i - 1) - st - num_escapes;
  296. chrbuf = (char*) enif_make_new_binary(d->env, ulen, value);
  297. chrpos = 0;
  298. ui = st;
  299. while(ui < d->i - 1) {
  300. if(d->p[ui] != '\\') {
  301. chrbuf[chrpos++] = d->p[ui++];
  302. continue;
  303. }
  304. ui++;
  305. switch(d->p[ui]) {
  306. case '\"':
  307. case '\\':
  308. case '/':
  309. chrbuf[chrpos++] = d->p[ui];
  310. ui++;
  311. break;
  312. case 'b':
  313. chrbuf[chrpos++] = '\b';
  314. ui++;
  315. break;
  316. case 'f':
  317. chrbuf[chrpos++] = '\f';
  318. ui++;
  319. break;
  320. case 'n':
  321. chrbuf[chrpos++] = '\n';
  322. ui++;
  323. break;
  324. case 'r':
  325. chrbuf[chrpos++] = '\r';
  326. ui++;
  327. break;
  328. case 't':
  329. chrbuf[chrpos++] = '\t';
  330. ui++;
  331. break;
  332. case 'u':
  333. ui++;
  334. hi = int_from_hex(&(d->p[ui]));
  335. if(hi < 0) {
  336. return 0;
  337. }
  338. if(hi >= 0xD800 && hi < 0xDC00) {
  339. lo = int_from_hex(&(d->p[ui+6]));
  340. if(lo < 0) {
  341. return 0;
  342. }
  343. hi = unicode_from_pair(hi, lo);
  344. ui += 10;
  345. } else {
  346. ui += 4;
  347. }
  348. hi = unicode_to_utf8(hi, (unsigned char*) chrbuf+chrpos);
  349. if(hi < 0) {
  350. return 0;
  351. }
  352. chrpos += hi;
  353. break;
  354. default:
  355. return 0;
  356. }
  357. }
  358. return 1;
  359. }
  360. int
  361. dec_number(Decoder* d, ERL_NIF_TERM* value)
  362. {
  363. ERL_NIF_TERM num_type = d->atoms->atom_error;
  364. char state = nst_init;
  365. char nbuf[NUM_BUF_LEN];
  366. int st = d->i;
  367. int has_frac = 0;
  368. int has_exp = 0;
  369. double dval;
  370. long lval;
  371. while(d->i < d->len) {
  372. switch(state) {
  373. case nst_init:
  374. switch(d->p[d->i]) {
  375. case '-':
  376. state = nst_sign;
  377. d->i++;
  378. break;
  379. case '0':
  380. state = nst_frac0;
  381. d->i++;
  382. break;
  383. case '1':
  384. case '2':
  385. case '3':
  386. case '4':
  387. case '5':
  388. case '6':
  389. case '7':
  390. case '8':
  391. case '9':
  392. state = nst_mantissa;
  393. d->i++;
  394. break;
  395. default:
  396. return 0;
  397. }
  398. break;
  399. case nst_sign:
  400. switch(d->p[d->i]) {
  401. case '0':
  402. state = nst_frac0;
  403. d->i++;
  404. break;
  405. case '1':
  406. case '2':
  407. case '3':
  408. case '4':
  409. case '5':
  410. case '6':
  411. case '7':
  412. case '8':
  413. case '9':
  414. state = nst_mantissa;
  415. d->i++;
  416. break;
  417. default:
  418. return 0;
  419. }
  420. break;
  421. case nst_mantissa:
  422. switch(d->p[d->i]) {
  423. case '.':
  424. state = nst_frac1;
  425. d->i++;
  426. break;
  427. case 'e':
  428. case 'E':
  429. state = nst_esign;
  430. d->i++;
  431. break;
  432. case '0':
  433. case '1':
  434. case '2':
  435. case '3':
  436. case '4':
  437. case '5':
  438. case '6':
  439. case '7':
  440. case '8':
  441. case '9':
  442. d->i++;
  443. break;
  444. default:
  445. goto parse;
  446. }
  447. break;
  448. case nst_frac0:
  449. switch(d->p[d->i]) {
  450. case '.':
  451. state = nst_frac1;
  452. d->i++;
  453. break;
  454. case 'e':
  455. case 'E':
  456. state = nst_esign;
  457. d->i++;
  458. break;
  459. default:
  460. goto parse;
  461. }
  462. break;
  463. case nst_frac1:
  464. has_frac = 1;
  465. switch(d->p[d->i]) {
  466. case '0':
  467. case '1':
  468. case '2':
  469. case '3':
  470. case '4':
  471. case '5':
  472. case '6':
  473. case '7':
  474. case '8':
  475. case '9':
  476. state = nst_frac;
  477. d->i++;
  478. break;
  479. default:
  480. goto parse;
  481. }
  482. break;
  483. case nst_frac:
  484. switch(d->p[d->i]) {
  485. case 'e':
  486. case 'E':
  487. state = nst_esign;
  488. d->i++;
  489. break;
  490. case '0':
  491. case '1':
  492. case '2':
  493. case '3':
  494. case '4':
  495. case '5':
  496. case '6':
  497. case '7':
  498. case '8':
  499. case '9':
  500. d->i++;
  501. break;
  502. default:
  503. goto parse;
  504. }
  505. break;
  506. case nst_esign:
  507. has_exp = 1;
  508. switch(d->p[d->i]) {
  509. case '-':
  510. case '+':
  511. case '0':
  512. case '1':
  513. case '2':
  514. case '3':
  515. case '4':
  516. case '5':
  517. case '6':
  518. case '7':
  519. case '8':
  520. case '9':
  521. state = nst_edigit;
  522. d->i++;
  523. break;
  524. default:
  525. return 0;
  526. }
  527. break;
  528. case nst_edigit:
  529. switch(d->p[d->i]) {
  530. case '0':
  531. case '1':
  532. case '2':
  533. case '3':
  534. case '4':
  535. case '5':
  536. case '6':
  537. case '7':
  538. case '8':
  539. case '9':
  540. d->i++;
  541. break;
  542. default:
  543. goto parse;
  544. }
  545. break;
  546. default:
  547. return 0;
  548. }
  549. }
  550. parse:
  551. switch(state) {
  552. case nst_init:
  553. case nst_sign:
  554. case nst_frac1:
  555. case nst_esign:
  556. return 0;
  557. default:
  558. break;
  559. }
  560. if(!level_allows_terms(d)) {
  561. return 1;
  562. }
  563. errno = 0;
  564. if(d->i - st < NUM_BUF_LEN) {
  565. memset(nbuf, 0, NUM_BUF_LEN);
  566. memcpy(nbuf, &(d->p[st]), d->i - st);
  567. if(has_frac || has_exp) {
  568. dval = strtod(nbuf, NULL);
  569. if(errno != ERANGE) {
  570. *value = enif_make_double(d->env, dval);
  571. return 1;
  572. }
  573. } else {
  574. lval = strtol(nbuf, NULL, 10);
  575. if(errno != ERANGE) {
  576. *value = enif_make_int64(d->env, lval);
  577. return 1;
  578. }
  579. }
  580. }
  581. if(!has_frac && !has_exp) {
  582. num_type = d->atoms->atom_bignum;
  583. } else if(!has_frac && has_exp) {
  584. num_type = d->atoms->atom_bignum_e;
  585. } else {
  586. num_type = d->atoms->atom_bigdbl;
  587. }
  588. d->is_partial = 1;
  589. *value = enif_make_sub_binary(d->env, d->arg, st, d->i - st);
  590. *value = enif_make_tuple2(d->env, num_type, *value);
  591. return 1;
  592. }
  593. ERL_NIF_TERM
  594. make_empty_object(ErlNifEnv* env, int ret_map)
  595. {
  596. #if MAP_TYPE_PRESENT
  597. if(ret_map) {
  598. return enif_make_new_map(env);
  599. }
  600. #endif
  601. return enif_make_tuple1(env, enif_make_list(env, 0));
  602. }
  603. ERL_NIF_TERM
  604. make_array(ErlNifEnv* env, ERL_NIF_TERM list)
  605. {
  606. ERL_NIF_TERM ret = enif_make_list(env, 0);
  607. ERL_NIF_TERM item;
  608. while(enif_get_list_cell(env, list, &item, &list)) {
  609. ret = enif_make_list_cell(env, item, ret);
  610. }
  611. return ret;
  612. }
  613. int
  614. get_max_levels(ErlNifEnv* env, ERL_NIF_TERM val, unsigned int* max_levels_p)
  615. {
  616. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  617. const ERL_NIF_TERM* tuple;
  618. int arity;
  619. unsigned int max_levels;
  620. if(!enif_get_tuple(env, val, &arity, &tuple)) {
  621. return 0;
  622. }
  623. if(arity != 2) {
  624. return 0;
  625. }
  626. if(enif_compare(tuple[0], st->atom_max_levels) != 0) {
  627. return 0;
  628. }
  629. if(!enif_get_uint(env, tuple[1], &max_levels)) {
  630. return 0;
  631. }
  632. if(max_levels == 0) {
  633. return 0;
  634. }
  635. *max_levels_p = max_levels;
  636. return 1;
  637. }
  638. ERL_NIF_TERM
  639. decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  640. {
  641. Decoder* d;
  642. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  643. ERL_NIF_TERM tmp_argv[5];
  644. ERL_NIF_TERM opts;
  645. ERL_NIF_TERM val;
  646. if(argc != 2) {
  647. return enif_make_badarg(env);
  648. }
  649. d = dec_new(env);
  650. if(d == NULL) {
  651. return make_error(st, env, "internal_error");
  652. }
  653. tmp_argv[0] = argv[0];
  654. tmp_argv[1] = enif_make_resource(env, d);
  655. tmp_argv[2] = st->atom_error;
  656. tmp_argv[3] = enif_make_list(env, 0);
  657. tmp_argv[4] = enif_make_list(env, 0);
  658. enif_release_resource(d);
  659. opts = argv[1];
  660. if(!enif_is_list(env, opts)) {
  661. return enif_make_badarg(env);
  662. }
  663. while(enif_get_list_cell(env, opts, &val, &opts)) {
  664. if(get_bytes_per_iter(env, val, &(d->bytes_per_red))) {
  665. continue;
  666. } else if(get_bytes_per_red(env, val, &(d->bytes_per_red))) {
  667. continue;
  668. } else if(enif_is_identical(val, d->atoms->atom_return_maps)) {
  669. #if MAP_TYPE_PRESENT
  670. d->return_maps = 1;
  671. #else
  672. return enif_make_badarg(env);
  673. #endif
  674. } else if(enif_is_identical(val, d->atoms->atom_return_trailer)) {
  675. d->return_trailer = 1;
  676. } else if(enif_is_identical(val, d->atoms->atom_dedupe_keys)) {
  677. d->dedupe_keys = 1;
  678. } else if(enif_is_identical(val, d->atoms->atom_copy_strings)) {
  679. d->copy_strings = 1;
  680. } else if(enif_is_identical(val, d->atoms->atom_use_nil)) {
  681. d->null_term = d->atoms->atom_nil;
  682. } else if(get_null_term(env, val, &(d->null_term))) {
  683. continue;
  684. } else if(get_max_levels(env, val, &(d->max_levels))) {
  685. continue;
  686. } else {
  687. return enif_make_badarg(env);
  688. }
  689. }
  690. return decode_iter(env, 5, tmp_argv);
  691. }
// Main decode loop. Consumes input bytes from the decoder's current offset
// and drives the state stack until the top-level value is complete, an
// error is found, or the work budget for this call is used up.
//
// argv layout:
//   argv[0]  input binary
//   argv[1]  decoder resource
//   argv[2]  most recently decoded value
//   argv[3]  objs: list of the partially built enclosing containers
//   argv[4]  curr: reversed element list of the innermost open container
//
// Returns the decoded value, a 2-tuple tagged with atom_partial when any
// number was returned as a tagged sub-binary, a 3-tuple tagged with
// atom_has_trailer when trailing bytes are returned, an error term from
// dec_error(), or badarg. When should_yield() fires the call either
// reschedules itself (SCHEDULE_NIF_PRESENT) or returns a tuple tagged with
// atom_iter carrying the five arguments needed to continue.
ERL_NIF_TERM
decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    Decoder* d;
    jiffy_st* st = (jiffy_st*) enif_priv_data(env);
    ErlNifBinary bin;

    ERL_NIF_TERM objs;
    ERL_NIF_TERM curr;
    ERL_NIF_TERM val = argv[2];
    ERL_NIF_TERM trailer;
    ERL_NIF_TERM ret;
    ERL_NIF_TERM tmp_argv[5];

    void* res;

    size_t start;
    size_t bytes_processed = 0;

    if(!enif_inspect_binary(env, argv[0], &bin)) {
        return enif_make_badarg(env);
    } else if(!enif_get_resource(env, argv[1], st->res_dec, &res)) {
        return enif_make_badarg(env);
    }

    d = (Decoder*) res;

    // Re-attach the decoder to this call's environment and input bytes.
    dec_init(d, env, argv[0], &bin);
    objs = argv[3];
    curr = argv[4];

    // Offset at which this call started; used to measure the work done.
    start = d->i;

    while(d->i < bin.size) {
        bytes_processed = d->i - start;

        if(should_yield(bytes_processed, d->bytes_per_red)) {
            assert(enif_is_list(env, objs));
            assert(enif_is_list(env, curr));

            // Package the loop state so that decoding can be resumed.
            tmp_argv[0] = argv[0];
            tmp_argv[1] = argv[1];
            tmp_argv[2] = val;
            tmp_argv[3] = objs;
            tmp_argv[4] = curr;

            bump_used_reds(env, bytes_processed, d->bytes_per_red);

#if SCHEDULE_NIF_PRESENT
            return enif_schedule_nif(
                env,
                "nif_decode_iter",
                0,
                decode_iter,
                5,
                tmp_argv
            );
#else
            return enif_make_tuple2(
                env,
                st->atom_iter,
                enif_make_tuple_from_array(env, tmp_argv, 5)
            );
#endif
        }

        switch(dec_curr(d)) {
            // A value is expected: literal, string, number, container
            // opener, or the ']' that closes an empty array.
            case st_value:
                switch(d->p[d->i]) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                        d->i++;
                        break;
                    case 'n':
                        if(d->i + 3 >= d->len) {
                            ret = dec_error(d, "invalid_literal");
                            goto done;
                        }
                        if(memcmp(&(d->p[d->i]), "null", 4) != 0) {
                            ret = dec_error(d, "invalid_literal");
                            goto done;
                        }
                        val = d->null_term;
                        dec_pop_assert(d, st_value);
                        d->i += 4;
                        d->empty_element = 0;
                        break;
                    case 't':
                        if(d->i + 3 >= d->len) {
                            ret = dec_error(d, "invalid_literal");
                            goto done;
                        }
                        if(memcmp(&(d->p[d->i]), "true", 4) != 0) {
                            ret = dec_error(d, "invalid_literal");
                            goto done;
                        }
                        val = d->atoms->atom_true;
                        dec_pop_assert(d, st_value);
                        d->i += 4;
                        d->empty_element = 0;
                        break;
                    case 'f':
                        if(d->i + 4 >= bin.size) {
                            ret = dec_error(d, "invalid_literal");
                            goto done;
                        }
                        if(memcmp(&(d->p[d->i]), "false", 5) != 0) {
                            ret = dec_error(d, "invalid_literal");
                            goto done;
                        }
                        val = d->atoms->atom_false;
                        dec_pop_assert(d, st_value);
                        d->i += 5;
                        d->empty_element = 0;
                        break;
                    case '\"':
                        if(!dec_string(d, &val)) {
                            ret = dec_error(d, "invalid_string");
                            goto done;
                        }
                        dec_pop_assert(d, st_value);
                        d->empty_element = 0;
                        break;
                    case '-':
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                        if(!dec_number(d, &val)) {
                            ret = dec_error(d, "invalid_number");
                            goto done;
                        }
                        dec_pop_assert(d, st_value);
                        d->empty_element = 0;
                        break;
                    case '{':
                        // The pending st_value stays on the stack beneath
                        // the object marker until the object is closed.
                        dec_push(d, st_object);
                        dec_push(d, st_key);
                        level_increase(d);
                        if(level_allows_terms(d)) {
                            // Park the enclosing container's elements and
                            // start a fresh list for this one.
                            objs = enif_make_list_cell(env, curr, objs);
                            curr = enif_make_list(env, 0);
                        }
                        d->i++;
                        d->empty_element = 1;
                        break;
                    case '[':
                        dec_push(d, st_array);
                        dec_push(d, st_value);
                        level_increase(d);
                        if(level_allows_terms(d)) {
                            objs = enif_make_list_cell(env, curr, objs);
                            curr = enif_make_list(env, 0);
                        }
                        d->i++;
                        d->empty_element = 1;
                        break;
                    case ']':
                        // Only valid here directly after '[' (empty array).
                        if(!d->empty_element) {
                            ret = dec_error(d, "invalid_json");
                            goto done;
                        }
                        dec_pop_assert(d, st_value);
                        if(dec_pop(d) != st_array) {
                            ret = dec_error(d, "invalid_json");
                            goto done;
                        }
                        dec_pop_assert(d, st_value);
                        if(level_allows_terms(d)) {
                            val = curr; // curr is []
                            if(!enif_get_list_cell(env, objs, &curr, &objs)) {
                                ret = dec_error(d, "internal_error");
                                goto done;
                            }
                        }
                        level_decrease(d, &val);
                        d->i++;
                        d->empty_element = 0;
                        break;
                    default:
                        ret = dec_error(d, "invalid_json");
                        goto done;
                }
                // Runs after every st_value byte. An empty stack means the
                // top-level value is finished. If the top is neither
                // st_value nor st_key, a value was just completed inside a
                // container: expect a comma or closer next and record the
                // value in the container's element list.
                if(dec_top(d) == 0) {
                    dec_push(d, st_done);
                } else if(dec_curr(d) != st_value && dec_curr(d) != st_key) {
                    dec_push(d, st_comma);
                    if(level_allows_terms(d)) {
                        curr = enif_make_list_cell(env, val, curr);
                    }
                }
                break;

            // An object key is expected, or the '}' of an empty object.
            case st_key:
                switch(d->p[d->i]) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                        d->i++;
                        break;
                    case '\"':
                        if(!dec_string(d, &val)) {
                            ret = dec_error(d, "invalid_string");
                            goto done;
                        }
                        dec_pop_assert(d, st_key);
                        dec_push(d, st_colon);
                        if(level_allows_terms(d)) {
                            curr = enif_make_list_cell(env, val, curr);
                        }
                        break;
                    case '}':
                        // Only valid here directly after '{' (empty object).
                        if(!d->empty_element) {
                            ret = dec_error(d, "invalid_json");
                            goto done;
                        }
                        dec_pop_assert(d, st_key);
                        dec_pop_assert(d, st_object);
                        dec_pop_assert(d, st_value);
                        if(level_allows_terms(d)) {
                            val = make_empty_object(env, d->return_maps);
                            if(!enif_get_list_cell(env, objs, &curr, &objs)) {
                                ret = dec_error(d, "internal_error");
                                goto done;
                            }
                        }
                        level_decrease(d, &val);
                        if(dec_top(d) == 0) {
                            dec_push(d, st_done);
                        } else {
                            dec_push(d, st_comma);
                            if(level_allows_terms(d)) {
                                curr = enif_make_list_cell(env, val, curr);
                            }
                        }
                        d->i++;
                        d->empty_element = 0;
                        break;
                    default:
                        ret = dec_error(d, "invalid_json");
                        goto done;
                }
                break;

            // The ':' between an object key and its value is expected.
            case st_colon:
                switch(d->p[d->i]) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                        d->i++;
                        break;
                    case ':':
                        dec_pop_assert(d, st_colon);
                        dec_push(d, st_value);
                        d->i++;
                        break;
                    default:
                        ret = dec_error(d, "invalid_json");
                        goto done;
                }
                break;

            // After a value inside a container: ',' continues it, while
            // '}' or ']' closes it.
            case st_comma:
                switch(d->p[d->i]) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                        d->i++;
                        break;
                    case ',':
                        dec_pop_assert(d, st_comma);
                        // What comes next depends on the enclosing container.
                        switch(dec_curr(d)) {
                            case st_object:
                                dec_push(d, st_key);
                                break;
                            case st_array:
                                dec_push(d, st_value);
                                break;
                            default:
                                ret = dec_error(d, "internal_error");
                                goto done;
                        }
                        d->i++;
                        break;
                    case '}':
                        dec_pop_assert(d, st_comma);
                        if(dec_pop(d) != st_object) {
                            ret = dec_error(d, "invalid_json");
                            goto done;
                        }
                        dec_pop_assert(d, st_value);
                        if(level_allows_terms(d)) {
                            if(!make_object(env, curr, &val,
                                    d->return_maps, d->dedupe_keys)) {
                                ret = dec_error(d, "internal_object_error");
                                goto done;
                            }
                            if(!enif_get_list_cell(env, objs, &curr, &objs)) {
                                ret = dec_error(d, "internal_error");
                                goto done;
                            }
                        }
                        level_decrease(d, &val);
                        if(dec_top(d) > 0) {
                            dec_push(d, st_comma);
                            if(level_allows_terms(d)) {
                                curr = enif_make_list_cell(env, val, curr);
                            }
                        } else {
                            dec_push(d, st_done);
                        }
                        d->i++;
                        break;
                    case ']':
                        dec_pop_assert(d, st_comma);
                        if(dec_pop(d) != st_array) {
                            ret = dec_error(d, "invalid_json");
                            goto done;
                        }
                        dec_pop_assert(d, st_value);
                        if(level_allows_terms(d)) {
                            // Elements were accumulated in reverse order.
                            val = make_array(env, curr);
                            if(!enif_get_list_cell(env, objs, &curr, &objs)) {
                                ret = dec_error(d, "internal_error");
                                goto done;
                            }
                        }
                        level_decrease(d, &val);
                        if(dec_top(d) > 0) {
                            dec_push(d, st_comma);
                            if(level_allows_terms(d)) {
                                curr = enif_make_list_cell(env, val, curr);
                            }
                        } else {
                            dec_push(d, st_done);
                        }
                        d->i++;
                        break;
                    default:
                        ret = dec_error(d, "invalid_json");
                        goto done;
                }
                break;

            // The top-level value is complete: skip trailing whitespace and
            // stop at the first other byte.
            case st_done:
                switch(d->p[d->i]) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                        d->i++;
                        break;
                    default:
                        goto decode_done;
                }
                break;

            default:
                ret = dec_error(d, "invalid_internal_state");
                goto done;
        }
    }

decode_done:

    // Bytes left over after the value are either handed back to the caller
    // (return_trailer) or treated as an error.
    if(d->i < bin.size && d->return_trailer) {
        trailer = enif_make_sub_binary(env, argv[0], d->i, bin.size - d->i);
        val = enif_make_tuple3(env, d->atoms->atom_has_trailer, val, trailer);
    } else if(d->i < bin.size) {
        ret = dec_error(d, "invalid_trailing_data");
        goto done;
    }

    // Input ran out: the decode only succeeded if st_done is on top.
    if(dec_pop(d) != st_done) {
        ret = dec_error(d, "truncated_json");
    } else if(d->is_partial) {
        ret = enif_make_tuple2(env, d->atoms->atom_partial, val);
    } else {
        ret = val;
    }

done:
    bump_used_reds(env, bytes_processed, d->bytes_per_red);

    return ret;
}