You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1068 regels
30 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <errno.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <string.h>
  8. #include "erl_nif.h"
  9. #include "jiffy.h"
  10. #define U(c) ((unsigned char) (c))
  11. #define ERROR(i, msg) make_error(st, env, msg)
  12. #define STACK_SIZE_INC 64
  13. #define NUM_BUF_LEN 32
  14. #if WINDOWS || WIN32
  15. #define snprintf _snprintf
  16. #endif
  // States kept on the decoder's parse stack (stored as `char` values in
  // Decoder.st_data). Declared as a typedef so that `JsonState` names a type;
  // without `typedef` the declaration would define a stray global variable.
  typedef enum {
      st_value   = 0,  // expecting a JSON value
      st_object  = 1,  // inside an object (pushed when '{' is seen)
      st_array   = 2,  // inside an array (pushed when '[' is seen)
      st_key     = 3,  // expecting an object key or '}'
      st_colon   = 4,  // expecting the ':' that follows a key
      st_comma   = 5,  // expecting ',' or the closing '}' / ']'
      st_done    = 6,  // top-level value complete
      st_invalid = 7   // filler for unused stack slots / empty-stack pop result
  } JsonState;
  // States of the number scanner in dec_number(). Declared as a typedef so
  // that `JsonNumState` names a type rather than defining a stray global
  // variable.
  typedef enum {
      nst_init     = 0,  // nothing consumed yet
      nst_sign     = 1,  // consumed a leading '-'
      nst_mantissa = 2,  // in integer digits that started with 1-9
      nst_frac0    = 3,  // consumed a leading '0'
      nst_frac1    = 4,  // consumed '.', at least one digit must follow
      nst_frac     = 5,  // in fraction digits
      nst_esign    = 6,  // consumed 'e' / 'E', expecting a sign or digit
      nst_edigit   = 7   // in the exponent, after its sign or first digit
  } JsonNumState;
  37. typedef struct {
  38. ErlNifEnv* env;
  39. jiffy_st* atoms;
  40. ERL_NIF_TERM arg;
  41. ErlNifBinary bin;
  42. size_t bytes_per_red;
  43. int is_partial;
  44. int return_maps;
  45. int return_trailer;
  46. int dedupe_keys;
  47. int copy_strings;
  48. ERL_NIF_TERM null_term;
  49. char* p;
  50. unsigned char* u;
  51. int i;
  52. int len;
  53. char* st_data;
  54. int st_size;
  55. int st_top;
  56. } Decoder;
  57. Decoder*
  58. dec_new(ErlNifEnv* env)
  59. {
  60. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  61. Decoder* d = enif_alloc_resource(st->res_dec, sizeof(Decoder));
  62. int i;
  63. if(d == NULL) {
  64. return NULL;
  65. }
  66. d->atoms = st;
  67. d->bytes_per_red = DEFAULT_BYTES_PER_REDUCTION;
  68. d->is_partial = 0;
  69. d->return_maps = 0;
  70. d->return_trailer = 0;
  71. d->dedupe_keys = 0;
  72. d->copy_strings = 0;
  73. d->null_term = d->atoms->atom_null;
  74. d->p = NULL;
  75. d->u = NULL;
  76. d->len = -1;
  77. d->i = -1;
  78. d->st_data = (char*) enif_alloc(STACK_SIZE_INC);
  79. d->st_size = STACK_SIZE_INC;
  80. d->st_top = 0;
  81. for(i = 0; i < d->st_size; i++) {
  82. d->st_data[i] = st_invalid;
  83. }
  84. d->st_data[0] = st_value;
  85. d->st_top++;
  86. return d;
  87. }
  88. void
  89. dec_init(Decoder* d, ErlNifEnv* env, ERL_NIF_TERM arg, ErlNifBinary* bin)
  90. {
  91. d->env = env;
  92. d->arg = arg;
  93. d->p = (char*) bin->data;
  94. d->u = bin->data;
  95. d->len = bin->size;
  96. // I'd like to be more forceful on this check so that when
  97. // we run a second iteration of the decoder we are sure
  98. // that we're using the same binary. Unfortunately, I don't
  99. // think there's a value to base this assertion on.
  100. if(d->i < 0) {
  101. d->i = 0;
  102. } else {
  103. assert(d->i <= d->len && "mismatched binary lengths");
  104. }
  105. }
  106. void
  107. dec_destroy(ErlNifEnv* env, void* obj)
  108. {
  109. Decoder* d = (Decoder*) obj;
  110. if(d->st_data != NULL) {
  111. enif_free(d->st_data);
  112. }
  113. }
  114. ERL_NIF_TERM
  115. dec_error(Decoder* d, const char* atom)
  116. {
  117. ERL_NIF_TERM pos = enif_make_int(d->env, d->i+1);
  118. ERL_NIF_TERM msg = make_atom(d->env, atom);
  119. ERL_NIF_TERM ret = enif_make_tuple2(d->env, pos, msg);
  120. return enif_make_tuple2(d->env, d->atoms->atom_error, ret);
  121. }
  122. char
  123. dec_curr(Decoder* d)
  124. {
  125. assert(d->st_top > 0);
  126. return d->st_data[d->st_top - 1];
  127. }
  128. int
  129. dec_top(Decoder* d)
  130. {
  131. return d->st_top;
  132. }
  133. void
  134. dec_push(Decoder* d, char val)
  135. {
  136. int new_sz;
  137. int i;
  138. if(d->st_top == d->st_size) {
  139. new_sz = d->st_size + STACK_SIZE_INC;
  140. d->st_data = (char*)enif_realloc(d->st_data, new_sz);
  141. d->st_size = new_sz;
  142. for(i = d->st_top; i < d->st_size; i++) {
  143. d->st_data[i] = st_invalid;
  144. }
  145. }
  146. assert(d->st_top < d->st_size);
  147. d->st_data[d->st_top++] = val;
  148. }
  149. char
  150. dec_pop(Decoder* d) {
  151. char current = st_invalid;
  152. if (d->st_top > 0) {
  153. current = d->st_data[d->st_top - 1];
  154. d->st_data[d->st_top - 1] = st_invalid;
  155. d->st_top--;
  156. }
  157. return current;
  158. }
  159. void
  160. dec_pop_assert(Decoder* d, char val)
  161. {
  162. char current = dec_pop(d);
  163. assert(current == val && "popped invalid state.");
  164. (void)current;
  165. }
  166. int
  167. dec_string(Decoder* d, ERL_NIF_TERM* value)
  168. {
  169. int has_escape = 0;
  170. int num_escapes = 0;
  171. int st;
  172. int ulen;
  173. int ui;
  174. int hi;
  175. int lo;
  176. char* chrbuf;
  177. int chrpos;
  178. if(d->p[d->i] != '\"') {
  179. return 0;
  180. }
  181. d->i++;
  182. st = d->i;
  183. while(d->i < d->len) {
  184. if(d->u[d->i] < 0x20) {
  185. return 0;
  186. } else if(d->p[d->i] == '\"') {
  187. d->i++;
  188. goto parse;
  189. } else if(d->p[d->i] == '\\') {
  190. if(d->i+1 >= d->len) {
  191. return 0;
  192. }
  193. has_escape = 1;
  194. num_escapes += 1;
  195. d->i++;
  196. switch(d->p[d->i]) {
  197. case '\"':
  198. case '\\':
  199. case '/':
  200. case 'b':
  201. case 'f':
  202. case 'n':
  203. case 'r':
  204. case 't':
  205. d->i++;
  206. break;
  207. case 'u':
  208. hi = 0;
  209. lo = 0;
  210. d->i++;
  211. if(d->i + 4 >= d->len) {
  212. return 0;
  213. }
  214. hi = int_from_hex(&(d->u[d->i]));
  215. if(hi < 0) {
  216. return 0;
  217. }
  218. d->i += 4;
  219. if(hi >= 0xD800 && hi < 0xDC00) {
  220. if(d->i + 6 >= d->len) {
  221. return 0;
  222. }
  223. if(d->p[d->i++] != '\\') {
  224. return 0;
  225. } else if(d->p[d->i++] != 'u') {
  226. return 0;
  227. }
  228. lo = int_from_hex(&(d->u[d->i]));
  229. if(lo < 0) {
  230. return 0;
  231. }
  232. hi = unicode_from_pair(hi, lo);
  233. if(hi < 0) {
  234. return 0;
  235. }
  236. }
  237. hi = utf8_len(hi);
  238. if(hi < 0) {
  239. return 0;
  240. }
  241. if(lo == 0) {
  242. num_escapes += 5 - hi;
  243. } else {
  244. num_escapes += 11 - hi;
  245. }
  246. break;
  247. default:
  248. return 0;
  249. }
  250. } else if(d->u[d->i] < 0x80) {
  251. d->i++;
  252. } else {
  253. ulen = utf8_validate(&(d->u[d->i]), d->len - d->i);
  254. if(ulen < 0) {
  255. return 0;
  256. }
  257. d->i += ulen;
  258. }
  259. }
  260. // The goto above ensures that we only
  261. // hit this when a string is not terminated
  262. // correctly.
  263. return 0;
  264. parse:
  265. if(!has_escape && !d->copy_strings) {
  266. *value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1));
  267. return 1;
  268. } else if(!has_escape) {
  269. ulen = d->i - 1 - st;
  270. chrbuf = (char*) enif_make_new_binary(d->env, ulen, value),
  271. memcpy(chrbuf, &(d->p[st]), ulen);
  272. return 1;
  273. }
  274. hi = 0;
  275. lo = 0;
  276. ulen = (d->i - 1) - st - num_escapes;
  277. chrbuf = (char*) enif_make_new_binary(d->env, ulen, value);
  278. chrpos = 0;
  279. ui = st;
  280. while(ui < d->i - 1) {
  281. if(d->p[ui] != '\\') {
  282. chrbuf[chrpos++] = d->p[ui++];
  283. continue;
  284. }
  285. ui++;
  286. switch(d->p[ui]) {
  287. case '\"':
  288. case '\\':
  289. case '/':
  290. chrbuf[chrpos++] = d->p[ui];
  291. ui++;
  292. break;
  293. case 'b':
  294. chrbuf[chrpos++] = '\b';
  295. ui++;
  296. break;
  297. case 'f':
  298. chrbuf[chrpos++] = '\f';
  299. ui++;
  300. break;
  301. case 'n':
  302. chrbuf[chrpos++] = '\n';
  303. ui++;
  304. break;
  305. case 'r':
  306. chrbuf[chrpos++] = '\r';
  307. ui++;
  308. break;
  309. case 't':
  310. chrbuf[chrpos++] = '\t';
  311. ui++;
  312. break;
  313. case 'u':
  314. ui++;
  315. hi = int_from_hex(&(d->u[ui]));
  316. if(hi < 0) {
  317. return 0;
  318. }
  319. if(hi >= 0xD800 && hi < 0xDC00) {
  320. lo = int_from_hex(&(d->u[ui+6]));
  321. if(lo < 0) {
  322. return 0;
  323. }
  324. hi = unicode_from_pair(hi, lo);
  325. ui += 10;
  326. } else {
  327. ui += 4;
  328. }
  329. hi = unicode_to_utf8(hi, (unsigned char*) chrbuf+chrpos);
  330. if(hi < 0) {
  331. return 0;
  332. }
  333. chrpos += hi;
  334. break;
  335. default:
  336. return 0;
  337. }
  338. }
  339. return 1;
  340. }
  341. int
  342. dec_number(Decoder* d, ERL_NIF_TERM* value)
  343. {
  344. ERL_NIF_TERM num_type = d->atoms->atom_error;
  345. char state = nst_init;
  346. char nbuf[NUM_BUF_LEN];
  347. int st = d->i;
  348. int has_frac = 0;
  349. int has_exp = 0;
  350. double dval;
  351. long lval;
  352. while(d->i < d->len) {
  353. switch(state) {
  354. case nst_init:
  355. switch(d->p[d->i]) {
  356. case '-':
  357. state = nst_sign;
  358. d->i++;
  359. break;
  360. case '0':
  361. state = nst_frac0;
  362. d->i++;
  363. break;
  364. case '1':
  365. case '2':
  366. case '3':
  367. case '4':
  368. case '5':
  369. case '6':
  370. case '7':
  371. case '8':
  372. case '9':
  373. state = nst_mantissa;
  374. d->i++;
  375. break;
  376. default:
  377. return 0;
  378. }
  379. break;
  380. case nst_sign:
  381. switch(d->p[d->i]) {
  382. case '0':
  383. state = nst_frac0;
  384. d->i++;
  385. break;
  386. case '1':
  387. case '2':
  388. case '3':
  389. case '4':
  390. case '5':
  391. case '6':
  392. case '7':
  393. case '8':
  394. case '9':
  395. state = nst_mantissa;
  396. d->i++;
  397. break;
  398. default:
  399. return 0;
  400. }
  401. break;
  402. case nst_mantissa:
  403. switch(d->p[d->i]) {
  404. case '.':
  405. state = nst_frac1;
  406. d->i++;
  407. break;
  408. case 'e':
  409. case 'E':
  410. state = nst_esign;
  411. d->i++;
  412. break;
  413. case '0':
  414. case '1':
  415. case '2':
  416. case '3':
  417. case '4':
  418. case '5':
  419. case '6':
  420. case '7':
  421. case '8':
  422. case '9':
  423. d->i++;
  424. break;
  425. default:
  426. goto parse;
  427. }
  428. break;
  429. case nst_frac0:
  430. switch(d->p[d->i]) {
  431. case '.':
  432. state = nst_frac1;
  433. d->i++;
  434. break;
  435. case 'e':
  436. case 'E':
  437. state = nst_esign;
  438. d->i++;
  439. break;
  440. default:
  441. goto parse;
  442. }
  443. break;
  444. case nst_frac1:
  445. has_frac = 1;
  446. switch(d->p[d->i]) {
  447. case '0':
  448. case '1':
  449. case '2':
  450. case '3':
  451. case '4':
  452. case '5':
  453. case '6':
  454. case '7':
  455. case '8':
  456. case '9':
  457. state = nst_frac;
  458. d->i++;
  459. break;
  460. default:
  461. goto parse;
  462. }
  463. break;
  464. case nst_frac:
  465. switch(d->p[d->i]) {
  466. case 'e':
  467. case 'E':
  468. state = nst_esign;
  469. d->i++;
  470. break;
  471. case '0':
  472. case '1':
  473. case '2':
  474. case '3':
  475. case '4':
  476. case '5':
  477. case '6':
  478. case '7':
  479. case '8':
  480. case '9':
  481. d->i++;
  482. break;
  483. default:
  484. goto parse;
  485. }
  486. break;
  487. case nst_esign:
  488. has_exp = 1;
  489. switch(d->p[d->i]) {
  490. case '-':
  491. case '+':
  492. case '0':
  493. case '1':
  494. case '2':
  495. case '3':
  496. case '4':
  497. case '5':
  498. case '6':
  499. case '7':
  500. case '8':
  501. case '9':
  502. state = nst_edigit;
  503. d->i++;
  504. break;
  505. default:
  506. return 0;
  507. }
  508. break;
  509. case nst_edigit:
  510. switch(d->p[d->i]) {
  511. case '0':
  512. case '1':
  513. case '2':
  514. case '3':
  515. case '4':
  516. case '5':
  517. case '6':
  518. case '7':
  519. case '8':
  520. case '9':
  521. d->i++;
  522. break;
  523. default:
  524. goto parse;
  525. }
  526. break;
  527. default:
  528. return 0;
  529. }
  530. }
  531. parse:
  532. switch(state) {
  533. case nst_init:
  534. case nst_sign:
  535. case nst_frac1:
  536. case nst_esign:
  537. return 0;
  538. default:
  539. break;
  540. }
  541. errno = 0;
  542. if(d->i - st < NUM_BUF_LEN) {
  543. memset(nbuf, 0, NUM_BUF_LEN);
  544. memcpy(nbuf, &(d->p[st]), d->i - st);
  545. if(has_frac || has_exp) {
  546. dval = strtod(nbuf, NULL);
  547. if(errno != ERANGE) {
  548. *value = enif_make_double(d->env, dval);
  549. return 1;
  550. }
  551. } else {
  552. lval = strtol(nbuf, NULL, 10);
  553. if(errno != ERANGE) {
  554. *value = enif_make_int64(d->env, lval);
  555. return 1;
  556. }
  557. }
  558. }
  559. if(!has_frac && !has_exp) {
  560. num_type = d->atoms->atom_bignum;
  561. } else if(!has_frac && has_exp) {
  562. num_type = d->atoms->atom_bignum_e;
  563. } else {
  564. num_type = d->atoms->atom_bigdbl;
  565. }
  566. d->is_partial = 1;
  567. *value = enif_make_sub_binary(d->env, d->arg, st, d->i - st);
  568. *value = enif_make_tuple2(d->env, num_type, *value);
  569. return 1;
  570. }
  571. ERL_NIF_TERM
  572. make_empty_object(ErlNifEnv* env, int ret_map)
  573. {
  574. #if MAP_TYPE_PRESENT
  575. if(ret_map) {
  576. return enif_make_new_map(env);
  577. }
  578. #endif
  579. return enif_make_tuple1(env, enif_make_list(env, 0));
  580. }
  581. ERL_NIF_TERM
  582. make_array(ErlNifEnv* env, ERL_NIF_TERM list)
  583. {
  584. ERL_NIF_TERM ret = enif_make_list(env, 0);
  585. ERL_NIF_TERM item;
  586. while(enif_get_list_cell(env, list, &item, &list)) {
  587. ret = enif_make_list_cell(env, item, ret);
  588. }
  589. return ret;
  590. }
  591. ERL_NIF_TERM
  592. decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  593. {
  594. Decoder* d;
  595. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  596. ERL_NIF_TERM tmp_argv[5];
  597. ERL_NIF_TERM opts;
  598. ERL_NIF_TERM val;
  599. if(argc != 2) {
  600. return enif_make_badarg(env);
  601. }
  602. d = dec_new(env);
  603. if(d == NULL) {
  604. return make_error(st, env, "internal_error");
  605. }
  606. tmp_argv[0] = argv[0];
  607. tmp_argv[1] = enif_make_resource(env, d);
  608. tmp_argv[2] = st->atom_error;
  609. tmp_argv[3] = enif_make_list(env, 0);
  610. tmp_argv[4] = enif_make_list(env, 0);
  611. enif_release_resource(d);
  612. opts = argv[1];
  613. if(!enif_is_list(env, opts)) {
  614. return enif_make_badarg(env);
  615. }
  616. while(enif_get_list_cell(env, opts, &val, &opts)) {
  617. if(get_bytes_per_iter(env, val, &(d->bytes_per_red))) {
  618. continue;
  619. } else if(get_bytes_per_red(env, val, &(d->bytes_per_red))) {
  620. continue;
  621. } else if(enif_is_identical(val, d->atoms->atom_return_maps)) {
  622. #if MAP_TYPE_PRESENT
  623. d->return_maps = 1;
  624. #else
  625. return enif_make_badarg(env);
  626. #endif
  627. } else if(enif_is_identical(val, d->atoms->atom_return_trailer)) {
  628. d->return_trailer = 1;
  629. } else if(enif_is_identical(val, d->atoms->atom_dedupe_keys)) {
  630. d->dedupe_keys = 1;
  631. } else if(enif_is_identical(val, d->atoms->atom_copy_strings)) {
  632. d->copy_strings = 1;
  633. } else if(enif_is_identical(val, d->atoms->atom_use_nil)) {
  634. d->null_term = d->atoms->atom_nil;
  635. } else if(get_null_term(env, val, &(d->null_term))) {
  636. continue;
  637. } else {
  638. return enif_make_badarg(env);
  639. }
  640. }
  641. return decode_iter(env, 5, tmp_argv);
  642. }
  643. ERL_NIF_TERM
  644. decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  645. {
  646. Decoder* d;
  647. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  648. ErlNifBinary bin;
  649. ERL_NIF_TERM objs;
  650. ERL_NIF_TERM curr;
  651. ERL_NIF_TERM val = argv[2];
  652. ERL_NIF_TERM trailer;
  653. ERL_NIF_TERM ret;
  654. size_t start;
  655. size_t bytes_processed = 0;
  656. if(argc != 5) {
  657. return enif_make_badarg(env);
  658. } else if(!enif_inspect_binary(env, argv[0], &bin)) {
  659. return enif_make_badarg(env);
  660. } else if(!enif_get_resource(env, argv[1], st->res_dec, (void**) &d)) {
  661. return enif_make_badarg(env);
  662. } else if(!enif_is_list(env, argv[3])) {
  663. return enif_make_badarg(env);
  664. } else if(!enif_is_list(env, argv[4])) {
  665. return enif_make_badarg(env);
  666. }
  667. dec_init(d, env, argv[0], &bin);
  668. objs = argv[3];
  669. curr = argv[4];
  670. start = d->i;
  671. while(d->i < bin.size) {
  672. bytes_processed = d->i - start;
  673. if(should_yield(env, bytes_processed, d->bytes_per_red)) {
  674. ERL_NIF_TERM tmp_argv[5];
  675. tmp_argv[0] = argv[0];
  676. tmp_argv[1] = argv[1];
  677. tmp_argv[2] = val;
  678. tmp_argv[3] = objs;
  679. tmp_argv[4] = curr;
  680. bump_used_reds(env, bytes_processed, d->bytes_per_red);
  681. return enif_schedule_nif(env,
  682. "nif_decode_iter",
  683. 0,
  684. decode_iter,
  685. 5,
  686. tmp_argv);
  687. }
  688. switch(dec_curr(d)) {
  689. case st_value:
  690. switch(d->p[d->i]) {
  691. case ' ':
  692. case '\n':
  693. case '\r':
  694. case '\t':
  695. d->i++;
  696. break;
  697. case 'n':
  698. if(d->i + 3 >= d->len) {
  699. ret = dec_error(d, "invalid_literal");
  700. goto done;
  701. }
  702. if(memcmp(&(d->p[d->i]), "null", 4) != 0) {
  703. ret = dec_error(d, "invalid_literal");
  704. goto done;
  705. }
  706. val = d->null_term;
  707. dec_pop_assert(d, st_value);
  708. d->i += 4;
  709. break;
  710. case 't':
  711. if(d->i + 3 >= d->len) {
  712. ret = dec_error(d, "invalid_literal");
  713. goto done;
  714. }
  715. if(memcmp(&(d->p[d->i]), "true", 4) != 0) {
  716. ret = dec_error(d, "invalid_literal");
  717. goto done;
  718. }
  719. val = d->atoms->atom_true;
  720. dec_pop_assert(d, st_value);
  721. d->i += 4;
  722. break;
  723. case 'f':
  724. if(d->i + 4 >= bin.size) {
  725. ret = dec_error(d, "invalid_literal");
  726. goto done;
  727. }
  728. if(memcmp(&(d->p[d->i]), "false", 5) != 0) {
  729. ret = dec_error(d, "invalid_literal");
  730. goto done;
  731. }
  732. val = d->atoms->atom_false;
  733. dec_pop_assert(d, st_value);
  734. d->i += 5;
  735. break;
  736. case '\"':
  737. if(!dec_string(d, &val)) {
  738. ret = dec_error(d, "invalid_string");
  739. goto done;
  740. }
  741. dec_pop_assert(d, st_value);
  742. break;
  743. case '-':
  744. case '0':
  745. case '1':
  746. case '2':
  747. case '3':
  748. case '4':
  749. case '5':
  750. case '6':
  751. case '7':
  752. case '8':
  753. case '9':
  754. if(!dec_number(d, &val)) {
  755. ret = dec_error(d, "invalid_number");
  756. goto done;
  757. }
  758. dec_pop_assert(d, st_value);
  759. break;
  760. case '{':
  761. dec_push(d, st_object);
  762. dec_push(d, st_key);
  763. objs = enif_make_list_cell(env, curr, objs);
  764. curr = enif_make_list(env, 0);
  765. d->i++;
  766. break;
  767. case '[':
  768. dec_push(d, st_array);
  769. dec_push(d, st_value);
  770. objs = enif_make_list_cell(env, curr, objs);
  771. curr = enif_make_list(env, 0);
  772. d->i++;
  773. break;
  774. case ']':
  775. if(!enif_is_empty_list(env, curr)) {
  776. ret = dec_error(d, "invalid_json");
  777. goto done;
  778. }
  779. dec_pop_assert(d, st_value);
  780. if(dec_pop(d) != st_array) {
  781. ret = dec_error(d, "invalid_json");
  782. goto done;
  783. }
  784. dec_pop_assert(d, st_value);
  785. val = curr; // curr is []
  786. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  787. ret = dec_error(d, "internal_error");
  788. goto done;
  789. }
  790. d->i++;
  791. break;
  792. default:
  793. ret = dec_error(d, "invalid_json");
  794. goto done;
  795. }
  796. if(dec_top(d) == 0) {
  797. dec_push(d, st_done);
  798. } else if(dec_curr(d) != st_value && dec_curr(d) != st_key) {
  799. dec_push(d, st_comma);
  800. curr = enif_make_list_cell(env, val, curr);
  801. }
  802. break;
  803. case st_key:
  804. switch(d->p[d->i]) {
  805. case ' ':
  806. case '\n':
  807. case '\r':
  808. case '\t':
  809. d->i++;
  810. break;
  811. case '\"':
  812. if(!dec_string(d, &val)) {
  813. ret = dec_error(d, "invalid_string");
  814. goto done;
  815. }
  816. dec_pop_assert(d, st_key);
  817. dec_push(d, st_colon);
  818. curr = enif_make_list_cell(env, val, curr);
  819. break;
  820. case '}':
  821. if(!enif_is_empty_list(env, curr)) {
  822. ret = dec_error(d, "invalid_json");
  823. goto done;
  824. }
  825. dec_pop_assert(d, st_key);
  826. dec_pop_assert(d, st_object);
  827. dec_pop_assert(d, st_value);
  828. val = make_empty_object(env, d->return_maps);
  829. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  830. ret = dec_error(d, "internal_error");
  831. goto done;
  832. }
  833. if(dec_top(d) == 0) {
  834. dec_push(d, st_done);
  835. } else {
  836. dec_push(d, st_comma);
  837. curr = enif_make_list_cell(env, val, curr);
  838. }
  839. d->i++;
  840. break;
  841. default:
  842. ret = dec_error(d, "invalid_json");
  843. goto done;
  844. }
  845. break;
  846. case st_colon:
  847. switch(d->p[d->i]) {
  848. case ' ':
  849. case '\n':
  850. case '\r':
  851. case '\t':
  852. d->i++;
  853. break;
  854. case ':':
  855. dec_pop_assert(d, st_colon);
  856. dec_push(d, st_value);
  857. d->i++;
  858. break;
  859. default:
  860. ret = dec_error(d, "invalid_json");
  861. goto done;
  862. }
  863. break;
  864. case st_comma:
  865. switch(d->p[d->i]) {
  866. case ' ':
  867. case '\n':
  868. case '\r':
  869. case '\t':
  870. d->i++;
  871. break;
  872. case ',':
  873. dec_pop_assert(d, st_comma);
  874. switch(dec_curr(d)) {
  875. case st_object:
  876. dec_push(d, st_key);
  877. break;
  878. case st_array:
  879. dec_push(d, st_value);
  880. break;
  881. default:
  882. ret = dec_error(d, "internal_error");
  883. goto done;
  884. }
  885. d->i++;
  886. break;
  887. case '}':
  888. dec_pop_assert(d, st_comma);
  889. if(dec_pop(d) != st_object) {
  890. ret = dec_error(d, "invalid_json");
  891. goto done;
  892. }
  893. dec_pop_assert(d, st_value);
  894. if(!make_object(env, curr, &val,
  895. d->return_maps, d->dedupe_keys)) {
  896. ret = dec_error(d, "internal_object_error");
  897. goto done;
  898. }
  899. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  900. ret = dec_error(d, "internal_error");
  901. goto done;
  902. }
  903. if(dec_top(d) > 0) {
  904. dec_push(d, st_comma);
  905. curr = enif_make_list_cell(env, val, curr);
  906. } else {
  907. dec_push(d, st_done);
  908. }
  909. d->i++;
  910. break;
  911. case ']':
  912. dec_pop_assert(d, st_comma);
  913. if(dec_pop(d) != st_array) {
  914. ret = dec_error(d, "invalid_json");
  915. goto done;
  916. }
  917. dec_pop_assert(d, st_value);
  918. val = make_array(env, curr);
  919. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  920. ret = dec_error(d, "internal_error");
  921. goto done;
  922. }
  923. if(dec_top(d) > 0) {
  924. dec_push(d, st_comma);
  925. curr = enif_make_list_cell(env, val, curr);
  926. } else {
  927. dec_push(d, st_done);
  928. }
  929. d->i++;
  930. break;
  931. default:
  932. ret = dec_error(d, "invalid_json");
  933. goto done;
  934. }
  935. break;
  936. case st_done:
  937. switch(d->p[d->i]) {
  938. case ' ':
  939. case '\n':
  940. case '\r':
  941. case '\t':
  942. d->i++;
  943. break;
  944. default:
  945. goto decode_done;
  946. }
  947. break;
  948. default:
  949. ret = dec_error(d, "invalid_internal_state");
  950. goto done;
  951. }
  952. }
  953. decode_done:
  954. if(d->i < bin.size && d->return_trailer) {
  955. trailer = enif_make_sub_binary(env, argv[0], d->i, bin.size - d->i);
  956. val = enif_make_tuple3(env, d->atoms->atom_has_trailer, val, trailer);
  957. } else if(d->i < bin.size) {
  958. ret = dec_error(d, "invalid_trailing_data");
  959. goto done;
  960. }
  961. if(dec_pop(d) != st_done) {
  962. ret = dec_error(d, "truncated_json");
  963. } else if(d->is_partial) {
  964. ret = enif_make_tuple2(env, d->atoms->atom_partial, val);
  965. } else {
  966. ret = val;
  967. }
  968. done:
  969. bump_used_reds(env, bytes_processed, d->bytes_per_red);
  970. return ret;
  971. }