Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

1065 rindas
30 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <errno.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <string.h>
  8. #include "erl_nif.h"
  9. #include "jiffy.h"
  10. #define U(c) ((unsigned char) (c))
  11. #define ERROR(i, msg) make_error(st, env, msg)
  12. #define STACK_SIZE_INC 64
  13. #define NUM_BUF_LEN 32
  14. #if WINDOWS || WIN32
  15. #define snprintf _snprintf
  16. #endif
  17. enum {
  18. st_value=0,
  19. st_object,
  20. st_array,
  21. st_key,
  22. st_colon,
  23. st_comma,
  24. st_done,
  25. st_invalid
  26. } JsonState;
  27. enum {
  28. nst_init=0,
  29. nst_sign,
  30. nst_mantissa,
  31. nst_frac0,
  32. nst_frac1,
  33. nst_frac,
  34. nst_esign,
  35. nst_edigit
  36. } JsonNumState;
  37. typedef struct {
  38. ErlNifEnv* env;
  39. jiffy_st* atoms;
  40. ERL_NIF_TERM arg;
  41. ErlNifBinary bin;
  42. size_t bytes_per_red;
  43. int is_partial;
  44. int return_maps;
  45. int return_trailer;
  46. int dedupe_keys;
  47. int copy_strings;
  48. ERL_NIF_TERM null_term;
  49. unsigned char* p;
  50. int i;
  51. int len;
  52. char* st_data;
  53. int st_size;
  54. int st_top;
  55. } Decoder;
  56. Decoder*
  57. dec_new(ErlNifEnv* env)
  58. {
  59. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  60. Decoder* d = enif_alloc_resource(st->res_dec, sizeof(Decoder));
  61. int i;
  62. if(d == NULL) {
  63. return NULL;
  64. }
  65. d->atoms = st;
  66. d->bytes_per_red = DEFAULT_BYTES_PER_REDUCTION;
  67. d->is_partial = 0;
  68. d->return_maps = 0;
  69. d->return_trailer = 0;
  70. d->dedupe_keys = 0;
  71. d->copy_strings = 0;
  72. d->null_term = d->atoms->atom_null;
  73. d->p = NULL;
  74. d->len = -1;
  75. d->i = -1;
  76. d->st_data = (char*) enif_alloc(STACK_SIZE_INC);
  77. d->st_size = STACK_SIZE_INC;
  78. d->st_top = 0;
  79. for(i = 0; i < d->st_size; i++) {
  80. d->st_data[i] = st_invalid;
  81. }
  82. d->st_data[0] = st_value;
  83. d->st_top++;
  84. return d;
  85. }
  86. void
  87. dec_init(Decoder* d, ErlNifEnv* env, ERL_NIF_TERM arg, ErlNifBinary* bin)
  88. {
  89. d->env = env;
  90. d->arg = arg;
  91. d->p = bin->data;
  92. d->len = bin->size;
  93. // I'd like to be more forceful on this check so that when
  94. // we run a second iteration of the decoder we are sure
  95. // that we're using the same binary. Unfortunately, I don't
  96. // think there's a value to base this assertion on.
  97. if(d->i < 0) {
  98. d->i = 0;
  99. } else {
  100. assert(d->i <= d->len && "mismatched binary lengths");
  101. }
  102. }
  103. void
  104. dec_destroy(ErlNifEnv* env, void* obj)
  105. {
  106. Decoder* d = (Decoder*) obj;
  107. if(d->st_data != NULL) {
  108. enif_free(d->st_data);
  109. }
  110. }
  111. ERL_NIF_TERM
  112. dec_error(Decoder* d, const char* atom)
  113. {
  114. ERL_NIF_TERM pos = enif_make_int(d->env, d->i+1);
  115. ERL_NIF_TERM msg = make_atom(d->env, atom);
  116. ERL_NIF_TERM ret = enif_make_tuple2(d->env, pos, msg);
  117. return enif_make_tuple2(d->env, d->atoms->atom_error, ret);
  118. }
  119. char
  120. dec_curr(Decoder* d)
  121. {
  122. assert(d->st_top > 0);
  123. return d->st_data[d->st_top - 1];
  124. }
  125. int
  126. dec_top(Decoder* d)
  127. {
  128. return d->st_top;
  129. }
  130. void
  131. dec_push(Decoder* d, char val)
  132. {
  133. int new_sz;
  134. int i;
  135. if(d->st_top == d->st_size) {
  136. new_sz = d->st_size + STACK_SIZE_INC;
  137. d->st_data = (char*)enif_realloc(d->st_data, new_sz);
  138. d->st_size = new_sz;
  139. for(i = d->st_top; i < d->st_size; i++) {
  140. d->st_data[i] = st_invalid;
  141. }
  142. }
  143. assert(d->st_top < d->st_size);
  144. d->st_data[d->st_top++] = val;
  145. }
  146. char
  147. dec_pop(Decoder* d) {
  148. char current = st_invalid;
  149. if (d->st_top > 0) {
  150. current = d->st_data[d->st_top - 1];
  151. d->st_data[d->st_top - 1] = st_invalid;
  152. d->st_top--;
  153. }
  154. return current;
  155. }
  156. void
  157. dec_pop_assert(Decoder* d, char val)
  158. {
  159. char current = dec_pop(d);
  160. assert(current == val && "popped invalid state.");
  161. (void)current;
  162. }
  163. int
  164. dec_string(Decoder* d, ERL_NIF_TERM* value)
  165. {
  166. int has_escape = 0;
  167. int num_escapes = 0;
  168. int st;
  169. int ulen;
  170. int ui;
  171. int hi;
  172. int lo;
  173. char* chrbuf;
  174. int chrpos;
  175. if(d->p[d->i] != '\"') {
  176. return 0;
  177. }
  178. d->i++;
  179. st = d->i;
  180. while(d->i < d->len) {
  181. if(d->p[d->i] < 0x20) {
  182. return 0;
  183. } else if(d->p[d->i] == '\"') {
  184. d->i++;
  185. goto parse;
  186. } else if(d->p[d->i] == '\\') {
  187. if(d->i+1 >= d->len) {
  188. return 0;
  189. }
  190. has_escape = 1;
  191. num_escapes += 1;
  192. d->i++;
  193. switch(d->p[d->i]) {
  194. case '\"':
  195. case '\\':
  196. case '/':
  197. case 'b':
  198. case 'f':
  199. case 'n':
  200. case 'r':
  201. case 't':
  202. d->i++;
  203. break;
  204. case 'u':
  205. hi = 0;
  206. lo = 0;
  207. d->i++;
  208. if(d->i + 4 >= d->len) {
  209. return 0;
  210. }
  211. hi = int_from_hex(&(d->p[d->i]));
  212. if(hi < 0) {
  213. return 0;
  214. }
  215. d->i += 4;
  216. if(hi >= 0xD800 && hi < 0xDC00) {
  217. if(d->i + 6 >= d->len) {
  218. return 0;
  219. }
  220. if(d->p[d->i++] != '\\') {
  221. return 0;
  222. } else if(d->p[d->i++] != 'u') {
  223. return 0;
  224. }
  225. lo = int_from_hex(&(d->p[d->i]));
  226. if(lo < 0) {
  227. return 0;
  228. }
  229. hi = unicode_from_pair(hi, lo);
  230. if(hi < 0) {
  231. return 0;
  232. }
  233. }
  234. hi = utf8_len(hi);
  235. if(hi < 0) {
  236. return 0;
  237. }
  238. if(lo == 0) {
  239. num_escapes += 5 - hi;
  240. } else {
  241. num_escapes += 11 - hi;
  242. }
  243. break;
  244. default:
  245. return 0;
  246. }
  247. } else if(d->p[d->i] < 0x80) {
  248. d->i++;
  249. } else {
  250. ulen = utf8_validate(&(d->p[d->i]), d->len - d->i);
  251. if(ulen < 0) {
  252. return 0;
  253. }
  254. d->i += ulen;
  255. }
  256. }
  257. // The goto above ensures that we only
  258. // hit this when a string is not terminated
  259. // correctly.
  260. return 0;
  261. parse:
  262. if(!has_escape && !d->copy_strings) {
  263. *value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1));
  264. return 1;
  265. } else if(!has_escape) {
  266. ulen = d->i - 1 - st;
  267. chrbuf = (char*) enif_make_new_binary(d->env, ulen, value),
  268. memcpy(chrbuf, &(d->p[st]), ulen);
  269. return 1;
  270. }
  271. hi = 0;
  272. lo = 0;
  273. ulen = (d->i - 1) - st - num_escapes;
  274. chrbuf = (char*) enif_make_new_binary(d->env, ulen, value);
  275. chrpos = 0;
  276. ui = st;
  277. while(ui < d->i - 1) {
  278. if(d->p[ui] != '\\') {
  279. chrbuf[chrpos++] = d->p[ui++];
  280. continue;
  281. }
  282. ui++;
  283. switch(d->p[ui]) {
  284. case '\"':
  285. case '\\':
  286. case '/':
  287. chrbuf[chrpos++] = d->p[ui];
  288. ui++;
  289. break;
  290. case 'b':
  291. chrbuf[chrpos++] = '\b';
  292. ui++;
  293. break;
  294. case 'f':
  295. chrbuf[chrpos++] = '\f';
  296. ui++;
  297. break;
  298. case 'n':
  299. chrbuf[chrpos++] = '\n';
  300. ui++;
  301. break;
  302. case 'r':
  303. chrbuf[chrpos++] = '\r';
  304. ui++;
  305. break;
  306. case 't':
  307. chrbuf[chrpos++] = '\t';
  308. ui++;
  309. break;
  310. case 'u':
  311. ui++;
  312. hi = int_from_hex(&(d->p[ui]));
  313. if(hi < 0) {
  314. return 0;
  315. }
  316. if(hi >= 0xD800 && hi < 0xDC00) {
  317. lo = int_from_hex(&(d->p[ui+6]));
  318. if(lo < 0) {
  319. return 0;
  320. }
  321. hi = unicode_from_pair(hi, lo);
  322. ui += 10;
  323. } else {
  324. ui += 4;
  325. }
  326. hi = unicode_to_utf8(hi, (unsigned char*) chrbuf+chrpos);
  327. if(hi < 0) {
  328. return 0;
  329. }
  330. chrpos += hi;
  331. break;
  332. default:
  333. return 0;
  334. }
  335. }
  336. return 1;
  337. }
  338. int
  339. dec_number(Decoder* d, ERL_NIF_TERM* value)
  340. {
  341. ERL_NIF_TERM num_type = d->atoms->atom_error;
  342. char state = nst_init;
  343. char nbuf[NUM_BUF_LEN];
  344. int st = d->i;
  345. int has_frac = 0;
  346. int has_exp = 0;
  347. double dval;
  348. long lval;
  349. while(d->i < d->len) {
  350. switch(state) {
  351. case nst_init:
  352. switch(d->p[d->i]) {
  353. case '-':
  354. state = nst_sign;
  355. d->i++;
  356. break;
  357. case '0':
  358. state = nst_frac0;
  359. d->i++;
  360. break;
  361. case '1':
  362. case '2':
  363. case '3':
  364. case '4':
  365. case '5':
  366. case '6':
  367. case '7':
  368. case '8':
  369. case '9':
  370. state = nst_mantissa;
  371. d->i++;
  372. break;
  373. default:
  374. return 0;
  375. }
  376. break;
  377. case nst_sign:
  378. switch(d->p[d->i]) {
  379. case '0':
  380. state = nst_frac0;
  381. d->i++;
  382. break;
  383. case '1':
  384. case '2':
  385. case '3':
  386. case '4':
  387. case '5':
  388. case '6':
  389. case '7':
  390. case '8':
  391. case '9':
  392. state = nst_mantissa;
  393. d->i++;
  394. break;
  395. default:
  396. return 0;
  397. }
  398. break;
  399. case nst_mantissa:
  400. switch(d->p[d->i]) {
  401. case '.':
  402. state = nst_frac1;
  403. d->i++;
  404. break;
  405. case 'e':
  406. case 'E':
  407. state = nst_esign;
  408. d->i++;
  409. break;
  410. case '0':
  411. case '1':
  412. case '2':
  413. case '3':
  414. case '4':
  415. case '5':
  416. case '6':
  417. case '7':
  418. case '8':
  419. case '9':
  420. d->i++;
  421. break;
  422. default:
  423. goto parse;
  424. }
  425. break;
  426. case nst_frac0:
  427. switch(d->p[d->i]) {
  428. case '.':
  429. state = nst_frac1;
  430. d->i++;
  431. break;
  432. case 'e':
  433. case 'E':
  434. state = nst_esign;
  435. d->i++;
  436. break;
  437. default:
  438. goto parse;
  439. }
  440. break;
  441. case nst_frac1:
  442. has_frac = 1;
  443. switch(d->p[d->i]) {
  444. case '0':
  445. case '1':
  446. case '2':
  447. case '3':
  448. case '4':
  449. case '5':
  450. case '6':
  451. case '7':
  452. case '8':
  453. case '9':
  454. state = nst_frac;
  455. d->i++;
  456. break;
  457. default:
  458. goto parse;
  459. }
  460. break;
  461. case nst_frac:
  462. switch(d->p[d->i]) {
  463. case 'e':
  464. case 'E':
  465. state = nst_esign;
  466. d->i++;
  467. break;
  468. case '0':
  469. case '1':
  470. case '2':
  471. case '3':
  472. case '4':
  473. case '5':
  474. case '6':
  475. case '7':
  476. case '8':
  477. case '9':
  478. d->i++;
  479. break;
  480. default:
  481. goto parse;
  482. }
  483. break;
  484. case nst_esign:
  485. has_exp = 1;
  486. switch(d->p[d->i]) {
  487. case '-':
  488. case '+':
  489. case '0':
  490. case '1':
  491. case '2':
  492. case '3':
  493. case '4':
  494. case '5':
  495. case '6':
  496. case '7':
  497. case '8':
  498. case '9':
  499. state = nst_edigit;
  500. d->i++;
  501. break;
  502. default:
  503. return 0;
  504. }
  505. break;
  506. case nst_edigit:
  507. switch(d->p[d->i]) {
  508. case '0':
  509. case '1':
  510. case '2':
  511. case '3':
  512. case '4':
  513. case '5':
  514. case '6':
  515. case '7':
  516. case '8':
  517. case '9':
  518. d->i++;
  519. break;
  520. default:
  521. goto parse;
  522. }
  523. break;
  524. default:
  525. return 0;
  526. }
  527. }
  528. parse:
  529. switch(state) {
  530. case nst_init:
  531. case nst_sign:
  532. case nst_frac1:
  533. case nst_esign:
  534. return 0;
  535. default:
  536. break;
  537. }
  538. errno = 0;
  539. if(d->i - st < NUM_BUF_LEN) {
  540. memset(nbuf, 0, NUM_BUF_LEN);
  541. memcpy(nbuf, &(d->p[st]), d->i - st);
  542. if(has_frac || has_exp) {
  543. dval = strtod(nbuf, NULL);
  544. if(errno != ERANGE) {
  545. *value = enif_make_double(d->env, dval);
  546. return 1;
  547. }
  548. } else {
  549. lval = strtol(nbuf, NULL, 10);
  550. if(errno != ERANGE) {
  551. *value = enif_make_int64(d->env, lval);
  552. return 1;
  553. }
  554. }
  555. }
  556. if(!has_frac && !has_exp) {
  557. num_type = d->atoms->atom_bignum;
  558. } else if(!has_frac && has_exp) {
  559. num_type = d->atoms->atom_bignum_e;
  560. } else {
  561. num_type = d->atoms->atom_bigdbl;
  562. }
  563. d->is_partial = 1;
  564. *value = enif_make_sub_binary(d->env, d->arg, st, d->i - st);
  565. *value = enif_make_tuple2(d->env, num_type, *value);
  566. return 1;
  567. }
  568. ERL_NIF_TERM
  569. make_empty_object(ErlNifEnv* env, int ret_map)
  570. {
  571. #if MAP_TYPE_PRESENT
  572. if(ret_map) {
  573. return enif_make_new_map(env);
  574. }
  575. #endif
  576. return enif_make_tuple1(env, enif_make_list(env, 0));
  577. }
  578. ERL_NIF_TERM
  579. make_array(ErlNifEnv* env, ERL_NIF_TERM list)
  580. {
  581. ERL_NIF_TERM ret = enif_make_list(env, 0);
  582. ERL_NIF_TERM item;
  583. while(enif_get_list_cell(env, list, &item, &list)) {
  584. ret = enif_make_list_cell(env, item, ret);
  585. }
  586. return ret;
  587. }
  588. ERL_NIF_TERM
  589. decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  590. {
  591. Decoder* d;
  592. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  593. ERL_NIF_TERM tmp_argv[5];
  594. ERL_NIF_TERM opts;
  595. ERL_NIF_TERM val;
  596. if(argc != 2) {
  597. return enif_make_badarg(env);
  598. }
  599. d = dec_new(env);
  600. if(d == NULL) {
  601. return make_error(st, env, "internal_error");
  602. }
  603. tmp_argv[0] = argv[0];
  604. tmp_argv[1] = enif_make_resource(env, d);
  605. tmp_argv[2] = st->atom_error;
  606. tmp_argv[3] = enif_make_list(env, 0);
  607. tmp_argv[4] = enif_make_list(env, 0);
  608. enif_release_resource(d);
  609. opts = argv[1];
  610. if(!enif_is_list(env, opts)) {
  611. return enif_make_badarg(env);
  612. }
  613. while(enif_get_list_cell(env, opts, &val, &opts)) {
  614. if(get_bytes_per_iter(env, val, &(d->bytes_per_red))) {
  615. continue;
  616. } else if(get_bytes_per_red(env, val, &(d->bytes_per_red))) {
  617. continue;
  618. } else if(enif_is_identical(val, d->atoms->atom_return_maps)) {
  619. #if MAP_TYPE_PRESENT
  620. d->return_maps = 1;
  621. #else
  622. return enif_make_badarg(env);
  623. #endif
  624. } else if(enif_is_identical(val, d->atoms->atom_return_trailer)) {
  625. d->return_trailer = 1;
  626. } else if(enif_is_identical(val, d->atoms->atom_dedupe_keys)) {
  627. d->dedupe_keys = 1;
  628. } else if(enif_is_identical(val, d->atoms->atom_copy_strings)) {
  629. d->copy_strings = 1;
  630. } else if(enif_is_identical(val, d->atoms->atom_use_nil)) {
  631. d->null_term = d->atoms->atom_nil;
  632. } else if(get_null_term(env, val, &(d->null_term))) {
  633. continue;
  634. } else {
  635. return enif_make_badarg(env);
  636. }
  637. }
  638. return decode_iter(env, 5, tmp_argv);
  639. }
  640. ERL_NIF_TERM
  641. decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  642. {
  643. Decoder* d;
  644. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  645. ErlNifBinary bin;
  646. ERL_NIF_TERM objs;
  647. ERL_NIF_TERM curr;
  648. ERL_NIF_TERM val = argv[2];
  649. ERL_NIF_TERM trailer;
  650. ERL_NIF_TERM ret;
  651. size_t start;
  652. size_t bytes_processed = 0;
  653. if(argc != 5) {
  654. return enif_make_badarg(env);
  655. } else if(!enif_inspect_binary(env, argv[0], &bin)) {
  656. return enif_make_badarg(env);
  657. } else if(!enif_get_resource(env, argv[1], st->res_dec, (void**) &d)) {
  658. return enif_make_badarg(env);
  659. } else if(!enif_is_list(env, argv[3])) {
  660. return enif_make_badarg(env);
  661. } else if(!enif_is_list(env, argv[4])) {
  662. return enif_make_badarg(env);
  663. }
  664. dec_init(d, env, argv[0], &bin);
  665. objs = argv[3];
  666. curr = argv[4];
  667. start = d->i;
  668. while(d->i < bin.size) {
  669. bytes_processed = d->i - start;
  670. if(should_yield(env, bytes_processed, d->bytes_per_red)) {
  671. ERL_NIF_TERM tmp_argv[5];
  672. tmp_argv[0] = argv[0];
  673. tmp_argv[1] = argv[1];
  674. tmp_argv[2] = val;
  675. tmp_argv[3] = objs;
  676. tmp_argv[4] = curr;
  677. bump_used_reds(env, bytes_processed, d->bytes_per_red);
  678. return enif_schedule_nif(env,
  679. "nif_decode_iter",
  680. 0,
  681. decode_iter,
  682. 5,
  683. tmp_argv);
  684. }
  685. switch(dec_curr(d)) {
  686. case st_value:
  687. switch(d->p[d->i]) {
  688. case ' ':
  689. case '\n':
  690. case '\r':
  691. case '\t':
  692. d->i++;
  693. break;
  694. case 'n':
  695. if(d->i + 3 >= d->len) {
  696. ret = dec_error(d, "invalid_literal");
  697. goto done;
  698. }
  699. if(memcmp(&(d->p[d->i]), "null", 4) != 0) {
  700. ret = dec_error(d, "invalid_literal");
  701. goto done;
  702. }
  703. val = d->null_term;
  704. dec_pop_assert(d, st_value);
  705. d->i += 4;
  706. break;
  707. case 't':
  708. if(d->i + 3 >= d->len) {
  709. ret = dec_error(d, "invalid_literal");
  710. goto done;
  711. }
  712. if(memcmp(&(d->p[d->i]), "true", 4) != 0) {
  713. ret = dec_error(d, "invalid_literal");
  714. goto done;
  715. }
  716. val = d->atoms->atom_true;
  717. dec_pop_assert(d, st_value);
  718. d->i += 4;
  719. break;
  720. case 'f':
  721. if(d->i + 4 >= bin.size) {
  722. ret = dec_error(d, "invalid_literal");
  723. goto done;
  724. }
  725. if(memcmp(&(d->p[d->i]), "false", 5) != 0) {
  726. ret = dec_error(d, "invalid_literal");
  727. goto done;
  728. }
  729. val = d->atoms->atom_false;
  730. dec_pop_assert(d, st_value);
  731. d->i += 5;
  732. break;
  733. case '\"':
  734. if(!dec_string(d, &val)) {
  735. ret = dec_error(d, "invalid_string");
  736. goto done;
  737. }
  738. dec_pop_assert(d, st_value);
  739. break;
  740. case '-':
  741. case '0':
  742. case '1':
  743. case '2':
  744. case '3':
  745. case '4':
  746. case '5':
  747. case '6':
  748. case '7':
  749. case '8':
  750. case '9':
  751. if(!dec_number(d, &val)) {
  752. ret = dec_error(d, "invalid_number");
  753. goto done;
  754. }
  755. dec_pop_assert(d, st_value);
  756. break;
  757. case '{':
  758. dec_push(d, st_object);
  759. dec_push(d, st_key);
  760. objs = enif_make_list_cell(env, curr, objs);
  761. curr = enif_make_list(env, 0);
  762. d->i++;
  763. break;
  764. case '[':
  765. dec_push(d, st_array);
  766. dec_push(d, st_value);
  767. objs = enif_make_list_cell(env, curr, objs);
  768. curr = enif_make_list(env, 0);
  769. d->i++;
  770. break;
  771. case ']':
  772. if(!enif_is_empty_list(env, curr)) {
  773. ret = dec_error(d, "invalid_json");
  774. goto done;
  775. }
  776. dec_pop_assert(d, st_value);
  777. if(dec_pop(d) != st_array) {
  778. ret = dec_error(d, "invalid_json");
  779. goto done;
  780. }
  781. dec_pop_assert(d, st_value);
  782. val = curr; // curr is []
  783. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  784. ret = dec_error(d, "internal_error");
  785. goto done;
  786. }
  787. d->i++;
  788. break;
  789. default:
  790. ret = dec_error(d, "invalid_json");
  791. goto done;
  792. }
  793. if(dec_top(d) == 0) {
  794. dec_push(d, st_done);
  795. } else if(dec_curr(d) != st_value && dec_curr(d) != st_key) {
  796. dec_push(d, st_comma);
  797. curr = enif_make_list_cell(env, val, curr);
  798. }
  799. break;
  800. case st_key:
  801. switch(d->p[d->i]) {
  802. case ' ':
  803. case '\n':
  804. case '\r':
  805. case '\t':
  806. d->i++;
  807. break;
  808. case '\"':
  809. if(!dec_string(d, &val)) {
  810. ret = dec_error(d, "invalid_string");
  811. goto done;
  812. }
  813. dec_pop_assert(d, st_key);
  814. dec_push(d, st_colon);
  815. curr = enif_make_list_cell(env, val, curr);
  816. break;
  817. case '}':
  818. if(!enif_is_empty_list(env, curr)) {
  819. ret = dec_error(d, "invalid_json");
  820. goto done;
  821. }
  822. dec_pop_assert(d, st_key);
  823. dec_pop_assert(d, st_object);
  824. dec_pop_assert(d, st_value);
  825. val = make_empty_object(env, d->return_maps);
  826. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  827. ret = dec_error(d, "internal_error");
  828. goto done;
  829. }
  830. if(dec_top(d) == 0) {
  831. dec_push(d, st_done);
  832. } else {
  833. dec_push(d, st_comma);
  834. curr = enif_make_list_cell(env, val, curr);
  835. }
  836. d->i++;
  837. break;
  838. default:
  839. ret = dec_error(d, "invalid_json");
  840. goto done;
  841. }
  842. break;
  843. case st_colon:
  844. switch(d->p[d->i]) {
  845. case ' ':
  846. case '\n':
  847. case '\r':
  848. case '\t':
  849. d->i++;
  850. break;
  851. case ':':
  852. dec_pop_assert(d, st_colon);
  853. dec_push(d, st_value);
  854. d->i++;
  855. break;
  856. default:
  857. ret = dec_error(d, "invalid_json");
  858. goto done;
  859. }
  860. break;
  861. case st_comma:
  862. switch(d->p[d->i]) {
  863. case ' ':
  864. case '\n':
  865. case '\r':
  866. case '\t':
  867. d->i++;
  868. break;
  869. case ',':
  870. dec_pop_assert(d, st_comma);
  871. switch(dec_curr(d)) {
  872. case st_object:
  873. dec_push(d, st_key);
  874. break;
  875. case st_array:
  876. dec_push(d, st_value);
  877. break;
  878. default:
  879. ret = dec_error(d, "internal_error");
  880. goto done;
  881. }
  882. d->i++;
  883. break;
  884. case '}':
  885. dec_pop_assert(d, st_comma);
  886. if(dec_pop(d) != st_object) {
  887. ret = dec_error(d, "invalid_json");
  888. goto done;
  889. }
  890. dec_pop_assert(d, st_value);
  891. if(!make_object(env, curr, &val,
  892. d->return_maps, d->dedupe_keys)) {
  893. ret = dec_error(d, "internal_object_error");
  894. goto done;
  895. }
  896. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  897. ret = dec_error(d, "internal_error");
  898. goto done;
  899. }
  900. if(dec_top(d) > 0) {
  901. dec_push(d, st_comma);
  902. curr = enif_make_list_cell(env, val, curr);
  903. } else {
  904. dec_push(d, st_done);
  905. }
  906. d->i++;
  907. break;
  908. case ']':
  909. dec_pop_assert(d, st_comma);
  910. if(dec_pop(d) != st_array) {
  911. ret = dec_error(d, "invalid_json");
  912. goto done;
  913. }
  914. dec_pop_assert(d, st_value);
  915. val = make_array(env, curr);
  916. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  917. ret = dec_error(d, "internal_error");
  918. goto done;
  919. }
  920. if(dec_top(d) > 0) {
  921. dec_push(d, st_comma);
  922. curr = enif_make_list_cell(env, val, curr);
  923. } else {
  924. dec_push(d, st_done);
  925. }
  926. d->i++;
  927. break;
  928. default:
  929. ret = dec_error(d, "invalid_json");
  930. goto done;
  931. }
  932. break;
  933. case st_done:
  934. switch(d->p[d->i]) {
  935. case ' ':
  936. case '\n':
  937. case '\r':
  938. case '\t':
  939. d->i++;
  940. break;
  941. default:
  942. goto decode_done;
  943. }
  944. break;
  945. default:
  946. ret = dec_error(d, "invalid_internal_state");
  947. goto done;
  948. }
  949. }
  950. decode_done:
  951. if(d->i < bin.size && d->return_trailer) {
  952. trailer = enif_make_sub_binary(env, argv[0], d->i, bin.size - d->i);
  953. val = enif_make_tuple3(env, d->atoms->atom_has_trailer, val, trailer);
  954. } else if(d->i < bin.size) {
  955. ret = dec_error(d, "invalid_trailing_data");
  956. goto done;
  957. }
  958. if(dec_pop(d) != st_done) {
  959. ret = dec_error(d, "truncated_json");
  960. } else if(d->is_partial) {
  961. ret = enif_make_tuple2(env, d->atoms->atom_partial, val);
  962. } else {
  963. ret = val;
  964. }
  965. done:
  966. bump_used_reds(env, bytes_processed, d->bytes_per_red);
  967. return ret;
  968. }