You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1206 rivejä
34 KiB

5 vuotta sitten
5 vuotta sitten
5 vuotta sitten
5 vuotta sitten
5 vuotta sitten
5 vuotta sitten
5 vuotta sitten
5 vuotta sitten
5 vuotta sitten
5 vuotta sitten
  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <errno.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <string.h>
  8. #include "erl_nif.h"
  9. #include "jiffy.h"
  10. #define U(c) ((unsigned char) (c))
  11. #define ERROR(i, msg) make_error(st, env, msg)
  12. #define STACK_SIZE_INC 64
  13. #define NUM_BUF_LEN 32
  14. #if WINDOWS || WIN32
  15. #define snprintf _snprintf
  16. #endif
  17. enum {
  18. st_value=0,
  19. st_object,
  20. st_array,
  21. st_key,
  22. st_colon,
  23. st_comma,
  24. st_done,
  25. st_invalid
  26. } JsonState;
  27. enum {
  28. nst_init=0,
  29. nst_sign,
  30. nst_mantissa,
  31. nst_frac0,
  32. nst_frac1,
  33. nst_frac,
  34. nst_esign,
  35. nst_edigit
  36. } JsonNumState;
  37. typedef struct {
  38. ErlNifEnv* env;
  39. jiffy_st* atoms;
  40. ERL_NIF_TERM arg;
  41. ErlNifBinary bin;
  42. size_t bytes_per_red;
  43. int is_partial;
  44. int return_maps;
  45. int return_trailer;
  46. int dedupe_keys;
  47. int copy_strings;
  48. ERL_NIF_TERM null_term;
  49. unsigned char* p;
  50. int i;
  51. int len;
  52. char* st_data;
  53. int st_size;
  54. int st_top;
  55. int current_depth;
  56. int max_levels;
  57. unsigned int level_start;
  58. unsigned int empty_element;
  59. } Decoder;
  60. Decoder*
  61. dec_new(ErlNifEnv* env)
  62. {
  63. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  64. Decoder* d = enif_alloc_resource(st->res_dec, sizeof(Decoder));
  65. int i;
  66. if(d == NULL) {
  67. return NULL;
  68. }
  69. d->atoms = st;
  70. d->bytes_per_red = DEFAULT_BYTES_PER_REDUCTION;
  71. d->is_partial = 0;
  72. d->return_maps = 0;
  73. d->return_trailer = 0;
  74. d->dedupe_keys = 0;
  75. d->copy_strings = 0;
  76. d->null_term = d->atoms->atom_null;
  77. d->p = NULL;
  78. d->len = -1;
  79. d->i = 0;
  80. d->st_data = (char*) enif_alloc(STACK_SIZE_INC);
  81. d->st_size = STACK_SIZE_INC;
  82. d->st_top = 0;
  83. for(i = 0; i < d->st_size; i++) {
  84. d->st_data[i] = st_invalid;
  85. }
  86. d->current_depth = 0;
  87. d->max_levels = -1;
  88. d->level_start = 0;
  89. d->empty_element = 1;
  90. d->st_data[0] = st_value;
  91. d->st_top++;
  92. return d;
  93. }
  94. void
  95. dec_init(Decoder* d, ErlNifEnv* env, ERL_NIF_TERM arg, ErlNifBinary* bin)
  96. {
  97. d->env = env;
  98. d->arg = arg;
  99. d->p = bin->data;
  100. d->len = bin->size;
  101. }
  102. void
  103. dec_destroy(ErlNifEnv* env, void* obj)
  104. {
  105. Decoder* d = (Decoder*) obj;
  106. if(d->st_data != NULL) {
  107. enif_free(d->st_data);
  108. }
  109. }
  110. ERL_NIF_TERM
  111. dec_error(Decoder* d, const char* atom)
  112. {
  113. ERL_NIF_TERM pos = enif_make_int(d->env, d->i+1);
  114. ERL_NIF_TERM msg = make_atom(d->env, atom);
  115. ERL_NIF_TERM ret = enif_make_tuple2(d->env, pos, msg);
  116. return enif_make_tuple2(d->env, d->atoms->atom_error, ret);
  117. }
  118. char
  119. dec_curr(Decoder* d)
  120. {
  121. assert(d->st_top > 0);
  122. return d->st_data[d->st_top - 1];
  123. }
  124. int
  125. dec_top(Decoder* d)
  126. {
  127. return d->st_top;
  128. }
  129. void
  130. dec_push(Decoder* d, char val)
  131. {
  132. int new_sz;
  133. int i;
  134. if(d->st_top == d->st_size) {
  135. new_sz = d->st_size + STACK_SIZE_INC;
  136. d->st_data = (char*) enif_realloc(d->st_data, new_sz);
  137. d->st_size = new_sz;
  138. for(i = d->st_top; i < d->st_size; i++) {
  139. d->st_data[i] = st_invalid;
  140. }
  141. }
  142. assert(d->st_top < d->st_size);
  143. d->st_data[d->st_top++] = val;
  144. }
  145. char
  146. dec_pop(Decoder* d) {
  147. char current = st_invalid;
  148. if (d->st_top > 0) {
  149. current = d->st_data[d->st_top - 1];
  150. d->st_data[d->st_top - 1] = st_invalid;
  151. d->st_top--;
  152. }
  153. return current;
  154. }
  155. void
  156. dec_pop_assert(Decoder* d, char val)
  157. {
  158. char current = dec_pop(d);
  159. assert(current == val && "popped invalid state.");
  160. (void)current;
  161. }
  162. static void inline
  163. level_increase(Decoder* d) {
  164. if(d->max_levels >= 0 && (d->max_levels == d->current_depth++)) {
  165. d->level_start = d->i;
  166. }
  167. }
  168. static int inline
  169. level_decrease(Decoder* d, ERL_NIF_TERM* value) {
  170. if (d->max_levels >= 0 && d->max_levels == --d->current_depth) {
  171. // Only builds term in threshold
  172. unsigned ulen = d->i - d->level_start + 1;
  173. if(!d->copy_strings) {
  174. *value = wrap_enif_make_sub_binary(d->env, d->arg, d->level_start, ulen);
  175. } else {
  176. char* chrbuf = wrap_enif_make_new_binary(d->env, ulen, value);
  177. memcpy(chrbuf, &(d->p[d->level_start]), ulen);
  178. }
  179. return 1;
  180. }
  181. return 0;
  182. }
  183. static int inline
  184. level_allows_terms(Decoder* d) {
  185. return (d->max_levels < 0) || (d->max_levels >= d->current_depth);
  186. }
  187. int
  188. dec_string(Decoder* d, ERL_NIF_TERM* value)
  189. {
  190. int has_escape = 0;
  191. int num_escapes = 0;
  192. int st;
  193. int ulen;
  194. int ui;
  195. int hi;
  196. int lo;
  197. char* chrbuf = NULL;
  198. char buf[4]; // Substitute for chrbuf when no term is needed
  199. int chrpos;
  200. int chrpos_increment;
  201. if(d->p[d->i] != '\"') {
  202. return 0;
  203. }
  204. d->i++;
  205. st = d->i;
  206. while(d->i < d->len) {
  207. if(d->p[d->i] < 0x20) {
  208. return 0;
  209. } else if(d->p[d->i] == '\"') {
  210. d->i++;
  211. goto parse;
  212. } else if(d->p[d->i] == '\\') {
  213. if(d->i+1 >= d->len) {
  214. return 0;
  215. }
  216. has_escape = 1;
  217. num_escapes += 1;
  218. d->i++;
  219. switch(d->p[d->i]) {
  220. case '\"':
  221. case '\\':
  222. case '/':
  223. case 'b':
  224. case 'f':
  225. case 'n':
  226. case 'r':
  227. case 't':
  228. d->i++;
  229. break;
  230. case 'u':
  231. hi = 0;
  232. lo = 0;
  233. d->i++;
  234. if(d->i + 4 >= d->len) {
  235. return 0;
  236. }
  237. hi = int_from_hex(&(d->p[d->i]));
  238. if(hi < 0) {
  239. return 0;
  240. }
  241. d->i += 4;
  242. if(hi >= 0xD800 && hi < 0xDC00) {
  243. if(d->i + 6 >= d->len) {
  244. return 0;
  245. }
  246. if(d->p[d->i++] != '\\') {
  247. return 0;
  248. } else if(d->p[d->i++] != 'u') {
  249. return 0;
  250. }
  251. lo = int_from_hex(&(d->p[d->i]));
  252. if(lo < 0) {
  253. return 0;
  254. }
  255. hi = unicode_from_pair(hi, lo);
  256. if(hi < 0) {
  257. return 0;
  258. }
  259. }
  260. hi = utf8_len(hi);
  261. if(hi < 0) {
  262. return 0;
  263. }
  264. if(lo == 0) {
  265. num_escapes += 5 - hi;
  266. } else {
  267. num_escapes += 11 - hi;
  268. }
  269. break;
  270. default:
  271. return 0;
  272. }
  273. } else if(d->p[d->i] < 0x80) {
  274. d->i++;
  275. } else {
  276. ulen = utf8_validate(&(d->p[d->i]), d->len - d->i);
  277. if(ulen < 0) {
  278. return 0;
  279. }
  280. d->i += ulen;
  281. }
  282. }
  283. // The goto above ensures that we only
  284. // hit this when a string is not terminated
  285. // correctly.
  286. return 0;
  287. parse:
  288. if(!has_escape && !level_allows_terms(d)) {
  289. // If has_escape, the binary is still constructed as a side effect of
  290. // the escape validation, although it's ignored by the caller
  291. return 1;
  292. } else if(!has_escape && !d->copy_strings) {
  293. *value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1));
  294. return 1;
  295. } else if(!has_escape) {
  296. ulen = d->i - 1 - st;
  297. chrbuf = (char*) enif_make_new_binary(d->env, ulen, value),
  298. memcpy(chrbuf, &(d->p[st]), ulen);
  299. return 1;
  300. }
  301. hi = 0;
  302. lo = 0;
  303. ulen = (d->i - 1) - st - num_escapes;
  304. if(level_allows_terms(d)) {
  305. chrbuf = (char*) enif_make_new_binary(d->env, ulen, value);
  306. chrpos_increment = 1;
  307. chrpos = -1;
  308. } else {
  309. // No term is created, but the string is still validated
  310. // (Thus the chrpos_increment = 0, so we overwrite buf)
  311. chrbuf = &buf[0];
  312. chrpos_increment = 0;
  313. chrpos = 0;
  314. }
  315. ui = st;
  316. while(ui < d->i - 1) {
  317. chrpos += chrpos_increment;
  318. if(d->p[ui] != '\\') {
  319. chrbuf[chrpos] = d->p[ui++];
  320. continue;
  321. }
  322. ui++;
  323. switch(d->p[ui]) {
  324. case '\"':
  325. case '\\':
  326. case '/':
  327. chrbuf[chrpos] = d->p[ui];
  328. ui++;
  329. break;
  330. case 'b':
  331. chrbuf[chrpos] = '\b';
  332. ui++;
  333. break;
  334. case 'f':
  335. chrbuf[chrpos] = '\f';
  336. ui++;
  337. break;
  338. case 'n':
  339. chrbuf[chrpos] = '\n';
  340. ui++;
  341. break;
  342. case 'r':
  343. chrbuf[chrpos] = '\r';
  344. ui++;
  345. break;
  346. case 't':
  347. chrbuf[chrpos] = '\t';
  348. ui++;
  349. break;
  350. case 'u':
  351. ui++;
  352. hi = int_from_hex(&(d->p[ui]));
  353. if(hi < 0) {
  354. return 0;
  355. }
  356. if(hi >= 0xD800 && hi < 0xDC00) {
  357. lo = int_from_hex(&(d->p[ui+6]));
  358. if(lo < 0) {
  359. return 0;
  360. }
  361. hi = unicode_from_pair(hi, lo);
  362. ui += 10;
  363. } else {
  364. ui += 4;
  365. }
  366. hi = unicode_to_utf8(hi, (unsigned char*) &chrbuf[chrpos]);
  367. if(hi < 0) {
  368. return 0;
  369. }
  370. chrpos += (hi-1) * chrpos_increment;
  371. break;
  372. default:
  373. return 0;
  374. }
  375. }
  376. return 1;
  377. }
  378. int
  379. dec_number(Decoder* d, ERL_NIF_TERM* value)
  380. {
  381. ERL_NIF_TERM num_type = d->atoms->atom_error;
  382. char state = nst_init;
  383. char nbuf[NUM_BUF_LEN];
  384. int st = d->i;
  385. int has_frac = 0;
  386. int has_exp = 0;
  387. double dval;
  388. long lval;
  389. while(d->i < d->len) {
  390. switch(state) {
  391. case nst_init:
  392. switch(d->p[d->i]) {
  393. case '-':
  394. state = nst_sign;
  395. d->i++;
  396. break;
  397. case '0':
  398. state = nst_frac0;
  399. d->i++;
  400. break;
  401. case '1':
  402. case '2':
  403. case '3':
  404. case '4':
  405. case '5':
  406. case '6':
  407. case '7':
  408. case '8':
  409. case '9':
  410. state = nst_mantissa;
  411. d->i++;
  412. break;
  413. default:
  414. return 0;
  415. }
  416. break;
  417. case nst_sign:
  418. switch(d->p[d->i]) {
  419. case '0':
  420. state = nst_frac0;
  421. d->i++;
  422. break;
  423. case '1':
  424. case '2':
  425. case '3':
  426. case '4':
  427. case '5':
  428. case '6':
  429. case '7':
  430. case '8':
  431. case '9':
  432. state = nst_mantissa;
  433. d->i++;
  434. break;
  435. default:
  436. return 0;
  437. }
  438. break;
  439. case nst_mantissa:
  440. switch(d->p[d->i]) {
  441. case '.':
  442. state = nst_frac1;
  443. d->i++;
  444. break;
  445. case 'e':
  446. case 'E':
  447. state = nst_esign;
  448. d->i++;
  449. break;
  450. case '0':
  451. case '1':
  452. case '2':
  453. case '3':
  454. case '4':
  455. case '5':
  456. case '6':
  457. case '7':
  458. case '8':
  459. case '9':
  460. d->i++;
  461. break;
  462. default:
  463. goto parse;
  464. }
  465. break;
  466. case nst_frac0:
  467. switch(d->p[d->i]) {
  468. case '.':
  469. state = nst_frac1;
  470. d->i++;
  471. break;
  472. case 'e':
  473. case 'E':
  474. state = nst_esign;
  475. d->i++;
  476. break;
  477. default:
  478. goto parse;
  479. }
  480. break;
  481. case nst_frac1:
  482. has_frac = 1;
  483. switch(d->p[d->i]) {
  484. case '0':
  485. case '1':
  486. case '2':
  487. case '3':
  488. case '4':
  489. case '5':
  490. case '6':
  491. case '7':
  492. case '8':
  493. case '9':
  494. state = nst_frac;
  495. d->i++;
  496. break;
  497. default:
  498. goto parse;
  499. }
  500. break;
  501. case nst_frac:
  502. switch(d->p[d->i]) {
  503. case 'e':
  504. case 'E':
  505. state = nst_esign;
  506. d->i++;
  507. break;
  508. case '0':
  509. case '1':
  510. case '2':
  511. case '3':
  512. case '4':
  513. case '5':
  514. case '6':
  515. case '7':
  516. case '8':
  517. case '9':
  518. d->i++;
  519. break;
  520. default:
  521. goto parse;
  522. }
  523. break;
  524. case nst_esign:
  525. has_exp = 1;
  526. switch(d->p[d->i]) {
  527. case '-':
  528. case '+':
  529. case '0':
  530. case '1':
  531. case '2':
  532. case '3':
  533. case '4':
  534. case '5':
  535. case '6':
  536. case '7':
  537. case '8':
  538. case '9':
  539. state = nst_edigit;
  540. d->i++;
  541. break;
  542. default:
  543. return 0;
  544. }
  545. break;
  546. case nst_edigit:
  547. switch(d->p[d->i]) {
  548. case '0':
  549. case '1':
  550. case '2':
  551. case '3':
  552. case '4':
  553. case '5':
  554. case '6':
  555. case '7':
  556. case '8':
  557. case '9':
  558. d->i++;
  559. break;
  560. default:
  561. goto parse;
  562. }
  563. break;
  564. default:
  565. return 0;
  566. }
  567. }
  568. parse:
  569. switch(state) {
  570. case nst_init:
  571. case nst_sign:
  572. case nst_frac1:
  573. case nst_esign:
  574. return 0;
  575. default:
  576. break;
  577. }
  578. if(!level_allows_terms(d)) {
  579. return 1;
  580. }
  581. errno = 0;
  582. if(d->i - st < NUM_BUF_LEN) {
  583. memset(nbuf, 0, NUM_BUF_LEN);
  584. memcpy(nbuf, &(d->p[st]), d->i - st);
  585. if(has_frac || has_exp) {
  586. dval = strtod(nbuf, NULL);
  587. if(errno != ERANGE) {
  588. *value = enif_make_double(d->env, dval);
  589. return 1;
  590. }
  591. } else {
  592. lval = strtol(nbuf, NULL, 10);
  593. if(errno != ERANGE) {
  594. *value = enif_make_int64(d->env, lval);
  595. return 1;
  596. }
  597. }
  598. }
  599. if(!has_frac && !has_exp) {
  600. num_type = d->atoms->atom_bignum;
  601. } else if(!has_frac && has_exp) {
  602. num_type = d->atoms->atom_bignum_e;
  603. } else {
  604. num_type = d->atoms->atom_bigdbl;
  605. }
  606. d->is_partial = 1;
  607. *value = enif_make_sub_binary(d->env, d->arg, st, d->i - st);
  608. *value = enif_make_tuple2(d->env, num_type, *value);
  609. return 1;
  610. }
  611. ERL_NIF_TERM
  612. make_empty_object(ErlNifEnv* env, int ret_map)
  613. {
  614. #if MAP_TYPE_PRESENT
  615. if(ret_map) {
  616. return enif_make_new_map(env);
  617. }
  618. #endif
  619. return enif_make_tuple1(env, enif_make_list(env, 0));
  620. }
  621. ERL_NIF_TERM
  622. make_array(ErlNifEnv* env, ERL_NIF_TERM list)
  623. {
  624. ERL_NIF_TERM ret = enif_make_list(env, 0);
  625. ERL_NIF_TERM item;
  626. while(enif_get_list_cell(env, list, &item, &list)) {
  627. ret = enif_make_list_cell(env, item, ret);
  628. }
  629. return ret;
  630. }
  631. int
  632. get_max_levels(ErlNifEnv* env, ERL_NIF_TERM val, int* max_levels_p)
  633. {
  634. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  635. const ERL_NIF_TERM* tuple;
  636. int arity;
  637. int max_levels;
  638. if(!enif_get_tuple(env, val, &arity, &tuple)) {
  639. return 0;
  640. }
  641. if(arity != 2) {
  642. return 0;
  643. }
  644. if(enif_compare(tuple[0], st->atom_max_levels) != 0) {
  645. return 0;
  646. }
  647. if(!enif_get_int(env, tuple[1], &max_levels)) {
  648. return 0;
  649. }
  650. if(max_levels < 0) {
  651. return 0;
  652. }
  653. *max_levels_p = max_levels;
  654. return 1;
  655. }
  656. ERL_NIF_TERM
  657. decode_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  658. {
  659. Decoder* d;
  660. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  661. ERL_NIF_TERM tmp_argv[5];
  662. ERL_NIF_TERM opts;
  663. ERL_NIF_TERM val;
  664. if(argc != 2) {
  665. return enif_make_badarg(env);
  666. }
  667. d = dec_new(env);
  668. if(d == NULL) {
  669. return make_error(st, env, "internal_error");
  670. }
  671. tmp_argv[0] = argv[0];
  672. tmp_argv[1] = enif_make_resource(env, d);
  673. tmp_argv[2] = st->atom_error;
  674. tmp_argv[3] = enif_make_list(env, 0);
  675. tmp_argv[4] = enif_make_list(env, 0);
  676. enif_release_resource(d);
  677. opts = argv[1];
  678. if(!enif_is_list(env, opts)) {
  679. return enif_make_badarg(env);
  680. }
  681. while(enif_get_list_cell(env, opts, &val, &opts)) {
  682. if(get_bytes_per_iter(env, val, &(d->bytes_per_red))) {
  683. continue;
  684. } else if(get_bytes_per_red(env, val, &(d->bytes_per_red))) {
  685. continue;
  686. } else if(enif_is_identical(val, d->atoms->atom_return_maps)) {
  687. #if MAP_TYPE_PRESENT
  688. d->return_maps = 1;
  689. #else
  690. return enif_make_badarg(env);
  691. #endif
  692. } else if(enif_is_identical(val, d->atoms->atom_return_trailer)) {
  693. d->return_trailer = 1;
  694. } else if(enif_is_identical(val, d->atoms->atom_dedupe_keys)) {
  695. d->dedupe_keys = 1;
  696. } else if(enif_is_identical(val, d->atoms->atom_copy_strings)) {
  697. d->copy_strings = 1;
  698. } else if(enif_is_identical(val, d->atoms->atom_use_nil)) {
  699. d->null_term = d->atoms->atom_nil;
  700. } else if(get_null_term(env, val, &(d->null_term))) {
  701. continue;
  702. } else if(get_max_levels(env, val, &(d->max_levels))) {
  703. continue;
  704. } else {
  705. return enif_make_badarg(env);
  706. }
  707. }
  708. return decode_iter(env, 5, tmp_argv);
  709. }
  710. ERL_NIF_TERM
  711. decode_iter(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  712. {
  713. Decoder* d;
  714. jiffy_st* st = (jiffy_st*) enif_priv_data(env);
  715. ErlNifBinary bin;
  716. ERL_NIF_TERM objs;
  717. ERL_NIF_TERM curr;
  718. ERL_NIF_TERM val = argv[2];
  719. ERL_NIF_TERM trailer;
  720. ERL_NIF_TERM ret;
  721. ERL_NIF_TERM tmp_argv[5];
  722. void* res;
  723. size_t start;
  724. size_t bytes_processed = 0;
  725. if(!enif_inspect_binary(env, argv[0], &bin)) {
  726. return enif_make_badarg(env);
  727. } else if(!enif_get_resource(env, argv[1], st->res_dec, &res)) {
  728. return enif_make_badarg(env);
  729. }
  730. d = (Decoder*) res;
  731. dec_init(d, env, argv[0], &bin);
  732. objs = argv[3];
  733. curr = argv[4];
  734. start = d->i;
  735. while(d->i < bin.size) {
  736. bytes_processed = d->i - start;
  737. if(should_yield(bytes_processed, d->bytes_per_red)) {
  738. assert(enif_is_list(env, objs));
  739. assert(enif_is_list(env, curr));
  740. tmp_argv[0] = argv[0];
  741. tmp_argv[1] = argv[1];
  742. tmp_argv[2] = val;
  743. tmp_argv[3] = objs;
  744. tmp_argv[4] = curr;
  745. bump_used_reds(env, bytes_processed, d->bytes_per_red);
  746. #if SCHEDULE_NIF_PRESENT
  747. return enif_schedule_nif(
  748. env,
  749. "nif_decode_iter",
  750. 0,
  751. decode_iter,
  752. 5,
  753. tmp_argv
  754. );
  755. #else
  756. return enif_make_tuple2(
  757. env,
  758. st->atom_iter,
  759. enif_make_tuple_from_array(env, tmp_argv, 5)
  760. );
  761. #endif
  762. }
  763. switch(dec_curr(d)) {
  764. case st_value:
  765. switch(d->p[d->i]) {
  766. case ' ':
  767. case '\n':
  768. case '\r':
  769. case '\t':
  770. d->i++;
  771. break;
  772. case 'n':
  773. if(d->i + 3 >= d->len) {
  774. ret = dec_error(d, "invalid_literal");
  775. goto done;
  776. }
  777. if(memcmp(&(d->p[d->i]), "null", 4) != 0) {
  778. ret = dec_error(d, "invalid_literal");
  779. goto done;
  780. }
  781. val = d->null_term;
  782. dec_pop_assert(d, st_value);
  783. d->i += 4;
  784. d->empty_element = 0;
  785. break;
  786. case 't':
  787. if(d->i + 3 >= d->len) {
  788. ret = dec_error(d, "invalid_literal");
  789. goto done;
  790. }
  791. if(memcmp(&(d->p[d->i]), "true", 4) != 0) {
  792. ret = dec_error(d, "invalid_literal");
  793. goto done;
  794. }
  795. val = d->atoms->atom_true;
  796. dec_pop_assert(d, st_value);
  797. d->i += 4;
  798. d->empty_element = 0;
  799. break;
  800. case 'f':
  801. if(d->i + 4 >= bin.size) {
  802. ret = dec_error(d, "invalid_literal");
  803. goto done;
  804. }
  805. if(memcmp(&(d->p[d->i]), "false", 5) != 0) {
  806. ret = dec_error(d, "invalid_literal");
  807. goto done;
  808. }
  809. val = d->atoms->atom_false;
  810. dec_pop_assert(d, st_value);
  811. d->i += 5;
  812. d->empty_element = 0;
  813. break;
  814. case '\"':
  815. if(!dec_string(d, &val)) {
  816. ret = dec_error(d, "invalid_string");
  817. goto done;
  818. }
  819. dec_pop_assert(d, st_value);
  820. d->empty_element = 0;
  821. break;
  822. case '-':
  823. case '0':
  824. case '1':
  825. case '2':
  826. case '3':
  827. case '4':
  828. case '5':
  829. case '6':
  830. case '7':
  831. case '8':
  832. case '9':
  833. if(!dec_number(d, &val)) {
  834. ret = dec_error(d, "invalid_number");
  835. goto done;
  836. }
  837. dec_pop_assert(d, st_value);
  838. d->empty_element = 0;
  839. break;
  840. case '{':
  841. dec_push(d, st_object);
  842. dec_push(d, st_key);
  843. level_increase(d);
  844. if(level_allows_terms(d)) {
  845. objs = enif_make_list_cell(env, curr, objs);
  846. curr = enif_make_list(env, 0);
  847. }
  848. d->i++;
  849. d->empty_element = 1;
  850. break;
  851. case '[':
  852. dec_push(d, st_array);
  853. dec_push(d, st_value);
  854. level_increase(d);
  855. if(level_allows_terms(d)) {
  856. objs = enif_make_list_cell(env, curr, objs);
  857. curr = enif_make_list(env, 0);
  858. }
  859. d->i++;
  860. d->empty_element = 1;
  861. break;
  862. case ']':
  863. if(!d->empty_element) {
  864. ret = dec_error(d, "invalid_json");
  865. goto done;
  866. }
  867. dec_pop_assert(d, st_value);
  868. if(dec_pop(d) != st_array) {
  869. ret = dec_error(d, "invalid_json");
  870. goto done;
  871. }
  872. dec_pop_assert(d, st_value);
  873. if(level_allows_terms(d)) {
  874. val = curr; // curr is []
  875. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  876. ret = dec_error(d, "internal_error");
  877. goto done;
  878. }
  879. }
  880. level_decrease(d, &val);
  881. d->i++;
  882. d->empty_element = 0;
  883. break;
  884. default:
  885. ret = dec_error(d, "invalid_json");
  886. goto done;
  887. }
  888. if(dec_top(d) == 0) {
  889. dec_push(d, st_done);
  890. } else if(dec_curr(d) != st_value && dec_curr(d) != st_key) {
  891. dec_push(d, st_comma);
  892. if(level_allows_terms(d)) {
  893. curr = enif_make_list_cell(env, val, curr);
  894. }
  895. }
  896. break;
  897. case st_key:
  898. switch(d->p[d->i]) {
  899. case ' ':
  900. case '\n':
  901. case '\r':
  902. case '\t':
  903. d->i++;
  904. break;
  905. case '\"':
  906. if(!dec_string(d, &val)) {
  907. ret = dec_error(d, "invalid_string");
  908. goto done;
  909. }
  910. dec_pop_assert(d, st_key);
  911. dec_push(d, st_colon);
  912. if(level_allows_terms(d)) {
  913. curr = enif_make_list_cell(env, val, curr);
  914. }
  915. break;
  916. case '}':
  917. if(!d->empty_element) {
  918. ret = dec_error(d, "invalid_json");
  919. goto done;
  920. }
  921. dec_pop_assert(d, st_key);
  922. dec_pop_assert(d, st_object);
  923. dec_pop_assert(d, st_value);
  924. if(level_allows_terms(d)) {
  925. val = make_empty_object(env, d->return_maps);
  926. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  927. ret = dec_error(d, "internal_error");
  928. goto done;
  929. }
  930. }
  931. level_decrease(d, &val);
  932. if(dec_top(d) == 0) {
  933. dec_push(d, st_done);
  934. } else {
  935. dec_push(d, st_comma);
  936. if(level_allows_terms(d)) {
  937. curr = enif_make_list_cell(env, val, curr);
  938. }
  939. }
  940. d->i++;
  941. d->empty_element = 0;
  942. break;
  943. default:
  944. ret = dec_error(d, "invalid_json");
  945. goto done;
  946. }
  947. break;
  948. case st_colon:
  949. switch(d->p[d->i]) {
  950. case ' ':
  951. case '\n':
  952. case '\r':
  953. case '\t':
  954. d->i++;
  955. break;
  956. case ':':
  957. dec_pop_assert(d, st_colon);
  958. dec_push(d, st_value);
  959. d->i++;
  960. break;
  961. default:
  962. ret = dec_error(d, "invalid_json");
  963. goto done;
  964. }
  965. break;
  966. case st_comma:
  967. switch(d->p[d->i]) {
  968. case ' ':
  969. case '\n':
  970. case '\r':
  971. case '\t':
  972. d->i++;
  973. break;
  974. case ',':
  975. dec_pop_assert(d, st_comma);
  976. switch(dec_curr(d)) {
  977. case st_object:
  978. dec_push(d, st_key);
  979. break;
  980. case st_array:
  981. dec_push(d, st_value);
  982. break;
  983. default:
  984. ret = dec_error(d, "internal_error");
  985. goto done;
  986. }
  987. d->i++;
  988. break;
  989. case '}':
  990. dec_pop_assert(d, st_comma);
  991. if(dec_pop(d) != st_object) {
  992. ret = dec_error(d, "invalid_json");
  993. goto done;
  994. }
  995. dec_pop_assert(d, st_value);
  996. if(level_allows_terms(d)) {
  997. if(!make_object(env, curr, &val,
  998. d->return_maps, d->dedupe_keys)) {
  999. ret = dec_error(d, "internal_object_error");
  1000. goto done;
  1001. }
  1002. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  1003. ret = dec_error(d, "internal_error");
  1004. goto done;
  1005. }
  1006. }
  1007. level_decrease(d, &val);
  1008. if(dec_top(d) > 0) {
  1009. dec_push(d, st_comma);
  1010. if(level_allows_terms(d)) {
  1011. curr = enif_make_list_cell(env, val, curr);
  1012. }
  1013. } else {
  1014. dec_push(d, st_done);
  1015. }
  1016. d->i++;
  1017. break;
  1018. case ']':
  1019. dec_pop_assert(d, st_comma);
  1020. if(dec_pop(d) != st_array) {
  1021. ret = dec_error(d, "invalid_json");
  1022. goto done;
  1023. }
  1024. dec_pop_assert(d, st_value);
  1025. if(level_allows_terms(d)) {
  1026. val = make_array(env, curr);
  1027. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  1028. ret = dec_error(d, "internal_error");
  1029. goto done;
  1030. }
  1031. }
  1032. level_decrease(d, &val);
  1033. if(dec_top(d) > 0) {
  1034. dec_push(d, st_comma);
  1035. if(level_allows_terms(d)) {
  1036. curr = enif_make_list_cell(env, val, curr);
  1037. }
  1038. } else {
  1039. dec_push(d, st_done);
  1040. }
  1041. d->i++;
  1042. break;
  1043. default:
  1044. ret = dec_error(d, "invalid_json");
  1045. goto done;
  1046. }
  1047. break;
  1048. case st_done:
  1049. switch(d->p[d->i]) {
  1050. case ' ':
  1051. case '\n':
  1052. case '\r':
  1053. case '\t':
  1054. d->i++;
  1055. break;
  1056. default:
  1057. goto decode_done;
  1058. }
  1059. break;
  1060. default:
  1061. ret = dec_error(d, "invalid_internal_state");
  1062. goto done;
  1063. }
  1064. }
  1065. decode_done:
  1066. level_decrease(d, &val);
  1067. if(d->i < bin.size && d->return_trailer) {
  1068. trailer = enif_make_sub_binary(env, argv[0], d->i, bin.size - d->i);
  1069. val = enif_make_tuple3(env, d->atoms->atom_has_trailer, val, trailer);
  1070. } else if(d->i < bin.size) {
  1071. ret = dec_error(d, "invalid_trailing_data");
  1072. goto done;
  1073. }
  1074. if(dec_pop(d) != st_done) {
  1075. ret = dec_error(d, "truncated_json");
  1076. } else if(d->is_partial) {
  1077. ret = enif_make_tuple2(env, d->atoms->atom_partial, val);
  1078. } else {
  1079. ret = val;
  1080. }
  1081. done:
  1082. bump_used_reds(env, bytes_processed, d->bytes_per_red);
  1083. return ret;
  1084. }