您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

909 行
26 KiB

// This file is part of Jiffy released under the MIT license.
// See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <errno.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <string.h>
  8. #include "erl_nif.h"
  9. #include "jiffy.h"
  10. #define U(c) ((unsigned char) (c))
  11. #define ERROR(i, msg) make_error(st, env, msg)
  12. #define STACK_SIZE_INC 64
  13. #define NUM_BUF_LEN 32
  14. enum {
  15. st_value=0,
  16. st_object,
  17. st_array,
  18. st_key,
  19. st_colon,
  20. st_comma,
  21. st_done,
  22. st_invalid
  23. } JsonState;
  24. enum {
  25. nst_init=0,
  26. nst_sign,
  27. nst_mantissa,
  28. nst_frac0,
  29. nst_frac1,
  30. nst_frac,
  31. nst_esign,
  32. nst_edigit
  33. } JsonNumState;
  34. typedef struct {
  35. ErlNifEnv* env;
  36. jiffy_st* atoms;
  37. ERL_NIF_TERM arg;
  38. ErlNifBinary bin;
  39. int is_partial;
  40. char* p;
  41. unsigned char* u;
  42. int i;
  43. int len;
  44. char* st_data;
  45. int st_size;
  46. int st_top;
  47. } Decoder;
  48. void
  49. dec_init(Decoder* d, ErlNifEnv* env, ERL_NIF_TERM arg, ErlNifBinary* bin)
  50. {
  51. int i;
  52. d->env = env;
  53. d->atoms = enif_priv_data(env);
  54. d->arg = arg;
  55. d->is_partial = 0;
  56. d->p = (char*) bin->data;
  57. d->u = bin->data;
  58. d->len = bin->size;
  59. d->i = 0;
  60. d->st_data = (char*) enif_alloc(STACK_SIZE_INC * sizeof(char));
  61. d->st_size = STACK_SIZE_INC;
  62. d->st_top = 0;
  63. for(i = 0; i < d->st_size; i++) {
  64. d->st_data[i] = st_invalid;
  65. }
  66. d->st_data[0] = st_value;
  67. d->st_top++;
  68. }
  69. void
  70. dec_destroy(Decoder* d)
  71. {
  72. if(d->st_data != NULL) {
  73. enif_free(d->st_data);
  74. }
  75. }
  76. ERL_NIF_TERM
  77. dec_error(Decoder* d, const char* atom)
  78. {
  79. ERL_NIF_TERM pos = enif_make_int(d->env, d->i+1);
  80. ERL_NIF_TERM msg = make_atom(d->env, atom);
  81. ERL_NIF_TERM ret = enif_make_tuple2(d->env, pos, msg);
  82. return enif_make_tuple2(d->env, d->atoms->atom_error, ret);
  83. }
  84. char
  85. dec_curr(Decoder* d)
  86. {
  87. return d->st_data[d->st_top-1];
  88. }
  89. int
  90. dec_top(Decoder* d)
  91. {
  92. return d->st_top;
  93. }
  94. void
  95. dec_push(Decoder* d, char val)
  96. {
  97. char* tmp;
  98. int new_sz;
  99. int i;
  100. if(d->st_top >= d->st_size) {
  101. new_sz = d->st_size + STACK_SIZE_INC;
  102. tmp = (char*) enif_alloc(new_sz * sizeof(char));
  103. memcpy(tmp, d->st_data, d->st_size * sizeof(char));
  104. enif_free(d->st_data);
  105. d->st_data = tmp;
  106. d->st_size = new_sz;
  107. for(i = d->st_top; i < d->st_size; i++) {
  108. d->st_data[i] = st_invalid;
  109. }
  110. }
  111. d->st_data[d->st_top++] = val;
  112. }
  113. void
  114. dec_pop(Decoder* d, char val)
  115. {
  116. assert(d->st_data[d->st_top-1] == val && "popped invalid state.");
  117. d->st_data[d->st_top-1] = st_invalid;
  118. d->st_top--;
  119. }
  120. int
  121. dec_string(Decoder* d, ERL_NIF_TERM* value)
  122. {
  123. int has_escape = 0;
  124. int num_escapes = 0;
  125. int st;
  126. int ulen;
  127. int ui;
  128. int hi;
  129. int lo;
  130. char* chrbuf;
  131. int chrpos;
  132. if(d->p[d->i] != '\"') {
  133. return 0;
  134. }
  135. d->i++;
  136. st = d->i;
  137. while(d->i < d->len) {
  138. if(d->u[d->i] < 0x20) {
  139. return 0;
  140. } else if(d->p[d->i] == '\"') {
  141. d->i++;
  142. goto parse;
  143. } else if(d->p[d->i] == '\\') {
  144. if(d->i+1 >= d->len) {
  145. return 0;
  146. }
  147. has_escape = 1;
  148. num_escapes += 1;
  149. d->i++;
  150. switch(d->p[d->i]) {
  151. case '\"':
  152. case '\\':
  153. case '/':
  154. case 'b':
  155. case 'f':
  156. case 'n':
  157. case 'r':
  158. case 't':
  159. d->i++;
  160. break;
  161. case 'u':
  162. hi = 0;
  163. lo = 0;
  164. d->i++;
  165. if(d->i + 4 >= d->len) {
  166. return 0;
  167. }
  168. hi = int_from_hex(&(d->u[d->i]));
  169. if(hi < 0) {
  170. return 0;
  171. }
  172. d->i += 4;
  173. if(hi >= 0xD800 && hi < 0xDC00) {
  174. if(d->i + 6 >= d->len) {
  175. return 0;
  176. }
  177. if(d->p[d->i++] != '\\') {
  178. return 0;
  179. } else if(d->p[d->i++] != 'u') {
  180. return 0;
  181. }
  182. lo = int_from_hex(&(d->u[d->i]));
  183. if(lo < 0) {
  184. return 0;
  185. }
  186. hi = unicode_from_pair(hi, lo);
  187. if(hi < 0) {
  188. return 0;
  189. }
  190. }
  191. hi = utf8_len(hi);
  192. if(hi < 0) {
  193. return 0;
  194. }
  195. if(lo == 0) {
  196. num_escapes += 5 - hi;
  197. } else {
  198. num_escapes += 11 - hi;
  199. }
  200. break;
  201. default:
  202. return 0;
  203. }
  204. } else if(d->u[d->i] < 0x80) {
  205. d->i++;
  206. } else {
  207. ulen = utf8_validate(&(d->u[d->i]), d->len - d->i);
  208. if(ulen < 0) {
  209. return 0;
  210. }
  211. d->i += ulen;
  212. }
  213. }
  214. parse:
  215. if(d->p[d->i-1] != '\"') {
  216. return 0;
  217. }
  218. if(!has_escape) {
  219. *value = enif_make_sub_binary(d->env, d->arg, st, (d->i - st - 1));
  220. return 1;
  221. }
  222. hi = 0;
  223. lo = 0;
  224. ulen = (d->i - 1) - st - num_escapes;
  225. chrbuf = (char*) enif_make_new_binary(d->env, ulen, value);
  226. chrpos = 0;
  227. ui = st;
  228. while(ui < d->i - 1) {
  229. if(d->p[ui] != '\\') {
  230. chrbuf[chrpos++] = d->p[ui++];
  231. continue;
  232. }
  233. ui++;
  234. switch(d->p[ui]) {
  235. case '\"':
  236. case '\\':
  237. case '/':
  238. chrbuf[chrpos++] = d->p[ui];
  239. ui++;
  240. break;
  241. case 'b':
  242. chrbuf[chrpos++] = '\b';
  243. ui++;
  244. break;
  245. case 'f':
  246. chrbuf[chrpos++] = '\f';
  247. ui++;
  248. break;
  249. case 'n':
  250. chrbuf[chrpos++] = '\n';
  251. ui++;
  252. break;
  253. case 'r':
  254. chrbuf[chrpos++] = '\r';
  255. ui++;
  256. break;
  257. case 't':
  258. chrbuf[chrpos++] = '\t';
  259. ui++;
  260. break;
  261. case 'u':
  262. ui++;
  263. hi = int_from_hex(&(d->u[ui]));
  264. if(hi < 0) {
  265. return 0;
  266. }
  267. if(hi >= 0xD800 && hi < 0xDC00) {
  268. lo = int_from_hex(&(d->u[ui+6]));
  269. if(lo < 0) {
  270. return 0;
  271. }
  272. hi = unicode_from_pair(hi, lo);
  273. ui += 10;
  274. } else {
  275. ui += 4;
  276. }
  277. hi = unicode_to_utf8(hi, (unsigned char*) chrbuf+chrpos);
  278. if(hi < 0) {
  279. return 0;
  280. }
  281. chrpos += hi;
  282. break;
  283. default:
  284. return 0;
  285. }
  286. }
  287. return 1;
  288. }
  289. int
  290. dec_number(Decoder* d, ERL_NIF_TERM* value)
  291. {
  292. ERL_NIF_TERM num_type = d->atoms->atom_error;
  293. char state = nst_init;
  294. char nbuf[NUM_BUF_LEN];
  295. int st = d->i;
  296. int has_frac = 0;
  297. int has_exp = 0;
  298. double dval;
  299. long lval;
  300. while(d->i < d->len) {
  301. switch(state) {
  302. case nst_init:
  303. switch(d->p[d->i]) {
  304. case '-':
  305. state = nst_sign;
  306. d->i++;
  307. break;
  308. case '0':
  309. state = nst_frac0;
  310. d->i++;
  311. break;
  312. case '1':
  313. case '2':
  314. case '3':
  315. case '4':
  316. case '5':
  317. case '6':
  318. case '7':
  319. case '8':
  320. case '9':
  321. state = nst_mantissa;
  322. d->i++;
  323. break;
  324. default:
  325. return 0;
  326. }
  327. break;
  328. case nst_sign:
  329. switch(d->p[d->i]) {
  330. case '0':
  331. state = nst_frac0;
  332. d->i++;
  333. break;
  334. case '1':
  335. case '2':
  336. case '3':
  337. case '4':
  338. case '5':
  339. case '6':
  340. case '7':
  341. case '8':
  342. case '9':
  343. state = nst_mantissa;
  344. d->i++;
  345. break;
  346. default:
  347. return 0;
  348. }
  349. break;
  350. case nst_mantissa:
  351. switch(d->p[d->i]) {
  352. case '.':
  353. state = nst_frac1;
  354. d->i++;
  355. break;
  356. case 'e':
  357. case 'E':
  358. state = nst_esign;
  359. d->i++;
  360. break;
  361. case '0':
  362. case '1':
  363. case '2':
  364. case '3':
  365. case '4':
  366. case '5':
  367. case '6':
  368. case '7':
  369. case '8':
  370. case '9':
  371. d->i++;
  372. break;
  373. default:
  374. goto parse;
  375. }
  376. break;
  377. case nst_frac0:
  378. switch(d->p[d->i]) {
  379. case '.':
  380. state = nst_frac1;
  381. d->i++;
  382. break;
  383. case 'e':
  384. case 'E':
  385. state = nst_esign;
  386. d->i++;
  387. break;
  388. default:
  389. goto parse;
  390. }
  391. break;
  392. case nst_frac1:
  393. has_frac = 1;
  394. switch(d->p[d->i]) {
  395. case '0':
  396. case '1':
  397. case '2':
  398. case '3':
  399. case '4':
  400. case '5':
  401. case '6':
  402. case '7':
  403. case '8':
  404. case '9':
  405. state = nst_frac;
  406. d->i++;
  407. break;
  408. default:
  409. goto parse;
  410. }
  411. break;
  412. case nst_frac:
  413. switch(d->p[d->i]) {
  414. case 'e':
  415. case 'E':
  416. state = nst_esign;
  417. d->i++;
  418. break;
  419. case '0':
  420. case '1':
  421. case '2':
  422. case '3':
  423. case '4':
  424. case '5':
  425. case '6':
  426. case '7':
  427. case '8':
  428. case '9':
  429. d->i++;
  430. break;
  431. default:
  432. goto parse;
  433. }
  434. break;
  435. case nst_esign:
  436. has_exp = 1;
  437. switch(d->p[d->i]) {
  438. case '-':
  439. case '+':
  440. case '0':
  441. case '1':
  442. case '2':
  443. case '3':
  444. case '4':
  445. case '5':
  446. case '6':
  447. case '7':
  448. case '8':
  449. case '9':
  450. state = nst_edigit;
  451. d->i++;
  452. break;
  453. default:
  454. return 0;
  455. }
  456. break;
  457. case nst_edigit:
  458. switch(d->p[d->i]) {
  459. case '0':
  460. case '1':
  461. case '2':
  462. case '3':
  463. case '4':
  464. case '5':
  465. case '6':
  466. case '7':
  467. case '8':
  468. case '9':
  469. d->i++;
  470. break;
  471. default:
  472. goto parse;
  473. }
  474. break;
  475. default:
  476. return 0;
  477. }
  478. }
  479. parse:
  480. switch(state) {
  481. case nst_init:
  482. case nst_sign:
  483. case nst_frac1:
  484. case nst_esign:
  485. return 0;
  486. default:
  487. break;
  488. }
  489. errno = 0;
  490. if(d->i - st < NUM_BUF_LEN) {
  491. memset(nbuf, 0, NUM_BUF_LEN);
  492. memcpy(nbuf, &(d->p[st]), d->i - st);
  493. if(has_frac || has_exp) {
  494. dval = strtod(nbuf, NULL);
  495. if(errno != ERANGE) {
  496. *value = enif_make_double(d->env, dval);
  497. return 1;
  498. }
  499. } else {
  500. lval = strtol(nbuf, NULL, 10);
  501. if(errno != ERANGE) {
  502. *value = enif_make_int64(d->env, lval);
  503. return 1;
  504. }
  505. }
  506. }
  507. if(!has_frac && !has_exp) {
  508. num_type = d->atoms->atom_bignum;
  509. } else if(has_exp) {
  510. num_type = d->atoms->atom_bignum_e;
  511. } else {
  512. num_type = d->atoms->atom_bigdbl;
  513. }
  514. d->is_partial = 1;
  515. *value = enif_make_sub_binary(d->env, d->arg, st, d->i - st);
  516. *value = enif_make_tuple2(d->env, num_type, *value);
  517. return 1;
  518. }
  519. ERL_NIF_TERM
  520. make_object(ErlNifEnv* env, ERL_NIF_TERM pairs)
  521. {
  522. ERL_NIF_TERM ret = enif_make_list(env, 0);
  523. ERL_NIF_TERM key, val;
  524. while(enif_get_list_cell(env, pairs, &val, &pairs)) {
  525. if(!enif_get_list_cell(env, pairs, &key, &pairs)) {
  526. assert(0 == 1 && "Unbalanced object pairs.");
  527. }
  528. val = enif_make_tuple2(env, key, val);
  529. ret = enif_make_list_cell(env, val, ret);
  530. }
  531. return enif_make_tuple1(env, ret);
  532. }
  533. ERL_NIF_TERM
  534. make_array(ErlNifEnv* env, ERL_NIF_TERM list)
  535. {
  536. ERL_NIF_TERM ret = enif_make_list(env, 0);
  537. ERL_NIF_TERM item;
  538. while(enif_get_list_cell(env, list, &item, &list)) {
  539. ret = enif_make_list_cell(env, item, ret);
  540. }
  541. return ret;
  542. }
  543. ERL_NIF_TERM
  544. decode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  545. {
  546. Decoder dec;
  547. Decoder* d = &dec;
  548. ErlNifBinary bin;
  549. ERL_NIF_TERM objs = enif_make_list(env, 0);
  550. ERL_NIF_TERM curr = enif_make_list(env, 0);
  551. ERL_NIF_TERM val;
  552. ERL_NIF_TERM ret;
  553. if(argc != 1) {
  554. return enif_make_badarg(env);
  555. } else if(!enif_inspect_binary(env, argv[0], &bin)) {
  556. return enif_make_badarg(env);
  557. }
  558. dec_init(d, env, argv[0], &bin);
  559. //fprintf(stderr, "Parsing:\r\n");
  560. while(d->i < bin.size) {
  561. //fprintf(stderr, "state: %d\r\n", dec_curr(d));
  562. switch(dec_curr(d)) {
  563. case st_value:
  564. switch(d->p[d->i]) {
  565. case ' ':
  566. case '\n':
  567. case '\r':
  568. case '\t':
  569. d->i++;
  570. break;
  571. case 'n':
  572. if(d->i + 3 >= d->len) {
  573. ret = dec_error(d, "invalid_literal");
  574. goto done;
  575. }
  576. if(memcmp(&(d->p[d->i]), "null", 4) != 0) {
  577. ret = dec_error(d, "invalid_literal");
  578. goto done;
  579. }
  580. val = d->atoms->atom_null;
  581. dec_pop(d, st_value);
  582. d->i += 4;
  583. break;
  584. case 't':
  585. if(d->i + 3 >= d->len) {
  586. ret = dec_error(d, "invalid_literal");
  587. goto done;
  588. }
  589. if(memcmp(&(d->p[d->i]), "true", 4) != 0) {
  590. ret = dec_error(d, "invalid_literal");
  591. goto done;
  592. }
  593. val = d->atoms->atom_true;
  594. dec_pop(d, st_value);
  595. d->i += 4;
  596. break;
  597. case 'f':
  598. if(d->i + 4 >= bin.size) {
  599. ret = dec_error(d, "invalid_literal");
  600. goto done;
  601. }
  602. if(memcmp(&(d->p[d->i]), "false", 5) != 0) {
  603. ret = dec_error(d, "invalid_literal");
  604. goto done;
  605. }
  606. val = d->atoms->atom_false;
  607. dec_pop(d, st_value);
  608. d->i += 5;
  609. break;
  610. case '\"':
  611. if(!dec_string(d, &val)) {
  612. ret = dec_error(d, "invalid_string");
  613. goto done;
  614. }
  615. dec_pop(d, st_value);
  616. break;
  617. case '-':
  618. case '0':
  619. case '1':
  620. case '2':
  621. case '3':
  622. case '4':
  623. case '5':
  624. case '6':
  625. case '7':
  626. case '8':
  627. case '9':
  628. if(!dec_number(d, &val)) {
  629. ret = dec_error(d, "invalid_number");
  630. goto done;
  631. }
  632. dec_pop(d, st_value);
  633. break;
  634. case '{':
  635. dec_push(d, st_object);
  636. dec_push(d, st_key);
  637. objs = enif_make_list_cell(env, curr, objs);
  638. curr = enif_make_list(env, 0);
  639. d->i++;
  640. break;
  641. case '[':
  642. dec_push(d, st_array);
  643. dec_push(d, st_value);
  644. objs = enif_make_list_cell(env, curr, objs);
  645. curr = enif_make_list(env, 0);
  646. d->i++;
  647. break;
  648. case ']':
  649. if(!enif_is_empty_list(env, curr)) {
  650. ret = dec_error(d, "invalid_json");
  651. goto done;
  652. }
  653. dec_pop(d, st_value);
  654. if(dec_curr(d) != st_array) {
  655. ret = dec_error(d, "invalid_json");
  656. goto done;
  657. }
  658. dec_pop(d, st_array);
  659. dec_pop(d, st_value);
  660. val = curr; // curr is []
  661. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  662. ret = dec_error(d, "internal_error");
  663. goto done;
  664. }
  665. d->i++;
  666. break;
  667. default:
  668. ret = dec_error(d, "invalid_json");
  669. goto done;
  670. }
  671. if(dec_top(d) == 0) {
  672. dec_push(d, st_done);
  673. } else if(dec_curr(d) != st_value && dec_curr(d) != st_key) {
  674. dec_push(d, st_comma);
  675. curr = enif_make_list_cell(env, val, curr);
  676. }
  677. break;
  678. case st_key:
  679. switch(d->p[d->i]) {
  680. case ' ':
  681. case '\n':
  682. case '\r':
  683. case '\t':
  684. d->i++;
  685. break;
  686. case '\"':
  687. if(!dec_string(d, &val)) {
  688. ret = dec_error(d, "invalid_string");
  689. goto done;
  690. }
  691. dec_pop(d, st_key);
  692. dec_push(d, st_colon);
  693. curr = enif_make_list_cell(env, val, curr);
  694. break;
  695. case '}':
  696. if(!enif_is_empty_list(env, curr)) {
  697. ret = dec_error(d, "invalid_json");
  698. goto done;
  699. }
  700. dec_pop(d, st_key);
  701. dec_pop(d, st_object);
  702. dec_pop(d, st_value);
  703. val = enif_make_tuple1(env, curr);
  704. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  705. ret = dec_error(d, "internal_error");
  706. goto done;
  707. }
  708. if(dec_top(d) == 0) {
  709. dec_push(d, st_done);
  710. } else {
  711. dec_push(d, st_comma);
  712. curr = enif_make_list_cell(env, val, curr);
  713. }
  714. d->i++;
  715. break;
  716. default:
  717. ret = dec_error(d, "invalid_json");
  718. goto done;
  719. }
  720. break;
  721. case st_colon:
  722. switch(d->p[d->i]) {
  723. case ' ':
  724. case '\n':
  725. case '\r':
  726. case '\t':
  727. d->i++;
  728. break;
  729. case ':':
  730. dec_pop(d, st_colon);
  731. dec_push(d, st_value);
  732. d->i++;
  733. break;
  734. default:
  735. ret = dec_error(d, "invalid_json");
  736. goto done;
  737. }
  738. break;
  739. case st_comma:
  740. switch(d->p[d->i]) {
  741. case ' ':
  742. case '\n':
  743. case '\r':
  744. case '\t':
  745. d->i++;
  746. break;
  747. case ',':
  748. dec_pop(d, st_comma);
  749. switch(dec_curr(d)) {
  750. case st_object:
  751. dec_push(d, st_key);
  752. break;
  753. case st_array:
  754. dec_push(d, st_value);
  755. break;
  756. default:
  757. ret = dec_error(d, "internal_error");
  758. goto done;
  759. }
  760. d->i++;
  761. break;
  762. case '}':
  763. dec_pop(d, st_comma);
  764. if(dec_curr(d) != st_object) {
  765. ret = dec_error(d, "invalid_json");
  766. goto done;
  767. }
  768. dec_pop(d, st_object);
  769. dec_pop(d, st_value);
  770. val = make_object(env, curr);
  771. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  772. ret = dec_error(d, "internal_error");
  773. goto done;
  774. }
  775. if(dec_top(d) > 0) {
  776. dec_push(d, st_comma);
  777. curr = enif_make_list_cell(env, val, curr);
  778. } else {
  779. dec_push(d, st_done);
  780. }
  781. d->i++;
  782. break;
  783. case ']':
  784. dec_pop(d, st_comma);
  785. if(dec_curr(d) != st_array) {
  786. ret = dec_error(d, "invalid_json");
  787. goto done;
  788. }
  789. dec_pop(d, st_array);
  790. dec_pop(d, st_value);
  791. val = make_array(env, curr);
  792. if(!enif_get_list_cell(env, objs, &curr, &objs)) {
  793. ret = dec_error(d, "internal_error");
  794. goto done;
  795. }
  796. if(dec_top(d) > 0) {
  797. dec_push(d, st_comma);
  798. curr = enif_make_list_cell(env, val, curr);
  799. } else {
  800. dec_push(d, st_done);
  801. }
  802. d->i++;
  803. break;
  804. default:
  805. ret = dec_error(d, "invalid_json");
  806. goto done;
  807. }
  808. break;
  809. case st_done:
  810. switch(d->p[d->i]) {
  811. case ' ':
  812. case '\n':
  813. case '\r':
  814. case '\t':
  815. d->i++;
  816. break;
  817. default:
  818. ret = dec_error(d, "invalid_trailing_data");
  819. goto done;
  820. }
  821. break;
  822. default:
  823. ret = dec_error(d, "invalid_internal_state");
  824. goto done;
  825. }
  826. }
  827. if(dec_curr(d) != st_done) {
  828. ret = dec_error(d, "truncated_json");
  829. } else if(d->is_partial) {
  830. ret = enif_make_tuple2(env, d->atoms->atom_partial, val);
  831. } else {
  832. ret = val;
  833. }
  834. done:
  835. dec_destroy(d);
  836. return ret;
  837. }