You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

630 lines
16 KiB

пре 13 година
пре 13 година
пре 13 година
пре 13 година
пре 13 година
пре 13 година
пре 13 година
пре 13 година
пре 13 година
пре 13 година
  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  8. #define BIN_INC_SIZE 2048
  9. typedef struct {
  10. ErlNifEnv* env;
  11. jiffy_st* atoms;
  12. int uescape;
  13. int count;
  14. int iolen;
  15. ERL_NIF_TERM iolist;
  16. ErlNifBinary* curr;
  17. char* p;
  18. unsigned char* u;
  19. size_t i;
  20. } Encoder;
  21. int
  22. enc_init(Encoder* e, ErlNifEnv* env, ERL_NIF_TERM opts, ErlNifBinary* bin)
  23. {
  24. ERL_NIF_TERM val;
  25. e->env = env;
  26. e->atoms = enif_priv_data(env);
  27. e->uescape = 0;
  28. e->count = 0;
  29. if(!enif_is_list(env, opts)) {
  30. return 0;
  31. }
  32. while(enif_get_list_cell(env, opts, &val, &opts)) {
  33. if(enif_compare(val, e->atoms->atom_uescape) == 0) {
  34. e->uescape = 1;
  35. } else {
  36. return 0;
  37. }
  38. }
  39. e->iolen = 0;
  40. e->iolist = enif_make_list(env, 0);
  41. e->curr = bin;
  42. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  43. return 0;
  44. }
  45. memset(e->curr->data, 0, e->curr->size);
  46. e->p = (char*) e->curr->data;
  47. e->u = (unsigned char*) e->curr->data;
  48. e->i = 0;
  49. return 1;
  50. }
  51. void
  52. enc_destroy(Encoder* e)
  53. {
  54. if(e->curr != NULL) {
  55. enif_release_binary(e->curr);
  56. }
  57. }
  58. ERL_NIF_TERM
  59. enc_error(Encoder* e, const char* msg)
  60. {
  61. //assert(0 && msg);
  62. return make_error(e->atoms, e->env, msg);
  63. }
  64. static inline int
  65. enc_ensure(Encoder* e, size_t req)
  66. {
  67. size_t need = e->curr->size;
  68. while(req >= (need - e->i)) need <<= 1;
  69. if(need != e->curr->size) {
  70. if(!enif_realloc_binary(e->curr, need)) {
  71. return 0;
  72. }
  73. e->p = (char*) e->curr->data;
  74. e->u = (unsigned char*) e->curr->data;
  75. }
  76. return 1;
  77. }
  78. int
  79. enc_result(Encoder* e, ERL_NIF_TERM* value)
  80. {
  81. if(e->i != e->curr->size) {
  82. if(!enif_realloc_binary(e->curr, e->i)) {
  83. return 0;
  84. }
  85. }
  86. *value = enif_make_binary(e->env, e->curr);
  87. e->curr = NULL;
  88. return 1;
  89. }
  90. int
  91. enc_done(Encoder* e, ERL_NIF_TERM* value)
  92. {
  93. ERL_NIF_TERM last;
  94. if(e->iolen == 0) {
  95. return enc_result(e, value);
  96. }
  97. if(e->i > 0 ) {
  98. if(!enc_result(e, &last)) {
  99. return 0;
  100. }
  101. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  102. e->iolen++;
  103. }
  104. *value = e->iolist;
  105. return 1;
  106. }
  107. static inline int
  108. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  109. {
  110. ErlNifBinary* bin = e->curr;
  111. ERL_NIF_TERM curr;
  112. if(e->i > 0) {
  113. if(!enc_result(e, &curr)) {
  114. return 0;
  115. }
  116. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  117. e->iolen++;
  118. }
  119. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  120. e->iolen++;
  121. // Reinitialize our binary for the next buffer.
  122. e->curr = bin;
  123. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  124. return 0;
  125. }
  126. memset(e->curr->data, 0, e->curr->size);
  127. e->p = (char*) e->curr->data;
  128. e->u = (unsigned char*) e->curr->data;
  129. e->i = 0;
  130. return 1;
  131. }
  132. static inline int
  133. enc_literal(Encoder* e, const char* literal, size_t len)
  134. {
  135. if(!enc_ensure(e, len)) {
  136. return 0;
  137. }
  138. memcpy(&(e->p[e->i]), literal, len);
  139. e->i += len;
  140. e->count++;
  141. return 1;
  142. }
  143. static inline int
  144. enc_string(Encoder* e, ERL_NIF_TERM val)
  145. {
  146. ErlNifBinary bin;
  147. char atom[512];
  148. unsigned char* data;
  149. size_t size;
  150. int esc_extra = 0;
  151. int ulen;
  152. int uval;
  153. int i;
  154. if(enif_is_binary(e->env, val)) {
  155. if(!enif_inspect_binary(e->env, val, &bin)) {
  156. return 0;
  157. }
  158. data = bin.data;
  159. size = bin.size;
  160. } else if(enif_is_atom(e->env, val)) {
  161. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  162. return 0;
  163. }
  164. data = (unsigned char*) atom;
  165. size = strlen(atom);
  166. } else {
  167. return 0;
  168. }
  169. i = 0;
  170. while(i < size) {
  171. switch((char) data[i]) {
  172. case '\"':
  173. case '\\':
  174. case '/':
  175. case '\b':
  176. case '\f':
  177. case '\n':
  178. case '\r':
  179. case '\t':
  180. esc_extra += 1;
  181. i++;
  182. continue;
  183. default:
  184. if(data[i] < 0x20) {
  185. esc_extra += 5;
  186. i++;
  187. continue;
  188. } else if(data[i] < 0x80) {
  189. i++;
  190. continue;
  191. }
  192. ulen = utf8_validate(&(data[i]), size - i);
  193. if(ulen < 0) {
  194. return 0;
  195. }
  196. if(e->uescape) {
  197. uval = utf8_to_unicode(&(data[i]), ulen);
  198. if(uval < 0) {
  199. return 0;
  200. }
  201. esc_extra = utf8_esc_len(uval);
  202. if(ulen < 0) {
  203. return 0;
  204. }
  205. }
  206. i += ulen;
  207. }
  208. }
  209. if(!enc_ensure(e, size + esc_extra + 2)) {
  210. return 0;
  211. }
  212. e->p[e->i++] = '\"';
  213. i = 0;
  214. while(i < size) {
  215. switch((char) data[i]) {
  216. case '\"':
  217. case '\\':
  218. case '/':
  219. e->p[e->i++] = '\\';
  220. e->u[e->i++] = data[i];
  221. i++;
  222. continue;
  223. case '\b':
  224. e->p[e->i++] = '\\';
  225. e->p[e->i++] = 'b';
  226. i++;
  227. continue;
  228. case '\f':
  229. e->p[e->i++] = '\\';
  230. e->p[e->i++] = 'f';
  231. i++;
  232. continue;
  233. case '\n':
  234. e->p[e->i++] = '\\';
  235. e->p[e->i++] = 'n';
  236. i++;
  237. continue;
  238. case '\r':
  239. e->p[e->i++] = '\\';
  240. e->p[e->i++] = 'r';
  241. i++;
  242. continue;
  243. case '\t':
  244. e->p[e->i++] = '\\';
  245. e->p[e->i++] = 't';
  246. i++;
  247. continue;
  248. default:
  249. if(data[i] < 0x20) {
  250. ulen = unicode_uescape(data[i], &(e->p[e->i]));
  251. if(ulen < 0) {
  252. return 0;
  253. }
  254. e->i += ulen;
  255. i++;
  256. } else if((data[i] & 0x80) && e->uescape) {
  257. uval = utf8_to_unicode(&(data[i]), size-i);
  258. if(uval < 0) {
  259. return 0;
  260. }
  261. ulen = unicode_uescape(uval, &(e->p[e->i]));
  262. if(ulen < 0) {
  263. return 0;
  264. }
  265. e->i += ulen;
  266. ulen = utf8_len(uval);
  267. if(ulen < 0) {
  268. return 0;
  269. }
  270. i += ulen;
  271. } else {
  272. e->u[e->i++] = data[i++];
  273. }
  274. }
  275. }
  276. e->p[e->i++] = '\"';
  277. e->count++;
  278. return 1;
  279. }
  280. static inline int
  281. enc_long(Encoder* e, ErlNifSInt64 val)
  282. {
  283. if(!enc_ensure(e, 32)) {
  284. return 0;
  285. }
  286. #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_))
  287. snprintf(&(e->p[e->i]), 32, "%ld", val);
  288. #elif SIZEOF_LONG == 8
  289. snprintf(&(e->p[e->i]), 32, "%ld", val);
  290. #else
  291. snprintf(&(e->p[e->i]), 32, "%lld", val);
  292. #endif
  293. e->i += strlen(&(e->p[e->i]));
  294. e->count++;
  295. return 1;
  296. }
  297. static inline int
  298. enc_double(Encoder* e, double val)
  299. {
  300. if(!enc_ensure(e, 32)) {
  301. return 0;
  302. }
  303. //snprintf(&(e->p[e->i]), 31, "%0.20g", val);
  304. sprintf(&(e->p[e->i]), "%.20g", val);
  305. e->i += strlen(&(e->p[e->i]));
  306. e->count++;
  307. return 1;
  308. }
  309. static inline int
  310. enc_char(Encoder* e, char c)
  311. {
  312. if(!enc_ensure(e, 1)) {
  313. return 0;
  314. }
  315. e->p[e->i++] = c;
  316. return 1;
  317. }
  318. static inline int
  319. enc_start_object(Encoder* e)
  320. {
  321. e->count++;
  322. return enc_char(e, '{');
  323. }
  324. static inline int
  325. enc_end_object(Encoder* e)
  326. {
  327. return enc_char(e, '}');
  328. }
  329. static inline int
  330. enc_start_array(Encoder* e)
  331. {
  332. e->count++;
  333. return enc_char(e, '[');
  334. }
  335. static inline int
  336. enc_end_array(Encoder* e)
  337. {
  338. return enc_char(e, ']');
  339. }
  340. static inline int
  341. enc_colon(Encoder* e)
  342. {
  343. return enc_char(e, ':');
  344. }
  345. static inline int
  346. enc_comma(Encoder* e)
  347. {
  348. return enc_char(e, ',');
  349. }
  350. ERL_NIF_TERM
  351. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  352. {
  353. Encoder enc;
  354. Encoder* e = &enc;
  355. ErlNifBinary bin;
  356. ERL_NIF_TERM ret;
  357. ERL_NIF_TERM stack;
  358. ERL_NIF_TERM curr;
  359. ERL_NIF_TERM item;
  360. const ERL_NIF_TERM* tuple;
  361. int arity;
  362. ErlNifSInt64 lval;
  363. double dval;
  364. if(argc != 2) {
  365. return enif_make_badarg(env);
  366. }
  367. if(!enc_init(e, env, argv[1], &bin)) {
  368. return enif_make_badarg(env);
  369. }
  370. stack = enif_make_list(env, 1, argv[0]);
  371. while(!enif_is_empty_list(env, stack)) {
  372. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  373. ret = enc_error(e, "internal_error");
  374. goto done;
  375. }
  376. if(enif_is_identical(curr, e->atoms->ref_object)) {
  377. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  378. ret = enc_error(e, "internal_error");
  379. goto done;
  380. }
  381. if(enif_is_empty_list(env, curr)) {
  382. if(!enc_end_object(e)) {
  383. ret = enc_error(e, "internal_error");
  384. goto done;
  385. }
  386. continue;
  387. }
  388. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  389. ret = enc_error(e, "internal_error");
  390. goto done;
  391. }
  392. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  393. ret = enc_error(e, "invalid_object_pair");
  394. goto done;
  395. }
  396. if(arity != 2) {
  397. ret = enc_error(e, "invalid_object_pair");
  398. goto done;
  399. }
  400. if(!enc_comma(e)) {
  401. ret = enc_error(e, "internal_error");
  402. goto done;
  403. }
  404. if(!enc_string(e, tuple[0])) {
  405. ret = enc_error(e, "invalid_object_key");
  406. goto done;
  407. }
  408. if(!enc_colon(e)) {
  409. ret = enc_error(e, "internal_error");
  410. goto done;
  411. }
  412. stack = enif_make_list_cell(env, curr, stack);
  413. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  414. stack = enif_make_list_cell(env, tuple[1], stack);
  415. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  416. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  417. ret = enc_error(e, "internal_error");
  418. goto done;
  419. }
  420. if(enif_is_empty_list(env, curr)) {
  421. if(!enc_end_array(e)) {
  422. ret = enc_error(e, "internal_error");
  423. goto done;
  424. }
  425. continue;
  426. }
  427. if(!enc_comma(e)) {
  428. ret = enc_error(e, "internal_error");
  429. goto done;
  430. }
  431. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  432. ret = enc_error(e, "internal_error");
  433. goto done;
  434. }
  435. stack = enif_make_list_cell(env, curr, stack);
  436. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  437. stack = enif_make_list_cell(env, item, stack);
  438. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  439. if(!enc_literal(e, "null", 4)) {
  440. ret = enc_error(e, "null");
  441. goto done;
  442. }
  443. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  444. if(!enc_literal(e, "true", 4)) {
  445. ret = enc_error(e, "true");
  446. goto done;
  447. }
  448. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  449. if(!enc_literal(e, "false", 5)) {
  450. ret = enc_error(e, "false");
  451. goto done;
  452. }
  453. } else if(enif_is_binary(env, curr)) {
  454. if(!enc_string(e, curr)) {
  455. ret = enc_error(e, "invalid_string");
  456. goto done;
  457. }
  458. } else if(enif_is_atom(env, curr)) {
  459. if(!enc_string(e, curr)) {
  460. ret = enc_error(e, "invalid_string");
  461. goto done;
  462. }
  463. } else if(enif_get_int64(env, curr, &lval)) {
  464. if(!enc_long(e, lval)) {
  465. ret = enc_error(e, "internal_error");
  466. goto done;
  467. }
  468. } else if(enif_get_double(env, curr, &dval)) {
  469. if(!enc_double(e, dval)) {
  470. ret = enc_error(e, "internal_error");
  471. goto done;
  472. }
  473. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  474. if(arity != 1) {
  475. ret = enc_error(e, "invalid_ejson");
  476. goto done;
  477. }
  478. if(!enif_is_list(env, tuple[0])) {
  479. ret = enc_error(e, "invalid_object");
  480. goto done;
  481. }
  482. if(!enc_start_object(e)) {
  483. ret = enc_error(e, "internal_error");
  484. goto done;
  485. }
  486. if(enif_is_empty_list(env, tuple[0])) {
  487. if(!enc_end_object(e)) {
  488. ret = enc_error(e, "internal_error");
  489. goto done;
  490. }
  491. continue;
  492. }
  493. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  494. ret = enc_error(e, "internal_error");
  495. goto done;
  496. }
  497. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  498. ret = enc_error(e, "invalid_object_member");
  499. goto done;
  500. }
  501. if(arity != 2) {
  502. ret = enc_error(e, "invalid_object_member_arity");
  503. goto done;
  504. }
  505. if(!enc_string(e, tuple[0])) {
  506. ret = enc_error(e, "invalid_object_member_key");
  507. goto done;
  508. }
  509. if(!enc_colon(e)) {
  510. ret = enc_error(e, "internal_error");
  511. goto done;
  512. }
  513. stack = enif_make_list_cell(env, curr, stack);
  514. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  515. stack = enif_make_list_cell(env, tuple[1], stack);
  516. } else if(enif_is_list(env, curr)) {
  517. if(!enc_start_array(e)) {
  518. ret = enc_error(e, "internal_error");
  519. goto done;
  520. }
  521. if(enif_is_empty_list(env, curr)) {
  522. if(!enc_end_array(e)) {
  523. ret = enc_error(e, "internal_error");
  524. goto done;
  525. }
  526. continue;
  527. }
  528. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  529. ret = enc_error(e, "internal_error");
  530. goto done;
  531. }
  532. stack = enif_make_list_cell(env, curr, stack);
  533. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  534. stack = enif_make_list_cell(env, item, stack);
  535. } else {
  536. if(!enc_unknown(e, curr)) {
  537. ret = enc_error(e, "internal_error");
  538. goto done;
  539. }
  540. }
  541. } while(!enif_is_empty_list(env, stack));
  542. if(!enc_done(e, &item)) {
  543. ret = enc_error(e, "internal_error");
  544. goto done;
  545. }
  546. if(e->iolen == 0) {
  547. ret = item;
  548. } else {
  549. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  550. }
  551. done:
  552. enc_destroy(e);
  553. return ret;
  554. }