You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

623 rivejä
15 KiB

  1. #include <assert.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include "erl_nif.h"
  5. #include "jiffy.h"
  6. #define BIN_INC_SIZE 1024
  7. typedef struct {
  8. ErlNifEnv* env;
  9. jiffy_st* atoms;
  10. int count;
  11. int iolen;
  12. ERL_NIF_TERM iolist;
  13. ErlNifBinary* curr;
  14. char* p;
  15. unsigned char* u;
  16. size_t i;
  17. } Encoder;
  18. int
  19. enc_init(Encoder* e, ErlNifEnv* env, ErlNifBinary* bin)
  20. {
  21. e->env = env;
  22. e->atoms = enif_priv_data(env);
  23. e->count = 0;
  24. e->iolen = 0;
  25. e->iolist = enif_make_list(env, 0);
  26. e->curr = bin;
  27. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  28. return 0;
  29. }
  30. memset(e->curr->data, 0, e->curr->size);
  31. e->p = (char*) e->curr->data;
  32. e->u = (unsigned char*) e->curr->data;
  33. e->i = 0;
  34. return 1;
  35. }
  36. void
  37. enc_destroy(Encoder* e)
  38. {
  39. if(e->curr != NULL) {
  40. enif_release_binary(e->curr);
  41. }
  42. }
  43. ERL_NIF_TERM
  44. enc_error(Encoder* e, const char* msg)
  45. {
  46. //assert(0 && msg);
  47. return make_error(e->atoms, e->env, msg);
  48. }
  49. int
  50. enc_ensure(Encoder* e, size_t req)
  51. {
  52. size_t new_sz;
  53. if(req < e->curr->size - e->i) {
  54. return 1;
  55. }
  56. new_sz = req - (e->curr->size - e->i) + e->curr->size;
  57. new_sz += BIN_INC_SIZE - (new_sz % BIN_INC_SIZE);
  58. assert(new_sz > e->curr->size && "Invalid size calculation.");
  59. if(!enif_realloc_binary(e->curr, new_sz)) {
  60. return 0;
  61. }
  62. e->p = (char*) e->curr->data;
  63. e->u = (unsigned char*) e->curr->data;
  64. memset(&(e->u[e->i]), 0, e->curr->size - e->i);
  65. return 1;
  66. }
  67. int
  68. enc_result(Encoder* e, ERL_NIF_TERM* value)
  69. {
  70. if(e->i != e->curr->size) {
  71. if(!enif_realloc_binary(e->curr, e->i)) {
  72. return 0;
  73. }
  74. }
  75. *value = enif_make_binary(e->env, e->curr);
  76. e->curr = NULL;
  77. return 1;
  78. }
  79. int
  80. enc_done(Encoder* e, ERL_NIF_TERM* value)
  81. {
  82. ERL_NIF_TERM last;
  83. if(e->iolen == 0) {
  84. return enc_result(e, value);
  85. }
  86. if(e->i > 0 ) {
  87. if(!enc_result(e, &last)) {
  88. return 0;
  89. }
  90. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  91. e->iolen++;
  92. }
  93. *value = e->iolist;
  94. return 1;
  95. }
  96. int
  97. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  98. {
  99. ErlNifBinary* bin = e->curr;
  100. ERL_NIF_TERM curr;
  101. if(e->i > 0) {
  102. if(!enc_result(e, &curr)) {
  103. return 0;
  104. }
  105. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  106. e->iolen++;
  107. }
  108. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  109. e->iolen++;
  110. // Reinitialize our binary for the next buffer.
  111. e->curr = bin;
  112. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  113. return 0;
  114. }
  115. memset(e->curr->data, 0, e->curr->size);
  116. e->p = (char*) e->curr->data;
  117. e->u = (unsigned char*) e->curr->data;
  118. e->i = 0;
  119. return 1;
  120. }
  121. int
  122. enc_literal(Encoder* e, const char* literal, size_t len)
  123. {
  124. if(!enc_ensure(e, len)) {
  125. return 0;
  126. }
  127. memcpy(&(e->p[e->i]), literal, len);
  128. e->i += len;
  129. e->count++;
  130. return 1;
  131. }
  132. int
  133. enc_string(Encoder* e, ERL_NIF_TERM val)
  134. {
  135. ErlNifBinary bin;
  136. char atom[512];
  137. unsigned char* data;
  138. size_t size;
  139. int esc_extra = 0;
  140. int ulen;
  141. int ui;
  142. int i;
  143. if(enif_is_binary(e->env, val)) {
  144. if(!enif_inspect_binary(e->env, val, &bin)) {
  145. return 0;
  146. }
  147. data = bin.data;
  148. size = bin.size;
  149. } else if(enif_is_atom(e->env, val)) {
  150. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  151. return 0;
  152. }
  153. data = (unsigned char*) atom;
  154. size = strlen(atom);
  155. } else {
  156. return 0;
  157. }
  158. i = 0;
  159. while(i < size) {
  160. switch((char) data[i]) {
  161. case '\"':
  162. case '\\':
  163. case '/':
  164. case '\b':
  165. case '\f':
  166. case '\n':
  167. case '\r':
  168. case '\t':
  169. esc_extra += 1;
  170. i++;
  171. continue;
  172. default:
  173. if(data[i] < 0x20) {
  174. esc_extra += 5;
  175. i++;
  176. continue;
  177. } else if(data[i] < 0x80) {
  178. i++;
  179. continue;
  180. }
  181. ulen = -1;
  182. if((data[i] & 0xE0) == 0xC0) {
  183. ulen = 1;
  184. } else if((data[i] & 0xF0) == 0xE0) {
  185. ulen = 2;
  186. } else if((data[i] & 0xF8) == 0xF0) {
  187. ulen = 3;
  188. } else if((data[i] & 0xFC) == 0xF8) {
  189. ulen = 4;
  190. } else if((data[i] & 0xFE) == 0xFC) {
  191. ulen = 5;
  192. }
  193. if(ulen < 0) {
  194. return 0;
  195. }
  196. if(i+1+ulen > size) {
  197. return 0;
  198. }
  199. for(ui = 0; ui < ulen; ui++) {
  200. if((data[i+1+ui] & 0xC0) != 0x80) {
  201. return 0;
  202. }
  203. }
  204. if(ulen == 1) {
  205. if((data[i] & 0x1E) == 0)
  206. return 0;
  207. } else if(ulen == 2) {
  208. if((data[i] & 0x0F) + (data[i+1] & 0x20) == 0)
  209. return 0;
  210. } else if(ulen == 3) {
  211. if((data[i] & 0x07) + (data[i+1] & 0x30) == 0)
  212. return 0;
  213. } else if(ulen == 4) {
  214. if((data[i] & 0x03) + (data[i+1] & 0x38) == 0)
  215. return 0;
  216. } else if(ulen == 5) {
  217. if((data[i] & 0x01) + (data[i+1] & 0x3C) == 0)
  218. return 0;
  219. }
  220. i += 1 + ulen;
  221. }
  222. }
  223. if(!enc_ensure(e, size + esc_extra + 2)) {
  224. return 0;
  225. }
  226. e->p[e->i++] = '\"';
  227. i = 0;
  228. while(i < size) {
  229. switch((char) data[i]) {
  230. case '\"':
  231. case '\\':
  232. case '/':
  233. e->p[e->i++] = '\\';
  234. e->u[e->i++] = data[i];
  235. i++;
  236. continue;
  237. case '\b':
  238. e->p[e->i++] = '\\';
  239. e->p[e->i++] = 'b';
  240. i++;
  241. continue;
  242. case '\f':
  243. e->p[e->i++] = '\\';
  244. e->p[e->i++] = 'f';
  245. i++;
  246. continue;
  247. case '\n':
  248. e->p[e->i++] = '\\';
  249. e->p[e->i++] = 'n';
  250. i++;
  251. continue;
  252. case '\r':
  253. e->p[e->i++] = '\\';
  254. e->p[e->i++] = 'r';
  255. i++;
  256. continue;
  257. case '\t':
  258. e->p[e->i++] = '\\';
  259. e->p[e->i++] = 't';
  260. i++;
  261. continue;
  262. default:
  263. if(data[i] < 0x20) {
  264. e->p[e->i++] = '\\';
  265. e->p[e->i++] = 'u';
  266. if(!int_to_hex(data[i], &(e->p[e->i]))) {
  267. return 0;
  268. }
  269. e->i += 4;
  270. i++;
  271. } else {
  272. e->u[e->i++] = data[i++];
  273. }
  274. }
  275. }
  276. e->p[e->i++] = '\"';
  277. e->count++;
  278. return 1;
  279. }
  280. int
  281. enc_long(Encoder* e, long val)
  282. {
  283. if(!enc_ensure(e, 32)) {
  284. return 0;
  285. }
  286. snprintf(&(e->p[e->i]), 32, "%ld", val);
  287. e->i += strlen(&(e->p[e->i]));
  288. e->count++;
  289. return 1;
  290. }
  291. int
  292. enc_double(Encoder* e, double val)
  293. {
  294. if(!enc_ensure(e, 32)) {
  295. return 0;
  296. }
  297. snprintf(&(e->p[e->i]), 31, "%0.20g", val);
  298. e->i += strlen(&(e->p[e->i]));
  299. e->count++;
  300. return 1;
  301. }
  302. int
  303. enc_char(Encoder* e, char c)
  304. {
  305. if(!enc_ensure(e, 1)) {
  306. return 0;
  307. }
  308. e->p[e->i++] = c;
  309. return 1;
  310. }
  311. int
  312. enc_start_object(Encoder* e)
  313. {
  314. e->count++;
  315. return enc_char(e, '{');
  316. }
  317. int
  318. enc_end_object(Encoder* e)
  319. {
  320. return enc_char(e, '}');
  321. }
  322. int
  323. enc_start_array(Encoder* e)
  324. {
  325. e->count++;
  326. return enc_char(e, '[');
  327. }
  328. int
  329. enc_end_array(Encoder* e)
  330. {
  331. return enc_char(e, ']');
  332. }
  333. int
  334. enc_colon(Encoder* e)
  335. {
  336. return enc_char(e, ':');
  337. }
  338. int
  339. enc_comma(Encoder* e)
  340. {
  341. return enc_char(e, ',');
  342. }
  343. ERL_NIF_TERM
  344. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  345. {
  346. Encoder enc;
  347. Encoder* e = &enc;
  348. ErlNifBinary bin;
  349. ERL_NIF_TERM ret;
  350. ERL_NIF_TERM stack;
  351. ERL_NIF_TERM curr;
  352. ERL_NIF_TERM item;
  353. const ERL_NIF_TERM* tuple;
  354. int arity;
  355. double dval;
  356. long lval;
  357. int is_partial = 0;
  358. if(argc != 1) {
  359. return enif_make_badarg(env);
  360. }
  361. if(!enc_init(e, env, &bin)) {
  362. return enif_make_badarg(env);
  363. }
  364. stack = enif_make_list(env, 1, argv[0]);
  365. while(!enif_is_empty_list(env, stack)) {
  366. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  367. ret = enc_error(e, "internal_error");
  368. goto done;
  369. }
  370. if(enif_is_identical(curr, e->atoms->ref_object)) {
  371. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  372. ret = enc_error(e, "internal_error");
  373. goto done;
  374. }
  375. if(enif_is_empty_list(env, curr)) {
  376. if(!enc_end_object(e)) {
  377. ret = enc_error(e, "internal_error");
  378. goto done;
  379. }
  380. continue;
  381. }
  382. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  383. ret = enc_error(e, "internal_error");
  384. goto done;
  385. }
  386. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  387. ret = enc_error(e, "invalid_object_pair");
  388. goto done;
  389. }
  390. if(arity != 2) {
  391. ret = enc_error(e, "invalid_object_pair");
  392. goto done;
  393. }
  394. if(!enc_comma(e)) {
  395. ret = enc_error(e, "internal_error");
  396. goto done;
  397. }
  398. if(!enc_string(e, tuple[0])) {
  399. ret = enc_error(e, "invalid_object_key");
  400. goto done;
  401. }
  402. if(!enc_colon(e)) {
  403. ret = enc_error(e, "internal_error");
  404. goto done;
  405. }
  406. stack = enif_make_list_cell(env, curr, stack);
  407. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  408. stack = enif_make_list_cell(env, tuple[1], stack);
  409. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  410. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  411. ret = enc_error(e, "internal_error");
  412. goto done;
  413. }
  414. if(enif_is_empty_list(env, curr)) {
  415. if(!enc_end_array(e)) {
  416. ret = enc_error(e, "internal_error");
  417. goto done;
  418. }
  419. continue;
  420. }
  421. if(!enc_comma(e)) {
  422. ret = enc_error(e, "internal_error");
  423. goto done;
  424. }
  425. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  426. ret = enc_error(e, "internal_error");
  427. goto done;
  428. }
  429. stack = enif_make_list_cell(env, curr, stack);
  430. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  431. stack = enif_make_list_cell(env, item, stack);
  432. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  433. if(!enc_literal(e, "null", 4)) {
  434. ret = enc_error(e, "null");
  435. goto done;
  436. }
  437. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  438. if(!enc_literal(e, "true", 4)) {
  439. ret = enc_error(e, "true");
  440. goto done;
  441. }
  442. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  443. if(!enc_literal(e, "false", 5)) {
  444. ret = enc_error(e, "false");
  445. goto done;
  446. }
  447. } else if(enif_is_binary(env, curr)) {
  448. if(!enc_string(e, curr)) {
  449. ret = enc_error(e, "invalid_string");
  450. goto done;
  451. }
  452. } else if(enif_is_atom(env, curr)) {
  453. if(!enc_string(e, curr)) {
  454. ret = enc_error(e, "invalid_string");
  455. goto done;
  456. }
  457. } else if(enif_get_int64(env, curr, &lval)) {
  458. if(!enc_long(e, lval)) {
  459. ret = enc_error(e, "internal_error");
  460. goto done;
  461. }
  462. } else if(enif_get_double(env, curr, &dval)) {
  463. if(!enc_double(e, dval)) {
  464. ret = enc_error(e, "internal_error");
  465. goto done;
  466. }
  467. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  468. if(arity != 1) {
  469. ret = enc_error(e, "invalid_ejson");
  470. goto done;
  471. }
  472. if(!enif_is_list(env, tuple[0])) {
  473. ret = enc_error(e, "invalid_object");
  474. goto done;
  475. }
  476. if(!enc_start_object(e)) {
  477. ret = enc_error(e, "internal_error");
  478. goto done;
  479. }
  480. if(enif_is_empty_list(env, tuple[0])) {
  481. if(!enc_end_object(e)) {
  482. ret = enc_error(e, "internal_error");
  483. goto done;
  484. }
  485. continue;
  486. }
  487. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  488. ret = enc_error(e, "internal_error");
  489. goto done;
  490. }
  491. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  492. ret = enc_error(e, "invalid_object_pair");
  493. goto done;
  494. }
  495. if(arity != 2) {
  496. ret = enc_error(e, "invalid_object_pair");
  497. goto done;
  498. }
  499. if(!enc_string(e, tuple[0])) {
  500. ret = enc_error(e, "invalid_object_key");
  501. goto done;
  502. }
  503. if(!enc_colon(e)) {
  504. ret = enc_error(e, "internal_error");
  505. goto done;
  506. }
  507. stack = enif_make_list_cell(env, curr, stack);
  508. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  509. stack = enif_make_list_cell(env, tuple[1], stack);
  510. } else if(enif_is_list(env, curr)) {
  511. if(!enc_start_array(e)) {
  512. ret = enc_error(e, "internal_error");
  513. goto done;
  514. }
  515. if(enif_is_empty_list(env, curr)) {
  516. if(!enc_end_array(e)) {
  517. ret = enc_error(e, "internal_error");
  518. goto done;
  519. }
  520. continue;
  521. }
  522. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  523. ret = enc_error(e, "internal_error");
  524. goto done;
  525. }
  526. stack = enif_make_list_cell(env, curr, stack);
  527. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  528. stack = enif_make_list_cell(env, item, stack);
  529. } else {
  530. is_partial = 1;
  531. if(!enc_unknown(e, curr)) {
  532. ret = enc_error(e, "internal_error");
  533. goto done;
  534. }
  535. }
  536. } while(!enif_is_empty_list(env, stack));
  537. if(!enc_done(e, &item)) {
  538. ret = enc_error(e, "internal_error");
  539. goto done;
  540. }
  541. if(!is_partial) {
  542. ret = enif_make_tuple2(env, e->atoms->atom_ok, item);
  543. } else {
  544. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  545. }
  546. done:
  547. enc_destroy(e);
  548. return ret;
  549. }