You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

623 lines
16 KiB

  1. // This file is part of Jiffy released under the MIT license.
  2. // See the LICENSE file for more information.
  3. #include <assert.h>
  4. #include <stdio.h>
  5. #include <string.h>
  6. #include "erl_nif.h"
  7. #include "jiffy.h"
  8. #define BIN_INC_SIZE 1024
  // Mutable state shared by every enc_* helper during a single encode() call.
  typedef struct {
  ErlNifEnv* env;       // NIF environment used to build terms
  jiffy_st* atoms;      // result of enif_priv_data(env); set in enc_init
  int count;            // incremented per literal, string, number and container start; not read in this file section
  int iolen;            // number of cells pushed onto iolist so far
  ERL_NIF_TERM iolist;  // flushed chunks and deferred terms; new cells are prepended
  ErlNifBinary* curr;   // binary currently being filled; NULL once handed to a term
  char* p;              // curr->data viewed as char*
  unsigned char* u;     // curr->data viewed as unsigned char*
  size_t i;             // offset in curr of the next byte to write
  } Encoder;
  20. int
  21. enc_init(Encoder* e, ErlNifEnv* env, ErlNifBinary* bin)
  22. {
  23. e->env = env;
  24. e->atoms = enif_priv_data(env);
  25. e->count = 0;
  26. e->iolen = 0;
  27. e->iolist = enif_make_list(env, 0);
  28. e->curr = bin;
  29. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  30. return 0;
  31. }
  32. memset(e->curr->data, 0, e->curr->size);
  33. e->p = (char*) e->curr->data;
  34. e->u = (unsigned char*) e->curr->data;
  35. e->i = 0;
  36. return 1;
  37. }
  38. void
  39. enc_destroy(Encoder* e)
  40. {
  41. if(e->curr != NULL) {
  42. enif_release_binary(e->curr);
  43. }
  44. }
  45. ERL_NIF_TERM
  46. enc_error(Encoder* e, const char* msg)
  47. {
  48. //assert(0 && msg);
  49. return make_error(e->atoms, e->env, msg);
  50. }
  51. int
  52. enc_ensure(Encoder* e, size_t req)
  53. {
  54. size_t new_sz;
  55. if(req < e->curr->size - e->i) {
  56. return 1;
  57. }
  58. new_sz = req - (e->curr->size - e->i) + e->curr->size;
  59. new_sz += BIN_INC_SIZE - (new_sz % BIN_INC_SIZE);
  60. assert(new_sz > e->curr->size && "Invalid size calculation.");
  61. if(!enif_realloc_binary(e->curr, new_sz)) {
  62. return 0;
  63. }
  64. e->p = (char*) e->curr->data;
  65. e->u = (unsigned char*) e->curr->data;
  66. memset(&(e->u[e->i]), 0, e->curr->size - e->i);
  67. return 1;
  68. }
  69. int
  70. enc_result(Encoder* e, ERL_NIF_TERM* value)
  71. {
  72. if(e->i != e->curr->size) {
  73. if(!enif_realloc_binary(e->curr, e->i)) {
  74. return 0;
  75. }
  76. }
  77. *value = enif_make_binary(e->env, e->curr);
  78. e->curr = NULL;
  79. return 1;
  80. }
  81. int
  82. enc_done(Encoder* e, ERL_NIF_TERM* value)
  83. {
  84. ERL_NIF_TERM last;
  85. if(e->iolen == 0) {
  86. return enc_result(e, value);
  87. }
  88. if(e->i > 0 ) {
  89. if(!enc_result(e, &last)) {
  90. return 0;
  91. }
  92. e->iolist = enif_make_list_cell(e->env, last, e->iolist);
  93. e->iolen++;
  94. }
  95. *value = e->iolist;
  96. return 1;
  97. }
  98. int
  99. enc_unknown(Encoder* e, ERL_NIF_TERM value)
  100. {
  101. ErlNifBinary* bin = e->curr;
  102. ERL_NIF_TERM curr;
  103. if(e->i > 0) {
  104. if(!enc_result(e, &curr)) {
  105. return 0;
  106. }
  107. e->iolist = enif_make_list_cell(e->env, curr, e->iolist);
  108. e->iolen++;
  109. }
  110. e->iolist = enif_make_list_cell(e->env, value, e->iolist);
  111. e->iolen++;
  112. // Reinitialize our binary for the next buffer.
  113. e->curr = bin;
  114. if(!enif_alloc_binary(BIN_INC_SIZE, e->curr)) {
  115. return 0;
  116. }
  117. memset(e->curr->data, 0, e->curr->size);
  118. e->p = (char*) e->curr->data;
  119. e->u = (unsigned char*) e->curr->data;
  120. e->i = 0;
  121. return 1;
  122. }
  123. int
  124. enc_literal(Encoder* e, const char* literal, size_t len)
  125. {
  126. if(!enc_ensure(e, len)) {
  127. return 0;
  128. }
  129. memcpy(&(e->p[e->i]), literal, len);
  130. e->i += len;
  131. e->count++;
  132. return 1;
  133. }
  134. int
  135. enc_string(Encoder* e, ERL_NIF_TERM val)
  136. {
  137. ErlNifBinary bin;
  138. char atom[512];
  139. unsigned char* data;
  140. size_t size;
  141. int esc_extra = 0;
  142. int ulen;
  143. int ui;
  144. int i;
  145. if(enif_is_binary(e->env, val)) {
  146. if(!enif_inspect_binary(e->env, val, &bin)) {
  147. return 0;
  148. }
  149. data = bin.data;
  150. size = bin.size;
  151. } else if(enif_is_atom(e->env, val)) {
  152. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  153. return 0;
  154. }
  155. data = (unsigned char*) atom;
  156. size = strlen(atom);
  157. } else {
  158. return 0;
  159. }
  160. i = 0;
  161. while(i < size) {
  162. switch((char) data[i]) {
  163. case '\"':
  164. case '\\':
  165. case '/':
  166. case '\b':
  167. case '\f':
  168. case '\n':
  169. case '\r':
  170. case '\t':
  171. esc_extra += 1;
  172. i++;
  173. continue;
  174. default:
  175. if(data[i] < 0x20) {
  176. esc_extra += 5;
  177. i++;
  178. continue;
  179. } else if(data[i] < 0x80) {
  180. i++;
  181. continue;
  182. }
  183. ulen = -1;
  184. if((data[i] & 0xE0) == 0xC0) {
  185. ulen = 1;
  186. } else if((data[i] & 0xF0) == 0xE0) {
  187. ulen = 2;
  188. } else if((data[i] & 0xF8) == 0xF0) {
  189. ulen = 3;
  190. } else if((data[i] & 0xFC) == 0xF8) {
  191. ulen = 4;
  192. } else if((data[i] & 0xFE) == 0xFC) {
  193. ulen = 5;
  194. }
  195. if(ulen < 0) {
  196. return 0;
  197. }
  198. if(i+1+ulen > size) {
  199. return 0;
  200. }
  201. for(ui = 0; ui < ulen; ui++) {
  202. if((data[i+1+ui] & 0xC0) != 0x80) {
  203. return 0;
  204. }
  205. }
  206. if(ulen == 1) {
  207. if((data[i] & 0x1E) == 0)
  208. return 0;
  209. } else if(ulen == 2) {
  210. if((data[i] & 0x0F) + (data[i+1] & 0x20) == 0)
  211. return 0;
  212. } else if(ulen == 3) {
  213. if((data[i] & 0x07) + (data[i+1] & 0x30) == 0)
  214. return 0;
  215. } else if(ulen == 4) {
  216. if((data[i] & 0x03) + (data[i+1] & 0x38) == 0)
  217. return 0;
  218. } else if(ulen == 5) {
  219. if((data[i] & 0x01) + (data[i+1] & 0x3C) == 0)
  220. return 0;
  221. }
  222. i += 1 + ulen;
  223. }
  224. }
  225. if(!enc_ensure(e, size + esc_extra + 2)) {
  226. return 0;
  227. }
  228. e->p[e->i++] = '\"';
  229. i = 0;
  230. while(i < size) {
  231. switch((char) data[i]) {
  232. case '\"':
  233. case '\\':
  234. case '/':
  235. e->p[e->i++] = '\\';
  236. e->u[e->i++] = data[i];
  237. i++;
  238. continue;
  239. case '\b':
  240. e->p[e->i++] = '\\';
  241. e->p[e->i++] = 'b';
  242. i++;
  243. continue;
  244. case '\f':
  245. e->p[e->i++] = '\\';
  246. e->p[e->i++] = 'f';
  247. i++;
  248. continue;
  249. case '\n':
  250. e->p[e->i++] = '\\';
  251. e->p[e->i++] = 'n';
  252. i++;
  253. continue;
  254. case '\r':
  255. e->p[e->i++] = '\\';
  256. e->p[e->i++] = 'r';
  257. i++;
  258. continue;
  259. case '\t':
  260. e->p[e->i++] = '\\';
  261. e->p[e->i++] = 't';
  262. i++;
  263. continue;
  264. default:
  265. if(data[i] < 0x20) {
  266. e->p[e->i++] = '\\';
  267. e->p[e->i++] = 'u';
  268. if(!int_to_hex(data[i], &(e->p[e->i]))) {
  269. return 0;
  270. }
  271. e->i += 4;
  272. i++;
  273. } else {
  274. e->u[e->i++] = data[i++];
  275. }
  276. }
  277. }
  278. e->p[e->i++] = '\"';
  279. e->count++;
  280. return 1;
  281. }
  282. int
  283. enc_long(Encoder* e, long val)
  284. {
  285. if(!enc_ensure(e, 32)) {
  286. return 0;
  287. }
  288. snprintf(&(e->p[e->i]), 32, "%ld", val);
  289. e->i += strlen(&(e->p[e->i]));
  290. e->count++;
  291. return 1;
  292. }
  293. int
  294. enc_double(Encoder* e, double val)
  295. {
  296. if(!enc_ensure(e, 32)) {
  297. return 0;
  298. }
  299. snprintf(&(e->p[e->i]), 31, "%0.20g", val);
  300. e->i += strlen(&(e->p[e->i]));
  301. e->count++;
  302. return 1;
  303. }
  304. int
  305. enc_char(Encoder* e, char c)
  306. {
  307. if(!enc_ensure(e, 1)) {
  308. return 0;
  309. }
  310. e->p[e->i++] = c;
  311. return 1;
  312. }
  313. int
  314. enc_start_object(Encoder* e)
  315. {
  316. e->count++;
  317. return enc_char(e, '{');
  318. }
  319. int
  320. enc_end_object(Encoder* e)
  321. {
  322. return enc_char(e, '}');
  323. }
  324. int
  325. enc_start_array(Encoder* e)
  326. {
  327. e->count++;
  328. return enc_char(e, '[');
  329. }
  330. int
  331. enc_end_array(Encoder* e)
  332. {
  333. return enc_char(e, ']');
  334. }
  335. int
  336. enc_colon(Encoder* e)
  337. {
  338. return enc_char(e, ':');
  339. }
  340. int
  341. enc_comma(Encoder* e)
  342. {
  343. return enc_char(e, ',');
  344. }
  345. ERL_NIF_TERM
  346. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  347. {
  348. Encoder enc;
  349. Encoder* e = &enc;
  350. ErlNifBinary bin;
  351. ERL_NIF_TERM ret;
  352. ERL_NIF_TERM stack;
  353. ERL_NIF_TERM curr;
  354. ERL_NIF_TERM item;
  355. const ERL_NIF_TERM* tuple;
  356. int arity;
  357. double dval;
  358. long lval;
  359. if(argc != 1) {
  360. return enif_make_badarg(env);
  361. }
  362. if(!enc_init(e, env, &bin)) {
  363. return enif_make_badarg(env);
  364. }
  365. stack = enif_make_list(env, 1, argv[0]);
  366. while(!enif_is_empty_list(env, stack)) {
  367. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  368. ret = enc_error(e, "internal_error");
  369. goto done;
  370. }
  371. if(enif_is_identical(curr, e->atoms->ref_object)) {
  372. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  373. ret = enc_error(e, "internal_error");
  374. goto done;
  375. }
  376. if(enif_is_empty_list(env, curr)) {
  377. if(!enc_end_object(e)) {
  378. ret = enc_error(e, "internal_error");
  379. goto done;
  380. }
  381. continue;
  382. }
  383. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  384. ret = enc_error(e, "internal_error");
  385. goto done;
  386. }
  387. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  388. ret = enc_error(e, "invalid_object_pair");
  389. goto done;
  390. }
  391. if(arity != 2) {
  392. ret = enc_error(e, "invalid_object_pair");
  393. goto done;
  394. }
  395. if(!enc_comma(e)) {
  396. ret = enc_error(e, "internal_error");
  397. goto done;
  398. }
  399. if(!enc_string(e, tuple[0])) {
  400. ret = enc_error(e, "invalid_object_key");
  401. goto done;
  402. }
  403. if(!enc_colon(e)) {
  404. ret = enc_error(e, "internal_error");
  405. goto done;
  406. }
  407. stack = enif_make_list_cell(env, curr, stack);
  408. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  409. stack = enif_make_list_cell(env, tuple[1], stack);
  410. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  411. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  412. ret = enc_error(e, "internal_error");
  413. goto done;
  414. }
  415. if(enif_is_empty_list(env, curr)) {
  416. if(!enc_end_array(e)) {
  417. ret = enc_error(e, "internal_error");
  418. goto done;
  419. }
  420. continue;
  421. }
  422. if(!enc_comma(e)) {
  423. ret = enc_error(e, "internal_error");
  424. goto done;
  425. }
  426. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  427. ret = enc_error(e, "internal_error");
  428. goto done;
  429. }
  430. stack = enif_make_list_cell(env, curr, stack);
  431. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  432. stack = enif_make_list_cell(env, item, stack);
  433. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  434. if(!enc_literal(e, "null", 4)) {
  435. ret = enc_error(e, "null");
  436. goto done;
  437. }
  438. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  439. if(!enc_literal(e, "true", 4)) {
  440. ret = enc_error(e, "true");
  441. goto done;
  442. }
  443. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  444. if(!enc_literal(e, "false", 5)) {
  445. ret = enc_error(e, "false");
  446. goto done;
  447. }
  448. } else if(enif_is_binary(env, curr)) {
  449. if(!enc_string(e, curr)) {
  450. ret = enc_error(e, "invalid_string");
  451. goto done;
  452. }
  453. } else if(enif_is_atom(env, curr)) {
  454. if(!enc_string(e, curr)) {
  455. ret = enc_error(e, "invalid_string");
  456. goto done;
  457. }
  458. } else if(enif_get_int64(env, curr, &lval)) {
  459. if(!enc_long(e, lval)) {
  460. ret = enc_error(e, "internal_error");
  461. goto done;
  462. }
  463. } else if(enif_get_double(env, curr, &dval)) {
  464. if(!enc_double(e, dval)) {
  465. ret = enc_error(e, "internal_error");
  466. goto done;
  467. }
  468. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  469. if(arity != 1) {
  470. ret = enc_error(e, "invalid_ejson");
  471. goto done;
  472. }
  473. if(!enif_is_list(env, tuple[0])) {
  474. ret = enc_error(e, "invalid_object");
  475. goto done;
  476. }
  477. if(!enc_start_object(e)) {
  478. ret = enc_error(e, "internal_error");
  479. goto done;
  480. }
  481. if(enif_is_empty_list(env, tuple[0])) {
  482. if(!enc_end_object(e)) {
  483. ret = enc_error(e, "internal_error");
  484. goto done;
  485. }
  486. continue;
  487. }
  488. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  489. ret = enc_error(e, "internal_error");
  490. goto done;
  491. }
  492. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  493. ret = enc_error(e, "invalid_object_member");
  494. goto done;
  495. }
  496. if(arity != 2) {
  497. ret = enc_error(e, "invalid_object_member_arity");
  498. goto done;
  499. }
  500. if(!enc_string(e, tuple[0])) {
  501. ret = enc_error(e, "invalid_object_member_key");
  502. goto done;
  503. }
  504. if(!enc_colon(e)) {
  505. ret = enc_error(e, "internal_error");
  506. goto done;
  507. }
  508. stack = enif_make_list_cell(env, curr, stack);
  509. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  510. stack = enif_make_list_cell(env, tuple[1], stack);
  511. } else if(enif_is_list(env, curr)) {
  512. if(!enc_start_array(e)) {
  513. ret = enc_error(e, "internal_error");
  514. goto done;
  515. }
  516. if(enif_is_empty_list(env, curr)) {
  517. if(!enc_end_array(e)) {
  518. ret = enc_error(e, "internal_error");
  519. goto done;
  520. }
  521. continue;
  522. }
  523. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  524. ret = enc_error(e, "internal_error");
  525. goto done;
  526. }
  527. stack = enif_make_list_cell(env, curr, stack);
  528. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  529. stack = enif_make_list_cell(env, item, stack);
  530. } else {
  531. if(!enc_unknown(e, curr)) {
  532. ret = enc_error(e, "internal_error");
  533. goto done;
  534. }
  535. }
  536. } while(!enif_is_empty_list(env, stack));
  537. if(!enc_done(e, &item)) {
  538. ret = enc_error(e, "internal_error");
  539. goto done;
  540. }
  541. if(e->iolen == 0) {
  542. ret = item;
  543. } else {
  544. ret = enif_make_tuple2(env, e->atoms->atom_partial, item);
  545. }
  546. done:
  547. enc_destroy(e);
  548. return ret;
  549. }