You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

556 lines
14 KiB

  1. #include <assert.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include "erl_nif.h"
  5. #include "jiffy.h"
  6. #define BIN_INC_SIZE 1024
  7. typedef struct {
  8. ErlNifEnv* env;
  9. jiffy_st* atoms;
  10. int count;
  11. ERL_NIF_TERM iolist;
  12. ErlNifBinary curr;
  13. int cleared;
  14. char* p;
  15. unsigned char* u;
  16. size_t i;
  17. } Encoder;
  18. int
  19. enc_init(Encoder* e, ErlNifEnv* env)
  20. {
  21. e->env = env;
  22. e->atoms = enif_priv_data(env);
  23. e->count = 0;
  24. e->iolist = enif_make_list(env, 0);
  25. if(!enif_alloc_binary(BIN_INC_SIZE, &(e->curr))) {
  26. return 0;
  27. }
  28. e->cleared = 0;
  29. e->p = (char*) e->curr.data;
  30. e->u = (unsigned char*) e->curr.data;
  31. e->i = 0;
  32. return 1;
  33. }
  34. void
  35. enc_destroy(Encoder* e)
  36. {
  37. if(!e->cleared) {
  38. enif_release_binary(&(e->curr));
  39. }
  40. }
  41. ERL_NIF_TERM
  42. enc_error(Encoder* e, const char* msg)
  43. {
  44. assert(0 && msg);
  45. return make_error(e->atoms, e->env, msg);
  46. }
  47. int
  48. enc_result(Encoder* e, ERL_NIF_TERM* value)
  49. {
  50. if(e->i != e->curr.size) {
  51. if(!enif_realloc_binary(&(e->curr), e->i)) {
  52. return 0;
  53. }
  54. }
  55. *value = enif_make_binary(e->env, &(e->curr));
  56. e->cleared = 1;
  57. return 1;
  58. }
  59. int
  60. enc_ensure(Encoder* e, size_t req)
  61. {
  62. size_t new_sz;
  63. if(req < e->curr.size - e->i) {
  64. return 1;
  65. }
  66. new_sz = req - (e->curr.size - e->i);
  67. new_sz += BIN_INC_SIZE - (new_sz % BIN_INC_SIZE);
  68. assert(new_sz % BIN_INC_SIZE == 0 && "Invalid modulo math.");
  69. if(!enif_realloc_binary(&(e->curr), new_sz)) {
  70. return 0;
  71. }
  72. memset(&(e->u[e->i]), 0, e->curr.size - e->i);
  73. return 1;
  74. }
  75. int
  76. enc_literal(Encoder* e, const char* literal, size_t len)
  77. {
  78. if(!enc_ensure(e, len)) {
  79. return 0;
  80. }
  81. memcpy(&(e->p[e->i]), literal, len);
  82. e->i += len;
  83. e->count++;
  84. return 1;
  85. }
  86. int
  87. enc_string(Encoder* e, ERL_NIF_TERM val)
  88. {
  89. ErlNifBinary bin;
  90. char atom[512];
  91. int esc_extra = 0;
  92. int ulen;
  93. int ui;
  94. int i;
  95. if(enif_is_binary(e->env, val)) {
  96. if(!enif_inspect_binary(e->env, val, &bin)) {
  97. return 0;
  98. }
  99. } else if(enif_is_atom(e->env, val)) {
  100. if(!enif_get_atom(e->env, val, atom, 512, ERL_NIF_LATIN1)) {
  101. return 0;
  102. }
  103. // Fake as a binary for code below.
  104. bin.data = (unsigned char*) atom;
  105. bin.size = strlen(atom);
  106. } else {
  107. return 0;
  108. }
  109. i = 0;
  110. while(i < bin.size) {
  111. switch((char) bin.data[i]) {
  112. case '\"':
  113. case '\\':
  114. case '/':
  115. case '\b':
  116. case '\f':
  117. case '\n':
  118. case '\r':
  119. case '\t':
  120. esc_extra += 1;
  121. i++;
  122. continue;
  123. default:
  124. if(bin.data[i] < 0x20) {
  125. esc_extra += 5;
  126. i++;
  127. continue;
  128. } else if(bin.data[i] < 0x80) {
  129. i++;
  130. continue;
  131. }
  132. ulen = -1;
  133. if((bin.data[i] & 0xE0) == 0xC0) {
  134. ulen = 1;
  135. } else if((bin.data[i] & 0xF0) == 0xE0) {
  136. ulen = 2;
  137. } else if((bin.data[i] & 0xF8) == 0xF0) {
  138. ulen = 3;
  139. } else if((bin.data[i] & 0xFC) == 0xF8) {
  140. ulen = 4;
  141. } else if((bin.data[i] & 0xFE) == 0xFC) {
  142. ulen = 5;
  143. }
  144. if(ulen < 0) {
  145. return 0;
  146. }
  147. if(i+1+ulen > bin.size) {
  148. return 0;
  149. }
  150. for(ui = 0; ui < ulen; ui++) {
  151. if((bin.data[i+1+ui] & 0xC0) != 0x80) {
  152. return 0;
  153. }
  154. }
  155. if(ulen == 1) {
  156. if((bin.data[i] & 0x1E) == 0)
  157. return 0;
  158. } else if(ulen == 2) {
  159. if((bin.data[i] & 0x0F) + (bin.data[i+1] & 0x20) == 0)
  160. return 0;
  161. } else if(ulen == 3) {
  162. if((bin.data[i] & 0x07) + (bin.data[i+1] & 0x30) == 0)
  163. return 0;
  164. } else if(ulen == 4) {
  165. if((bin.data[i] & 0x03) + (bin.data[i+1] & 0x38) == 0)
  166. return 0;
  167. } else if(ulen == 5) {
  168. if((bin.data[i] & 0x01) + (bin.data[i+1] & 0x3C) == 0)
  169. return 0;
  170. }
  171. i += 1 + ulen;
  172. }
  173. }
  174. if(!enc_ensure(e, bin.size + esc_extra + 2)) {
  175. return 0;
  176. }
  177. e->p[e->i++] = '\"';
  178. i = 0;
  179. while(i < bin.size) {
  180. switch((char) bin.data[i]) {
  181. case '\"':
  182. case '\\':
  183. case '/':
  184. e->p[e->i++] = '\\';
  185. e->u[e->i++] = bin.data[i];
  186. i++;
  187. continue;
  188. case '\b':
  189. e->p[e->i++] = '\\';
  190. e->p[e->i++] = 'b';
  191. i++;
  192. continue;
  193. case '\f':
  194. e->p[e->i++] = '\\';
  195. e->p[e->i++] = 'f';
  196. i++;
  197. continue;
  198. case '\n':
  199. e->p[e->i++] = '\\';
  200. e->p[e->i++] = 'n';
  201. i++;
  202. continue;
  203. case '\r':
  204. e->p[e->i++] = '\\';
  205. e->p[e->i++] = 'r';
  206. i++;
  207. continue;
  208. case '\t':
  209. e->p[e->i++] = '\\';
  210. e->p[e->i++] = 't';
  211. i++;
  212. continue;
  213. default:
  214. if(bin.data[i] < 0x20) {
  215. e->p[e->i++] = '\\';
  216. e->p[e->i++] = 'u';
  217. if(!int_to_hex(bin.data[i], &(e->p[e->i]))) {
  218. return 0;
  219. }
  220. e->i += 4;
  221. i++;
  222. } else {
  223. e->u[e->i++] = bin.data[i++];
  224. }
  225. }
  226. }
  227. e->p[e->i++] = '\"';
  228. e->count++;
  229. return 1;
  230. }
  231. int
  232. enc_long(Encoder* e, long val)
  233. {
  234. if(!enc_ensure(e, 32)) {
  235. return 0;
  236. }
  237. snprintf(&(e->p[e->i]), 32, "%ld", val);
  238. e->i += strlen(&(e->p[e->i]));
  239. e->count++;
  240. return 1;
  241. }
  242. int
  243. enc_double(Encoder* e, double val)
  244. {
  245. if(!enc_ensure(e, 32)) {
  246. return 0;
  247. }
  248. snprintf(&(e->p[e->i]), 32, "%g", val);
  249. e->i += strlen(&(e->p[e->i]));
  250. e->count++;
  251. return 1;
  252. }
  253. int
  254. enc_char(Encoder* e, char c)
  255. {
  256. if(!enc_ensure(e, 1)) {
  257. return 0;
  258. }
  259. e->p[e->i++] = c;
  260. return 1;
  261. }
  262. int
  263. enc_start_object(Encoder* e)
  264. {
  265. e->count++;
  266. return enc_char(e, '{');
  267. }
  268. int
  269. enc_end_object(Encoder* e)
  270. {
  271. return enc_char(e, '}');
  272. }
  273. int
  274. enc_start_array(Encoder* e)
  275. {
  276. e->count++;
  277. return enc_char(e, '[');
  278. }
  279. int
  280. enc_end_array(Encoder* e)
  281. {
  282. return enc_char(e, ']');
  283. }
  284. int
  285. enc_colon(Encoder* e)
  286. {
  287. return enc_char(e, ':');
  288. }
  289. int
  290. enc_comma(Encoder* e)
  291. {
  292. return enc_char(e, ',');
  293. }
  294. ERL_NIF_TERM
  295. encode(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
  296. {
  297. Encoder enc;
  298. Encoder* e = &enc;
  299. ERL_NIF_TERM ret;
  300. ERL_NIF_TERM stack;
  301. ERL_NIF_TERM curr;
  302. ERL_NIF_TERM item;
  303. const ERL_NIF_TERM* tuple;
  304. int arity;
  305. double dval;
  306. long lval;
  307. int has_unknown = 0;
  308. if(argc != 1) {
  309. return enif_make_badarg(env);
  310. }
  311. if(!enc_init(e, env)) {
  312. return enif_make_badarg(env);
  313. }
  314. stack = enif_make_list1(env, argv[0]);
  315. while(!enif_is_empty_list(env, stack)) {
  316. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  317. ret = enc_error(e, "internal_error");
  318. goto done;
  319. }
  320. if(enif_is_identical(curr, e->atoms->ref_object)) {
  321. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  322. ret = enc_error(e, "internal_error");
  323. goto done;
  324. }
  325. if(enif_is_empty_list(env, curr)) {
  326. if(!enc_end_object(e)) {
  327. ret = enc_error(e, "internal_error");
  328. goto done;
  329. }
  330. continue;
  331. }
  332. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  333. ret = enc_error(e, "internal_error");
  334. goto done;
  335. }
  336. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  337. ret = enc_error(e, "invalid_object_pair");
  338. goto done;
  339. }
  340. if(arity != 2) {
  341. ret = enc_error(e, "invalid_object_pair");
  342. goto done;
  343. }
  344. if(!enc_comma(e)) {
  345. ret = enc_error(e, "internal_error");
  346. goto done;
  347. }
  348. if(!enc_string(e, tuple[0])) {
  349. ret = enc_error(e, "invalid_object_key");
  350. goto done;
  351. }
  352. if(!enc_colon(e)) {
  353. ret = enc_error(e, "internal_error");
  354. goto done;
  355. }
  356. stack = enif_make_list_cell(env, curr, stack);
  357. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  358. stack = enif_make_list_cell(env, tuple[1], stack);
  359. } else if(enif_is_identical(curr, e->atoms->ref_array)) {
  360. if(!enif_get_list_cell(env, stack, &curr, &stack)) {
  361. ret = enc_error(e, "internal_error.5");
  362. goto done;
  363. }
  364. if(enif_is_empty_list(env, curr)) {
  365. if(!enc_end_array(e)) {
  366. ret = enc_error(e, "internal_error");
  367. goto done;
  368. }
  369. continue;
  370. }
  371. if(!enc_comma(e)) {
  372. ret = enc_error(e, "internal_error");
  373. goto done;
  374. }
  375. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  376. ret = enc_error(e, "internal_error");
  377. goto done;
  378. }
  379. stack = enif_make_list_cell(env, curr, stack);
  380. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  381. stack = enif_make_list_cell(env, item, stack);
  382. } else if(enif_compare(curr, e->atoms->atom_null) == 0) {
  383. if(!enc_literal(e, "null", 4)) {
  384. ret = enc_error(e, "null");
  385. goto done;
  386. }
  387. } else if(enif_compare(curr, e->atoms->atom_true) == 0) {
  388. if(!enc_literal(e, "true", 4)) {
  389. ret = enc_error(e, "true");
  390. goto done;
  391. }
  392. } else if(enif_compare(curr, e->atoms->atom_false) == 0) {
  393. if(!enc_literal(e, "false", 5)) {
  394. ret = enc_error(e, "false");
  395. goto done;
  396. }
  397. } else if(enif_is_binary(env, curr)) {
  398. if(!enc_string(e, curr)) {
  399. ret = enc_error(e, "invalid_string");
  400. goto done;
  401. }
  402. } else if(enif_is_atom(env, curr)) {
  403. if(!enc_string(e, curr)) {
  404. ret = enc_error(e, "invalid_string");
  405. goto done;
  406. }
  407. } else if(enif_get_int64(env, curr, &lval)) {
  408. if(!enc_long(e, lval)) {
  409. ret = enc_error(e, "internal_error");
  410. goto done;
  411. }
  412. } else if(enif_get_double(env, curr, &dval)) {
  413. if(!enc_double(e, dval)) {
  414. ret = enc_error(e, "internal_error");
  415. goto done;
  416. }
  417. } else if(enif_get_tuple(env, curr, &arity, &tuple)) {
  418. if(arity != 1) {
  419. ret = enc_error(e, "invalid_ejson");
  420. goto done;
  421. }
  422. if(!enif_is_list(env, tuple[0])) {
  423. ret = enc_error(e, "invalid_object");
  424. goto done;
  425. }
  426. if(!enc_start_object(e)) {
  427. ret = enc_error(e, "internal_error");
  428. goto done;
  429. }
  430. if(enif_is_empty_list(env, tuple[0])) {
  431. if(!enc_end_object(e)) {
  432. ret = enc_error(e, "internal_error");
  433. goto done;
  434. }
  435. continue;
  436. }
  437. if(!enif_get_list_cell(env, tuple[0], &item, &curr)) {
  438. ret = enc_error(e, "internal_error");
  439. goto done;
  440. }
  441. if(!enif_get_tuple(env, item, &arity, &tuple)) {
  442. ret = enc_error(e, "invalid_object_pair");
  443. goto done;
  444. }
  445. if(arity != 2) {
  446. ret = enc_error(e, "invalid_object_pair");
  447. goto done;
  448. }
  449. if(!enc_string(e, tuple[0])) {
  450. ret = enc_error(e, "invalid_object_key");
  451. goto done;
  452. }
  453. if(!enc_colon(e)) {
  454. ret = enc_error(e, "internal_error");
  455. goto done;
  456. }
  457. stack = enif_make_list_cell(env, curr, stack);
  458. stack = enif_make_list_cell(env, e->atoms->ref_object, stack);
  459. stack = enif_make_list_cell(env, tuple[1], stack);
  460. } else if(enif_is_list(env, curr)) {
  461. if(!enc_start_array(e)) {
  462. ret = enc_error(e, "internal_error");
  463. goto done;
  464. }
  465. if(enif_is_empty_list(env, curr)) {
  466. if(!enc_end_array(e)) {
  467. ret = enc_error(e, "internal_error");
  468. goto done;
  469. }
  470. continue;
  471. }
  472. if(!enif_get_list_cell(env, curr, &item, &curr)) {
  473. ret = enc_error(e, "internal_error");
  474. goto done;
  475. }
  476. stack = enif_make_list_cell(env, curr, stack);
  477. stack = enif_make_list_cell(env, e->atoms->ref_array, stack);
  478. stack = enif_make_list_cell(env, item, stack);
  479. } else {
  480. has_unknown = 1;
  481. ret = enc_error(e, "invalid_ejson");
  482. goto done;
  483. /*
  484. if(!enc_unknown(env, curr)) {
  485. ret = enc_error(e, "internal_error");
  486. goto done;
  487. }
  488. */
  489. }
  490. } while(!enif_is_empty_list(env, stack));
  491. if(!enc_result(e, &item)) {
  492. ret = enc_error(e, "internal_error");
  493. goto done;
  494. }
  495. ret = enif_make_tuple2(env, e->atoms->atom_ok, item);
  496. done:
  497. enc_destroy(e);
  498. return ret;
  499. }