源战役
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

996 rindas
34 KiB

pirms 1 mēnesi
  1. %% @author Bob Ippolito <bob@mochimedia.com>
  2. %% @copyright 2007 Mochi Media, Inc.
  3. %%
  4. %% Permission is hereby granted, free of charge, to any person obtaining a
  5. %% copy of this software and associated documentation files (the "Software"),
  6. %% to deal in the Software without restriction, including without limitation
  7. %% the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. %% and/or sell copies of the Software, and to permit persons to whom the
  9. %% Software is furnished to do so, subject to the following conditions:
  10. %%
  11. %% The above copyright notice and this permission notice shall be included in
  12. %% all copies or substantial portions of the Software.
  13. %%
  14. %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. %% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. %% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. %% THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. %% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. %% FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. %% DEALINGS IN THE SOFTWARE.
  21. %% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
  22. %% with binaries as strings, arrays as lists (without an {array, _})
  23. %% wrapper and it only knows how to decode UTF-8 (and ASCII).
  24. %%
  25. %% JSON terms are decoded as follows (javascript -> erlang):
  26. %% <ul>
  27. %% <li>{"key": "value"} ->
  28. %% {struct, [{&lt;&lt;"key">>, &lt;&lt;"value">>}]}</li>
  29. %% <li>["array", 123, 12.34, true, false, null] ->
  30. %% [&lt;&lt;"array">>, 123, 12.34, true, false, null]
  31. %% </li>
  32. %% </ul>
  33. %% <ul>
  34. %% <li>Strings in JSON decode to UTF-8 binaries in Erlang</li>
  35. %% <li>Objects decode to {struct, PropList}</li>
  36. %% <li>Numbers decode to integer or float</li>
  37. %% <li>true, false, null decode to their respective terms.</li>
  38. %% </ul>
  39. %% The encoder will accept the same format that the decoder will produce,
  40. %% but will also allow additional cases for leniency:
  41. %% <ul>
  42. %% <li>atoms other than true, false, null will be considered UTF-8
  43. %% strings (even as a proplist key)
  44. %% </li>
  45. %% <li>{json, IoList} will insert IoList directly into the output
  46. %% with no validation
  47. %% </li>
  48. %% <li>{array, Array} will be encoded as Array
  49. %% (legacy mochijson style)
  50. %% </li>
  51. %% <li>A non-empty raw proplist will be encoded as an object as long
  52. %% as the first pair does not have an atom key of json, struct,
  53. %% or array
  54. %% </li>
  55. %% </ul>
  56. -module(mochijson2).
  57. -author('bob@mochimedia.com').
  58. -export([encoder/1, encode/1]).
  59. -export([decoder/1, decode/1, decode/2]).
  %% Double-quote character, wrapped in a macro so that syntax highlighters
  %% are not thrown off by a bare quote literal.
  -define(Q, $\").

  %% Move the decoder position N bytes forward on the current line.
  -define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column,
                                   offset=N+S#decoder.offset}).

  %% Move the decoder position one byte forward on the current line.
  -define(INC_COL(S), ?ADV_COL(S, 1)).

  %% Consume one byte and start a new line: the column goes back to 1.
  -define(INC_LINE(S), S#decoder{line=1+S#decoder.line,
                                 column=1,
                                 offset=1+S#decoder.offset}).

  %% Consume the single byte C, starting a new line when C is a newline.
  -define(INC_CHAR(S, C),
          case C =:= $\n of
              true ->
                  ?INC_LINE(S);
              false ->
                  ?INC_COL(S)
          end).

  %% Guard-safe test for the whitespace bytes skipped between tokens.
  -define(IS_WHITESPACE(C),
          (C =:= $\n orelse C =:= $\r orelse C =:= $\t orelse C =:= $\s)).

  %% Guard-safe map test; always false when built with map_unavailable defined.
  -ifndef(map_unavailable).
  -define(IS_MAP(X), is_map(X)).
  -else.
  -define(IS_MAP(_), false).
  -endif.
  %% @type json_string() = atom() | binary()
  87. %% @type json_number() = integer() | float()
  88. %% @type json_array() = [json_term()]
  89. %% @type json_object() = {struct, [{json_string(), json_term()}]}
  90. %% @type json_eep18_object() = {[{json_string(), json_term()}]}
  91. %% @type json_iolist() = {json, iolist()}
  92. %% @type json_term() = json_string() | json_number() | json_array() |
  93. %% json_object() | json_eep18_object() | json_iolist()
  %% Encoder configuration.
  %%   handler - fun applied to a term json_encode/2 cannot encode; its result
  %%             is encoded instead. With null, such a term makes the encoder
  %%             exit with {json_encode, {bad_term, Term}}.
  %%   utf8    - when true, characters from 16#7f upwards are emitted as raw
  %%             UTF-8; otherwise they are written as \uXXXX escapes.
  -record(encoder, {handler=null,
                    utf8=false}).
  %% Decoder state threaded through the tokenizer and parser.
  %%   object_hook - null | struct | eep18 | proplist | fun/1; selects how a
  %%                 decoded {struct, Props} is returned (see make_object/2).
  %%   offset      - number of input bytes consumed so far.
  %%   line/column - position bookkeeping, both starting at 1.
  %%   state       - parser state: null, any, key, comma or trim.
  -record(decoder, {object_hook=null,
                    offset=0,
                    line=1,
                    column=1,
                    state=null}).
  %% @spec encoder([encoder_option()]) -> function()
  %% @doc Return a fun of arity 1 that encodes a term with the given options.
  %% Recognised options are {handler, Fun} and {utf8, Boolean}.
  %% @type encoder_option() = handler_option() | utf8_option()
  %% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false)
  encoder(Options) ->
      Config = parse_encoder_options(Options, #encoder{}),
      fun (Term) -> json_encode(Term, Config) end.
  %% @spec encode(json_term()) -> iolist()
  %% @doc Encode the given term as JSON to an iolist, using default options.
  encode(Any) ->
      Defaults = #encoder{},
      json_encode(Any, Defaults).
  %% @spec decoder([decoder_option()]) -> function()
  %% @doc Return a fun of arity 1 that decodes JSON with the given options.
  decoder(Options) ->
      Config = parse_decoder_options(Options, #decoder{}),
      fun (Input) -> json_decode(Input, Config) end.
  %% @spec decode(iolist(), [{format, proplist | eep18 | struct | map}]) -> json_term()
  %% @doc Decode the given iolist to Erlang terms using the given object format.
  %% With proplist, JSON objects come back as [{binary(), json_term()}];
  %% with eep18 as {[{binary(), json_term()}]}; with map as
  %% #{binary() => json_term()}; with struct they are returned as-is, i.e.
  %% as {struct, PropList}. An {object_hook, Fun} option is accepted as well.
  decode(S, Options) ->
      State = parse_decoder_options(Options, #decoder{}),
      json_decode(S, State).
  %% @spec decode(iolist()) -> json_term()
  %% @doc Decode the given iolist to Erlang terms with the default decoder state.
  decode(S) ->
      Defaults = #decoder{},
      json_decode(S, Defaults).
  129. %% Internal API
  %% Fold a list of encoder options into an #encoder{} record. Later options
  %% override earlier ones; an unrecognised option raises function_clause.
  parse_encoder_options([{utf8, Flag} | Tail], Acc) ->
      parse_encoder_options(Tail, Acc#encoder{utf8=Flag});
  parse_encoder_options([{handler, Fun} | Tail], Acc) ->
      parse_encoder_options(Tail, Acc#encoder{handler=Fun});
  parse_encoder_options([], Acc) ->
      Acc.
  %% Fold a list of decoder options into a #decoder{} record. Both
  %% {object_hook, Hook} and {format, Format} set the object_hook field, so
  %% the last of them in the list wins. Unknown options or formats raise
  %% function_clause.
  parse_decoder_options([], Acc) ->
      Acc;
  parse_decoder_options([{object_hook, Fun} | Tail], Acc) ->
      parse_decoder_options(Tail, Acc#decoder{object_hook=Fun});
  parse_decoder_options([{format, map} | Tail], Acc) ->
      %% map support is compile-time conditional, hence the helper.
      MapHook = make_object_hook_for_map(),
      parse_decoder_options(Tail, Acc#decoder{object_hook=MapHook});
  parse_decoder_options([{format, Fmt} | Tail], Acc)
    when Fmt =:= struct; Fmt =:= eep18; Fmt =:= proplist ->
      parse_decoder_options(Tail, Acc#decoder{object_hook=Fmt}).
  %% Build the object hook used for {format, map}: it turns {struct, Props}
  %% into a map. When map_unavailable is defined, asking for it exits instead.
  -ifndef(map_unavailable).
  make_object_hook_for_map() ->
      fun ({struct, Props}) -> maps:from_list(Props) end.
  -else.
  make_object_hook_for_map() ->
      exit({json_decode, {bad_format, map_unavailable}}).
  -endif.
  %% Encode one Erlang term as JSON iodata according to the encoder state.
  %% Clause order matters: the JSON atoms must be tried before the generic
  %% atom/binary string clause, and raw proplists before plain lists.
  %% Terms no clause recognises go to the configured handler, or make the
  %% call exit with {json_encode, {bad_term, Term}} when there is none.
  json_encode(true, _Cfg) ->
      <<"true">>;
  json_encode(false, _Cfg) ->
      <<"false">>;
  json_encode(null, _Cfg) ->
      <<"null">>;
  json_encode(Int, _Cfg) when is_integer(Int) ->
      integer_to_list(Int);
  json_encode(Float, _Cfg) when is_float(Float) ->
      mochinum:digits(Float);
  json_encode(Str, Cfg) when is_atom(Str); is_binary(Str) ->
      json_encode_string(Str, Cfg);
  %% A raw proplist is an object unless its first key is one of the tags.
  json_encode([{Key, _} | _] = Pairs, Cfg)
    when Key =/= struct, Key =/= array, Key =/= json ->
      json_encode_proplist(Pairs, Cfg);
  json_encode({struct, Pairs}, Cfg) when is_list(Pairs) ->
      json_encode_proplist(Pairs, Cfg);
  %% EEP 18 style objects: {PropList} and the empty {}.
  json_encode({Pairs}, Cfg) when is_list(Pairs) ->
      json_encode_proplist(Pairs, Cfg);
  json_encode({}, Cfg) ->
      json_encode_proplist([], Cfg);
  json_encode(Items, Cfg) when is_list(Items) ->
      json_encode_array(Items, Cfg);
  json_encode({array, Items}, Cfg) when is_list(Items) ->
      json_encode_array(Items, Cfg);
  json_encode(Map, Cfg) when ?IS_MAP(Map) ->
      json_encode_map(Map, Cfg);
  %% Pre-rendered JSON is passed through untouched and unvalidated.
  json_encode({json, IoList}, _Cfg) ->
      IoList;
  json_encode(Other, #encoder{handler=null}) ->
      exit({json_encode, {bad_term, Other}});
  json_encode(Other, Cfg=#encoder{handler=Handler}) ->
      json_encode(Handler(Other), Cfg).
  %% Encode a list as a JSON array. The result is the flat iolist
  %% [$[, E1, $,, E2, ..., $]] with the elements encoded left to right.
  json_encode_array([], _State) ->
      <<"[]">>;
  json_encode_array(L, State) ->
      [First | Others] = lists:map(fun (Item) -> json_encode(Item, State) end, L),
      Tail = lists:foldr(fun (Enc, Acc) -> [$,, Enc | Acc] end, [$\]], Others),
      [$\[, First | Tail].
  %% Encode a list of {Key, Value} pairs as a JSON object. Pairs are handled
  %% left to right, the key of each pair being encoded before its value.
  json_encode_proplist([], _State) ->
      <<"{}">>;
  json_encode_proplist(Props, State) ->
      EncodePair = fun ({Key, Value}) ->
                           EncKey = json_encode_string(Key, State),
                           {EncKey, json_encode(Value, State)}
                   end,
      [{K1, V1} | More] = lists:map(EncodePair, Props),
      Tail = lists:foldr(fun ({K, V}, Acc) -> [$,, K, $:, V | Acc] end,
                         [$\}], More),
      [$\{, K1, $:, V1 | Tail].
  %% Encode a map as a JSON object, visiting entries in maps:fold/3 order.
  -ifndef(map_unavailable).
  json_encode_map(Map, _State) when map_size(Map) =:= 0 ->
      <<"{}">>;
  json_encode_map(Map, State) ->
      AddEntry = fun (Key, Value, Acc) ->
                         EncKey = json_encode_string(Key, State),
                         EncValue = json_encode(Value, State),
                         [$,, EncValue, $:, EncKey | Acc]
                 end,
      %% The accumulator is built back to front; drop the surplus comma
      %% left by the last entry before flipping it around.
      [$, | Reversed] = maps:fold(AddEntry, [$\{], Map),
      lists:reverse(Reversed, [$\}]).
  -else.
  json_encode_map(Bad, _State) ->
      %% IS_MAP definition guarantees that this branch is dead
      exit({json_encode, {bad_term, Bad}}).
  -endif.
  %% Encode an atom, binary, integer or character list as a quoted JSON
  %% string. Input made only of safe characters is wrapped in quotes as-is;
  %% anything else goes through the escaping encoder.
  json_encode_string(Bin, State) when is_binary(Bin) ->
      case json_bin_is_safe(Bin) of
          false ->
              Chars = unicode:characters_to_list(Bin),
              json_encode_string_unicode(Chars, State, [?Q]);
          true ->
              [?Q, Bin, ?Q]
      end;
  json_encode_string(Chars, State) when is_list(Chars) ->
      case json_string_is_safe(Chars) of
          false ->
              json_encode_string_unicode(Chars, State, [?Q]);
          true ->
              [?Q, Chars, ?Q]
      end;
  json_encode_string(Atom, State) when is_atom(Atom) ->
      json_encode_string(atom_to_binary(Atom, latin1), State);
  json_encode_string(Int, _State) when is_integer(Int) ->
      [?Q, integer_to_list(Int), ?Q].
  %% True when every element of the list can be emitted without escaping.
  %% Returns false at the first quote, backslash, control character or
  %% character in 16#7f..16#10FFFF, and exits with
  %% {json_encode, {bad_char, C}} for an element ordered above 16#10FFFF.
  json_string_is_safe([]) ->
      true;
  json_string_is_safe([C | _]) when C =:= ?Q; C =:= $\\ ->
      false;
  %% Covers \b, \f, \n, \r and \t along with the other control characters.
  json_string_is_safe([C | _]) when C >= 0, C < $\s ->
      false;
  json_string_is_safe([C | Rest]) when C < 16#7f ->
      json_string_is_safe(Rest);
  %% From here on C is known to be at or above 16#7f.
  json_string_is_safe([C | _]) when C =< 16#10FFFF ->
      false;
  json_string_is_safe([C | _]) ->
      exit({json_encode, {bad_char, C}}).
  %% True when every byte of the binary is printable ASCII below 16#7f other
  %% than the quote and the backslash, so it can be emitted without escaping.
  json_bin_is_safe(<<>>) ->
      true;
  json_bin_is_safe(<<Byte, Rest/binary>>)
    when Byte >= $\s, Byte < 16#7f, Byte =/= ?Q, Byte =/= $\\ ->
      json_bin_is_safe(Rest);
  json_bin_is_safe(<<_, _/binary>>) ->
      false.
  %% Escape a list of characters into a quoted JSON string. Acc holds the
  %% output so far in reverse order and already contains the opening quote.
  json_encode_string_unicode([], _State, Acc) ->
      lists:reverse([$\" | Acc]);
  json_encode_string_unicode([C | Cs], State, Acc) ->
      json_encode_string_unicode(Cs, State, json_escape_char(C, State, Acc)).

  %% Push the escaped form of one character onto the reversed accumulator.
  %%
  %% Escaping solidus is only useful when trying to protect against
  %% "</script>" injection attacks, which are only possible when JSON is
  %% inserted into a HTML document in-line. mochijson2 does not protect you
  %% from this, so if you do insert directly into HTML you need to add a
  %% clause mapping $/ to [$/, $\\ | Acc] or escape the output of encode.
  json_escape_char(?Q, _State, Acc) ->
      [?Q, $\\ | Acc];
  json_escape_char($\\, _State, Acc) ->
      [$\\, $\\ | Acc];
  json_escape_char($\b, _State, Acc) ->
      [$b, $\\ | Acc];
  json_escape_char($\f, _State, Acc) ->
      [$f, $\\ | Acc];
  json_escape_char($\n, _State, Acc) ->
      [$n, $\\ | Acc];
  json_escape_char($\r, _State, Acc) ->
      [$r, $\\ | Acc];
  json_escape_char($\t, _State, Acc) ->
      [$t, $\\ | Acc];
  json_escape_char(C, _State, Acc) when C >= 0, C < $\s ->
      [unihex(C) | Acc];
  json_escape_char(C, State, Acc)
    when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 ->
      [unicode:characters_to_binary([C]) | Acc];
  json_escape_char(C, State, Acc)
    when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 ->
      [unihex(C) | Acc];
  json_escape_char(C, _State, Acc) when C < 16#7f ->
      [C | Acc];
  json_escape_char(C, _State, _Acc) ->
      %% json_string_is_safe guarantees that this branch is dead
      exit({json_encode, {bad_char, C}}).
  %% Map a nibble value (0..15) to its lowercase hexadecimal character.
  hexdigit(N) when N >= 0, N =< 9 ->
      $0 + N;
  hexdigit(N) when N =< 15 ->
      ($a - 10) + N.
  %% Render a code point as JSON \uXXXX escape text. Code points at or above
  %% 16#10000 are written as a UTF-16 surrogate pair of two escapes.
  unihex(C) when C < 16#10000 ->
      %% Walk the 16-bit value nibble by nibble, most significant first.
      [$\\, $u | [hexdigit(Nibble) || <<Nibble:4>> <= <<C:16>>]];
  unihex(C) when C =< 16#10FFFF ->
      Offset = C - 16#10000,
      High = 16#d800 bor ((Offset bsr 10) band 16#3ff),
      Low = 16#dc00 bor (Offset band 16#3ff),
      [unihex(High), unihex(Low)].
  %% Decode one JSON value from a binary (or a list, which is flattened to a
  %% binary first). After the value only whitespace may remain: the tokenizer
  %% is run once more in the trim state and must report eof.
  json_decode(Input, State) when is_list(Input) ->
      json_decode(iolist_to_binary(Input), State);
  json_decode(Bin, State) ->
      {Value, After} = decode1(Bin, State),
      {eof, _} = tokenize(Bin, After#decoder{state=trim}),
      Value.
  %% Decode a single value starting at the current offset; the decoder must
  %% be in the null state. Returns {Value, NewState}.
  decode1(Bin, State=#decoder{state=null}) ->
      case tokenize(Bin, State#decoder{state=any}) of
          {start_object, Next} ->
              decode_object(Bin, Next);
          {start_array, Next} ->
              decode_array(Bin, Next);
          {{const, Value}, Next} ->
              {Value, Next}
      end.
  %% Convert a freshly decoded {struct, Props} into the representation
  %% selected by the decoder's object_hook: unchanged for null/struct,
  %% {Props} for eep18, Props for proplist, otherwise whatever the hook fun
  %% returns.
  make_object(Obj, #decoder{object_hook=null}) ->
      Obj;
  make_object(Obj, #decoder{object_hook=struct}) ->
      Obj;
  make_object({struct, Props}, #decoder{object_hook=eep18}) ->
      {Props};
  make_object({struct, Props}, #decoder{object_hook=proplist}) ->
      Props;
  make_object(Obj, #decoder{object_hook=HookFun}) ->
      HookFun(Obj).
  %% Decode the members of an object whose opening brace has already been
  %% consumed. Returns {Object, NewState} with the state reset to null.
  decode_object(Bin, State) ->
      decode_object(Bin, State#decoder{state=key}, []).

  %% Pairs accumulates the members in reverse. The state alternates between
  %% key (a member or the closing brace may follow) and comma (a separator
  %% or the closing brace may follow).
  decode_object(Bin, State=#decoder{state=key}, Pairs) ->
      case tokenize(Bin, State) of
          {{const, Key}, AfterKey} ->
              {colon, AfterColon} = tokenize(Bin, AfterKey),
              {Value, AfterValue} = decode1(Bin, AfterColon#decoder{state=null}),
              decode_object(Bin, AfterValue#decoder{state=comma},
                            [{Key, Value} | Pairs]);
          {end_object, Next} ->
              Object = make_object({struct, lists:reverse(Pairs)}, Next),
              {Object, Next#decoder{state=null}}
      end;
  decode_object(Bin, State=#decoder{state=comma}, Pairs) ->
      case tokenize(Bin, State) of
          {comma, Next} ->
              decode_object(Bin, Next#decoder{state=key}, Pairs);
          {end_object, Next} ->
              Object = make_object({struct, lists:reverse(Pairs)}, Next),
              {Object, Next#decoder{state=null}}
      end.
  %% Decode the elements of an array whose opening bracket has already been
  %% consumed. Returns {List, NewState} with the state reset to null.
  decode_array(Bin, State) ->
      decode_array(Bin, State#decoder{state=any}, []).

  %% Items accumulates the elements in reverse. The state alternates between
  %% any (a value or the closing bracket may follow) and comma (a separator
  %% or the closing bracket may follow).
  decode_array(Bin, State=#decoder{state=any}, Items) ->
      case tokenize(Bin, State) of
          {{const, Value}, Next} ->
              decode_array(Bin, Next#decoder{state=comma}, [Value | Items]);
          {start_object, Next} ->
              {Object, AfterObject} = decode_object(Bin, Next),
              decode_array(Bin, AfterObject#decoder{state=comma},
                           [Object | Items]);
          {start_array, Next} ->
              {Nested, AfterNested} = decode_array(Bin, Next),
              decode_array(Bin, AfterNested#decoder{state=comma},
                           [Nested | Items]);
          {end_array, Next} ->
              {lists:reverse(Items), Next#decoder{state=null}}
      end;
  decode_array(Bin, State=#decoder{state=comma}, Items) ->
      case tokenize(Bin, State) of
          {comma, Next} ->
              decode_array(Bin, Next#decoder{state=any}, Items);
          {end_array, Next} ->
              {lists:reverse(Items), Next#decoder{state=null}}
      end.
  %% Tokenize a string whose opening quote has already been consumed. The
  %% fast scanner is tried first: when the string has no escapes the result
  %% is a slice of the input, otherwise the escape-free prefix seeds the
  %% accumulator of the slow path.
  tokenize_string(B, S=#decoder{offset=Start}) ->
      case tokenize_string_fast(B, Start) of
          {escape, EscPos} ->
              Len = EscPos - Start,
              Prefix = binary:part(B, Start, Len),
              tokenize_string(B, ?ADV_COL(S, Len),
                              lists:reverse(binary_to_list(Prefix)));
          EndPos ->
              Len = EndPos - Start,
              <<_:Start/binary, Str:Len/binary, ?Q, _/binary>> = B,
              %% Len + 1 also steps over the closing quote.
              {{const, Str}, ?ADV_COL(S, Len + 1)}
      end.
  %% Scan forward from byte offset Pos. Returns the offset of the closing
  %% quote, or {escape, Offset} at the first backslash. Bytes are checked
  %% against the 1 to 4 byte UTF-8 lead/continuation ranges along the way;
  %% anything else, including running off the end, throws invalid_utf8.
  tokenize_string_fast(B, Pos) ->
      case B of
          <<_:Pos/binary, ?Q, _/binary>> ->
              Pos;
          <<_:Pos/binary, $\\, _/binary>> ->
              {escape, Pos};
          <<_:Pos/binary, B1, _/binary>> when B1 < 16#80 ->
              tokenize_string_fast(B, Pos + 1);
          <<_:Pos/binary, B1, B2, _/binary>>
            when B1 >= 16#C2, B1 =< 16#DF,
                 B2 >= 16#80, B2 =< 16#BF ->
              tokenize_string_fast(B, Pos + 2);
          <<_:Pos/binary, B1, B2, B3, _/binary>>
            when B1 >= 16#E0, B1 =< 16#EF,
                 B2 >= 16#80, B2 =< 16#BF,
                 B3 >= 16#80, B3 =< 16#BF ->
              tokenize_string_fast(B, Pos + 3);
          <<_:Pos/binary, B1, B2, B3, B4, _/binary>>
            when B1 >= 16#F0, B1 =< 16#F4,
                 B2 >= 16#80, B2 =< 16#BF,
                 B3 >= 16#80, B3 =< 16#BF,
                 B4 >= 16#80, B4 =< 16#BF ->
              tokenize_string_fast(B, Pos + 4);
          _ ->
              throw(invalid_utf8)
      end.
  %% Slow path of string tokenizing, used once an escape has been seen.
  %% Acc holds the decoded bytes and binaries in reverse order.
  tokenize_string(B, S=#decoder{offset=Pos}, Acc) ->
      case B of
          <<_:Pos/binary, ?Q, _/binary>> ->
              {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
          %% Two-character escapes.
          <<_:Pos/binary, $\\, Esc, _/binary>>
            when Esc =:= ?Q; Esc =:= $\\; Esc =:= $/; Esc =:= $b;
                 Esc =:= $f; Esc =:= $n; Esc =:= $r; Esc =:= $t ->
              Char = case Esc of
                         $b -> $\b;
                         $f -> $\f;
                         $n -> $\n;
                         $r -> $\r;
                         $t -> $\t;
                         _ -> Esc   % quote, backslash and solidus map to themselves
                     end,
              tokenize_string(B, ?ADV_COL(S, 2), [Char | Acc]);
          <<_:Pos/binary, "\\u", H3, H2, H1, H0, Rest/binary>> ->
              Code = erlang:list_to_integer([H3, H2, H1, H0], 16),
              case Code >= 16#D800 andalso Code =< 16#DBFF of
                  true ->
                      %% coalesce UTF-16 surrogate pair
                      <<"\\u", L3, L2, L1, L0, _/binary>> = Rest,
                      Low = erlang:list_to_integer([L3, L2, L1, L0], 16),
                      Pair = <<Code:16/big-unsigned-integer,
                               Low:16/big-unsigned-integer>>,
                      Utf8 = unicode:characters_to_binary(Pair, utf16),
                      tokenize_string(B, ?ADV_COL(S, 12), [Utf8 | Acc]);
                  false ->
                      Utf8 = unicode:characters_to_binary([Code]),
                      tokenize_string(B, ?ADV_COL(S, 6), [Utf8 | Acc])
              end;
          %% Plain ASCII; this also takes a backslash that starts none of
          %% the escapes above.
          <<_:Pos/binary, B1, _/binary>> when B1 < 16#80 ->
              tokenize_string(B, ?INC_CHAR(S, B1), [B1 | Acc]);
          <<_:Pos/binary, B1, B2, _/binary>>
            when B1 >= 16#C2, B1 =< 16#DF,
                 B2 >= 16#80, B2 =< 16#BF ->
              tokenize_string(B, ?ADV_COL(S, 2), [B2, B1 | Acc]);
          <<_:Pos/binary, B1, B2, B3, _/binary>>
            when B1 >= 16#E0, B1 =< 16#EF,
                 B2 >= 16#80, B2 =< 16#BF,
                 B3 >= 16#80, B3 =< 16#BF ->
              tokenize_string(B, ?ADV_COL(S, 3), [B3, B2, B1 | Acc]);
          <<_:Pos/binary, B1, B2, B3, B4, _/binary>>
            when B1 >= 16#F0, B1 =< 16#F4,
                 B2 >= 16#80, B2 =< 16#BF,
                 B3 >= 16#80, B3 =< 16#BF,
                 B4 >= 16#80, B4 =< 16#BF ->
              tokenize_string(B, ?ADV_COL(S, 4), [B4, B3, B2, B1 | Acc]);
          _ ->
              throw(invalid_utf8)
      end.
  %% Tokenize a number at the current offset and convert the collected
  %% digits to an integer or a float constant token.
  tokenize_number(B, S) ->
      case tokenize_number(B, sign, S, []) of
          {{float, Digits}, Next} ->
              {{const, list_to_float(Digits)}, Next};
          {{int, Digits}, Next} ->
              {{const, list_to_integer(Digits)}, Next}
      end.
  %% Number scanner written as a state machine over the phases
  %% sign, int, int1, frac, frac1, esign, eint and eint1. Acc collects the
  %% characters in reverse. Returns {{int, String}, State} or
  %% {{float, String}, State}; the string is shaped so that
  %% list_to_integer/1 or list_to_float/1 accepts it.
  tokenize_number(B, sign, S=#decoder{offset=Pos}, []) ->
      case B of
          <<_:Pos/binary, $-, _/binary>> ->
              tokenize_number(B, int, ?INC_COL(S), [$-]);
          _ ->
              tokenize_number(B, int, S, [])
      end;
  tokenize_number(B, int, S=#decoder{offset=Pos}, Acc) ->
      case B of
          %% A leading zero is never followed by further integer digits.
          <<_:Pos/binary, $0, _/binary>> ->
              tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]);
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $1, Digit =< $9 ->
              tokenize_number(B, int1, ?INC_COL(S), [Digit | Acc])
      end;
  tokenize_number(B, int1, S=#decoder{offset=Pos}, Acc) ->
      case B of
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(B, int1, ?INC_COL(S), [Digit | Acc]);
          _ ->
              tokenize_number(B, frac, S, Acc)
      end;
  tokenize_number(B, frac, S=#decoder{offset=Pos}, Acc) ->
      case B of
          <<_:Pos/binary, $., Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(B, frac1, ?ADV_COL(S, 2), [Digit, $. | Acc]);
          <<_:Pos/binary, Exp, _/binary>> when Exp =:= $e; Exp =:= $E ->
              %% An exponent without a fraction: insert ".0" so that the
              %% collected text is a float list_to_float/1 can read.
              tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
          _ ->
              {{int, lists:reverse(Acc)}, S}
      end;
  tokenize_number(B, frac1, S=#decoder{offset=Pos}, Acc) ->
      case B of
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(B, frac1, ?INC_COL(S), [Digit | Acc]);
          <<_:Pos/binary, Exp, _/binary>> when Exp =:= $e; Exp =:= $E ->
              tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]);
          _ ->
              {{float, lists:reverse(Acc)}, S}
      end;
  tokenize_number(B, esign, S=#decoder{offset=Pos}, Acc) ->
      case B of
          <<_:Pos/binary, Sign, _/binary>> when Sign =:= $-; Sign =:= $+ ->
              tokenize_number(B, eint, ?INC_COL(S), [Sign | Acc]);
          _ ->
              tokenize_number(B, eint, S, Acc)
      end;
  tokenize_number(B, eint, S=#decoder{offset=Pos}, Acc) ->
      case B of
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(B, eint1, ?INC_COL(S), [Digit | Acc])
      end;
  tokenize_number(B, eint1, S=#decoder{offset=Pos}, Acc) ->
      case B of
          <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
              tokenize_number(B, eint1, ?INC_COL(S), [Digit | Acc]);
          _ ->
              {{float, lists:reverse(Acc)}, S}
      end.
  %% Return the next token at the current offset as {Token, NewState},
  %% skipping leading whitespace. Tokens are start_object, end_object,
  %% start_array, end_array, comma, colon, {const, Value} and eof.
  %% eof is only accepted while the decoder is in the trim state.
  tokenize(B, S=#decoder{offset=Pos}) ->
      case B of
          <<_:Pos/binary, Ws, _/binary>> when ?IS_WHITESPACE(Ws) ->
              tokenize(B, ?INC_CHAR(S, Ws));
          <<_:Pos/binary, $\{, _/binary>> ->
              {start_object, ?INC_COL(S)};
          <<_:Pos/binary, $\}, _/binary>> ->
              {end_object, ?INC_COL(S)};
          <<_:Pos/binary, $\[, _/binary>> ->
              {start_array, ?INC_COL(S)};
          <<_:Pos/binary, $\], _/binary>> ->
              {end_array, ?INC_COL(S)};
          <<_:Pos/binary, $,, _/binary>> ->
              {comma, ?INC_COL(S)};
          <<_:Pos/binary, $:, _/binary>> ->
              {colon, ?INC_COL(S)};
          <<_:Pos/binary, "null", _/binary>> ->
              {{const, null}, ?ADV_COL(S, 4)};
          <<_:Pos/binary, "true", _/binary>> ->
              {{const, true}, ?ADV_COL(S, 4)};
          <<_:Pos/binary, "false", _/binary>> ->
              {{const, false}, ?ADV_COL(S, 5)};
          <<_:Pos/binary, ?Q, _/binary>> ->
              tokenize_string(B, ?INC_COL(S));
          <<_:Pos/binary, First, _/binary>>
            when First =:= $-; First >= $0, First =< $9 ->
              tokenize_number(B, S);
          <<_:Pos/binary>> ->
              %% End of input is only legal when trimming trailing whitespace.
              trim = S#decoder.state,
              {eof, S}
      end.
  590. %%
  591. %% Tests
  592. %%
  593. -ifdef(TEST).
  594. -include_lib("eunit/include/eunit.hrl").
  595. %% testing constructs borrowed from the Yaws JSON implementation.
  %% Return an empty JSON object in {struct, PropList} form.
  obj_new() ->
      NoMembers = [],
      {struct, NoMembers}.
  %% True when the term is {struct, Props} and every element of Props is a
  %% {Key, Value} pair with a binary key. The check is total: a non-binary
  %% key, an element that is not a pair, or a term that is not a struct
  %% gives false rather than a function_clause error, so callers get a
  %% usable boolean.
  is_obj({struct, Props}) ->
      HasBinaryKey = fun ({Key, _}) -> is_binary(Key);
                         (_) -> false
                     end,
      lists:all(HasBinaryKey, Props);
  is_obj(_) ->
      false.
  %% Wrap a proplist as {struct, Props}, asserting that it is a valid object.
  obj_from_list(Props) ->
      ?assert(is_obj({struct, Props})),
      {struct, Props}.
  %% Test two decoded JSON terms for equivalence.
  %% Objects may list their members in any order, so they cannot simply be
  %% compared as Erlang terms; aggregates (objects and lists) are walked
  %% recursively instead. There is no clause for mismatched kinds, so such
  %% a comparison fails with function_clause.
  equiv({struct, PropsA}, {struct, PropsB}) ->
      equiv_object(PropsA, PropsB);
  equiv(ListA, ListB) when is_list(ListA), is_list(ListB) ->
      equiv_list(ListA, ListB);
  equiv(NumA, NumB) when is_number(NumA), is_number(NumB) ->
      NumA == NumB;
  equiv(BinA, BinB) when is_binary(BinA), is_binary(BinB) ->
      BinA == BinB;
  equiv(Atom, Atom) when Atom =:= true; Atom =:= false; Atom =:= null ->
      true.
  %% Compare two property lists regardless of member order: both are sorted
  %% by key and then compared pairwise. A mismatch fails the match on true.
  equiv_object(Props1, Props2) ->
      SortedA = lists:keysort(1, Props1),
      SortedB = lists:keysort(1, Props2),
      SamePair = fun ({{KeyA, ValA}, {KeyB, ValB}}) ->
                         equiv(KeyA, KeyB) and equiv(ValA, ValB)
                 end,
      true = lists:all(SamePair, lists:zip(SortedA, SortedB)).
  %% Compare two lists element by element for equivalence. Lists of
  %% different length match no clause.
  equiv_list([HeadA | TailA], [HeadB | TailB]) ->
      equiv(HeadA, HeadB) andalso equiv_list(TailA, TailB);
  equiv_list([], []) ->
      true.
  %% Decoding a float/int array, and a surrogate pair given as iolist chunks.
  decode_test() ->
      Numbers = decode(<<"[1199344435545.0,1]">>),
      [1199344435545.0, 1] = Numbers,
      Escaped = [34, "\\ud835", "\\udf15", 34],
      <<16#F0,16#9D,16#9C,16#95>> = decode(Escaped).
  %% Run every Erlang/JSON pair of the utf8 test vector through test_one/2.
  e2j_vec_test() ->
      Vectors = e2j_test_vec(utf8),
      test_one(Vectors, 1).
  %% Check each {Erlang, Json} pair: the JSON text must decode to an
  %% equivalent term, and the term must survive an encode/decode round trip.
  %% Index counts the pairs and is handy when tracing a failure.
  test_one([{Erl, Json} | More], Index) ->
      true = equiv(Erl, decode(Json)),
      true = equiv(Erl, decode(encode(Erl))),
      test_one(More, Index + 1);
  test_one([], _Index) ->
      ok.
  %% Test vector of {ErlangTerm, JsonText} pairs consumed by test_one/2.
  %% Each JSON text must decode to a term equivalent to the Erlang term, and
  %% each Erlang term must survive an encode/decode round trip.
  e2j_test_vec(utf8) ->
      [
       {1, "1"},
       {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
       {-1, "-1"},
       {-3.1416, "-3.14160"},
       {12.0e10, "1.20000e+11"},
       {1.234E+10, "1.23400e+10"},
       {-1.234E-10, "-1.23400e-10"},
       {10.0, "1.0e+01"},
       {123.456, "1.23456E+2"},
       {10.0, "1e1"},
       {<<"foo">>, "\"foo\""},
       {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
       {<<"">>, "\"\""},
       {<<"\n\n\n">>, "\"\\n\\n\\n\""},
       {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
       {obj_new(), "{}"},
       {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
       {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
        "{\"foo\":\"bar\",\"baz\":123}"},
       {[], "[]"},
       {[[]], "[[]]"},
       {[1, <<"foo">>], "[1,\"foo\"]"},
       %% json array in a json object
       {obj_from_list([{<<"foo">>, [123]}]),
        "{\"foo\":[123]}"},
       %% json object in a json object
       {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
        "{\"foo\":{\"bar\":true}}"},
       %% fold evaluation order
       {obj_from_list([{<<"foo">>, []},
                       {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
                       {<<"alice">>, <<"bob">>}]),
        "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
       %% json object in a json array
       {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
        "[-123,\"foo\",{\"bar\":[]},null]"}
      ].
  %% The same UTF-8 input is escaped by default and passed through raw
  %% when the utf8 option is on; the control character is escaped in both.
  encoder_utf8_test() ->
      Input = <<1,"\321\202\320\265\321\201\321\202">>,
      %% safe conversion case (default)
      Escaped = iolist_to_binary(encode(Input)),
      <<"\"\\u0001\\u0442\\u0435\\u0441\\u0442\"">> = Escaped,
      %% raw utf8 output (optional)
      Enc = mochijson2:encoder([{utf8, true}]),
      Raw = iolist_to_binary(Enc(Input)),
      <<34,"\\u0001",209,130,208,181,209,129,209,130,34>> = Raw.
  %% Well-formed UTF-8 strings decode to the expected code point; malformed
  %% ones throw invalid_utf8 from decode and make encode exit.
  input_validation_test() ->
      Valid = [
               {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound
               {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro
               {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius
              ],
      CheckValid = fun ({CodePoint, UTF8}) ->
                           Expect = unicode:characters_to_binary([CodePoint]),
                           Expect = decode(UTF8)
                   end,
      lists:foreach(CheckValid, Valid),
      Invalid = [
                 %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
                 <<?Q, 16#80, ?Q>>,
                 %% missing continuations, last byte in each should be 80-BF
                 <<?Q, 16#C2, 16#7F, ?Q>>,
                 <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
                 <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
                 %% we don't support code points > 10FFFF per RFC 3629
                 <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>,
                 %% escape characters trigger a different code path
                 <<?Q, $\\, $\n, 16#80, ?Q>>
                ],
      CheckInvalid = fun (Input) ->
                             ok = try decode(Input) catch invalid_utf8 -> ok end,
                             %% could be {ucs,{bad_utf8_character_code}} or
                             %% {json_encode,{bad_char,_}}
                             {'EXIT', _} = (catch encode(Input))
                     end,
      lists:foreach(CheckInvalid, Invalid).
  %% {json, IoList} is spliced into the output verbatim, both at top level
  %% and as an object value.
  inline_json_test() ->
      Inline = {json, [<<"\"iodata">>, " iodata\""]},
      ?assertEqual(<<"\"iodata iodata\"">>,
                   iolist_to_binary(encode(Inline))),
      Wrapped = {struct, [{key, Inline}]},
      ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]},
                   decode(encode(Wrapped))),
      ok.
  %% A code point above 16#FFFF is written as a surrogate pair and decodes
  %% back to the original UTF-8.
  big_unicode_test() ->
      UTF8Seq = unicode:characters_to_binary([16#0001d120]),
      Encoded = iolist_to_binary(encode(UTF8Seq)),
      ?assertEqual(<<"\"\\ud834\\udd20\"">>, Encoded),
      ?assertEqual(UTF8Seq, decode(iolist_to_binary(encode(UTF8Seq)))),
      ok.
  %% decoder/1 with no options returns structs; with an object_hook the
  %% hook's return value replaces the object.
  custom_decoder_test() ->
      Json = "{\"key\": \"value\"}",
      Plain = decoder([]),
      ?assertEqual({struct, [{<<"key">>, <<"value">>}]}, Plain(Json)),
      Hook = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end,
      Hooked = decoder([{object_hook, Hook}]),
      ?assertEqual(win, Hooked(Json)),
      ok.
  %% true/false/null map onto the JSON literals; any other atom is encoded
  %% as a string.
  atom_test() ->
      %% JSON native atoms
      CheckNative = fun (A) ->
                            Text = atom_to_list(A),
                            ?assertEqual(A, decode(Text)),
                            ?assertEqual(iolist_to_binary(Text),
                                         iolist_to_binary(encode(A)))
                    end,
      lists:foreach(CheckNative, [true, false, null]),
      %% Atom to string
      ?assertEqual(<<"\"foo\"">>, iolist_to_binary(encode(foo))),
      %% The atom carries the UTF-8 bytes as latin1 characters.
      Utf8Bytes = unicode:characters_to_binary([16#0001d120]),
      ByteAtom = binary_to_atom(Utf8Bytes, latin1),
      ?assertEqual(<<"\"\\ud834\\udd20\"">>,
                   iolist_to_binary(encode(ByteAtom))),
      ok.
  %% Some forms are accepted as keys that would not be strings in other
  %% cases: atoms, binaries, character lists and integers.
  key_encode_test() ->
      FooInputs = [{struct, [{foo, 1}]},
                   {struct, [{<<"foo">>, 1}]},
                   {struct, [{"foo", 1}]},
                   [{foo, 1}],
                   [{<<"foo">>, 1}],
                   [{"foo", 1}]],
      lists:foreach(
        fun (Input) ->
                ?assertEqual(<<"{\"foo\":1}">>,
                             iolist_to_binary(encode(Input)))
        end, FooInputs),
      ?assertEqual(<<"{\"\\ud834\\udd20\":1}">>,
                   iolist_to_binary(encode({struct, [{[16#0001d120], 1}]}))),
      ?assertEqual(<<"{\"1\":1}">>,
                   iolist_to_binary(encode({struct, [{1, 1}]}))),
      ok.
  %% Characters that need escaping are reported as unsafe by both safety
  %% checks and still round-trip through encode/decode.
  unsafe_chars_test() ->
      Unsafe = "\"\\\b\f\n\r\t",
      lists:foreach(
        fun (C) ->
                ?assertEqual(false, json_string_is_safe([C])),
                ?assertEqual(false, json_bin_is_safe(<<C>>)),
                ?assertEqual(<<C>>, decode(encode(<<C>>)))
        end, Unsafe),
      BigChar = 16#0001d120,
      BigUtf8 = unicode:characters_to_binary([BigChar]),
      ?assertEqual(false, json_string_is_safe([BigChar])),
      ?assertEqual(false, json_bin_is_safe(BigUtf8)),
      BigAtom = binary_to_atom(BigUtf8, latin1),
      ?assertEqual([BigChar],
                   unicode:characters_to_list(decode(encode(BigAtom)))),
      ?assertEqual(false, json_string_is_safe([16#10ffff])),
      ?assertEqual(false,
                   json_bin_is_safe(unicode:characters_to_binary([16#10ffff]))),
      %% solidus can be escaped but isn't unsafe by default
      ?assertEqual(<<"/">>, decode(<<"\"\\/\"">>)),
      ok.
  %% Small integers, including zero, decode to Erlang integers.
  int_test() ->
      lists:foreach(
        fun ({Expected, Text}) ->
                ?assertEqual(Expected, decode(Text))
        end, [{0, "0"}, {1, "1"}, {11, "11"}]),
      ok.
  %% Integers far beyond 64 bits are encoded digit for digit.
  large_int_test() ->
      Negative = -2147483649214748364921474836492147483649,
      Positive = 2147483649214748364921474836492147483649,
      ?assertEqual(<<"-2147483649214748364921474836492147483649">>,
                   iolist_to_binary(encode(Negative))),
      ?assertEqual(<<"2147483649214748364921474836492147483649">>,
                   iolist_to_binary(encode(Positive))),
      ok.
  %% Floats just outside the 32-bit integer range keep a ".0" suffix.
  float_test() ->
      Below = iolist_to_binary(encode(-2147483649.0)),
      ?assertEqual(<<"-2147483649.0">>, Below),
      Above = iolist_to_binary(encode(2147483648.0)),
      ?assertEqual(<<"2147483648.0">>, Above),
      ok.
  %% Without a handler an unknown term makes encode exit; with a handler
  %% the handler's replacement term is encoded instead.
  handler_test() ->
      Unknown = {x, y},
      ?assertEqual({'EXIT', {json_encode, {bad_term, {x,y}}}},
                   catch encode(Unknown)),
      Replace = fun ({x,y}) -> [] end,
      Enc = encoder([{handler, Replace}]),
      ?assertEqual(<<"[]">>, iolist_to_binary(Enc(Unknown))),
      ok.
  %% Every spelling of the empty object encodes to "{}".
  encode_empty_test_() ->
      Cases = [{"eep18 {}", {}},
               {"eep18 {[]}", {[]}},
               {"{struct, []}", {struct, []}}],
      [{Title, ?_assertEqual(<<"{}">>, iolist_to_binary(encode(Term)))}
       || {Title, Term} <- Cases].
  %% Whatever object format decode/2 produces, encode/1 must turn it back
  %% into the same JSON text.
  encode_test_() ->
      Props = [{<<"k">>, <<"v">>}],
      JSON = iolist_to_binary(encode({struct, Props})),
      Formats = [struct, eep18, proplist],
      [{atom_to_list(Format),
        ?_assertEqual(JSON,
                      iolist_to_binary(
                        encode(decode(JSON, [{format, Format}]))))}
       || Format <- Formats].
  %% Each {format, F} option yields the documented object representation.
  format_test_() ->
      Props = [{<<"k">>, <<"v">>}],
      JSON = iolist_to_binary(encode({struct, Props})),
      Expectations = [{struct, {struct, Props}},
                      {eep18, {Props}},
                      {proplist, Props}],
      [{atom_to_list(Format),
        ?_assertEqual(Expected, decode(JSON, [{format, Format}]))}
       || {Format, Expected} <- Expectations].
  %% The legacy {array, List} wrapper encodes like the bare list.
  array_test() ->
      Items = [<<"hello">>],
      Encoded = encode({array, Items}),
      ?assertEqual(Items, decode(Encoded)).
  %% A value above 16#10FFFF is rejected with a bad_char exit.
  bad_char_test() ->
      TooBig = 16#110000,
      Expected = {'EXIT', {json_encode, {bad_char, 16#110000}}},
      ?assertEqual(Expected, catch json_string_is_safe([TooBig])).
  %% Round-trip the boundary code points of each UTF-8 encoding length,
  %% once with escaped output and once with raw UTF-8 output.
  utf8_roundtrip_test_() ->
      %% These are the boundary cases for UTF8 encoding
      OneByte = [16#00, 16#7f],                       %% 7 bits -> 1 byte
      TwoBytes = [16#080, 16#07ff],                   %% 11 bits -> 2 bytes
      ThreeBytes = [16#0800, 16#ffff, 16#d7ff, 16#e000], %% 16 bits -> 3 bytes
      FourBytes = [16#010000, 16#10ffff],             %% 21 bits -> 4 bytes
      Codepoints = OneByte ++ TwoBytes ++ ThreeBytes ++ FourBytes,
      UTF8 = unicode:characters_to_binary(Codepoints),
      RawEncode = encoder([{utf8, true}]),
      [{"roundtrip escaped",
        ?_assertEqual(UTF8, decode(encode(UTF8)))},
       {"roundtrip utf8",
        ?_assertEqual(UTF8, decode(RawEncode(UTF8)))}].
  %% The code points 16#ffff and 16#fffe round-trip in both output modes.
  utf8_non_character_test_() ->
      Input = unicode:characters_to_binary([16#ffff, 16#fffe]),
      RawEncode = encoder([{utf8, true}]),
      [{"roundtrip escaped", ?_assertEqual(Input, decode(encode(Input)))},
       {"roundtrip utf8", ?_assertEqual(Input, decode(RawEncode(Input)))}].
  898. -ifndef(map_unavailable).
  %% With {format, map} nested objects decode to nested maps.
  decode_map_test() ->
      Json = "{\"var1\": 3, \"var2\": {\"var3\": 7}}",
      Inner = #{<<"var3">> => 7},
      Expected = #{<<"var1">> => 3, <<"var2">> => Inner},
      ?assertEqual(Expected, decode(Json, [{format, map}])).
  %% Nested maps with atom keys encode to nested JSON objects.
  encode_map_test() ->
      Input = #{a => 1, b => #{ c => 2}},
      Expected = <<"{\"a\":1,\"b\":{\"c\":2}}">>,
      ?assertEqual(Expected, iolist_to_binary(encode(Input))).
  %% The empty map encodes straight to the binary "{}".
  encode_empty_map_test() ->
      Encoded = encode(#{}),
      ?assertEqual(<<"{}">>, Encoded).
  908. -endif.
  909. -endif.