您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

116 行
3.4 KiB

  1. % This file is part of Jiffy released under the MIT license.
  2. % See the LICENSE file for more information.
  3. -module(jiffy_utf8).
  4. -export([fix/1]).
  5. fix({Props}) ->
  6. fix_props(Props, []);
  7. fix(Values) when is_list(Values) ->
  8. fix_array(Values, []);
  9. fix(Bin) when is_binary(Bin) ->
  10. fix_bin(Bin);
  11. fix(Val) ->
  12. maybe_map(Val).
  13. -ifndef(JIFFY_NO_MAPS).
  14. maybe_map(Obj) when is_map(Obj) ->
  15. maps:fold(fun fix_map/3, maps:new(), Obj);
  16. maybe_map(Val) ->
  17. Val.
  18. fix_map(K, V, Acc) ->
  19. maps:put(fix(K), fix(V), Acc).
  20. -else.
  21. maybe_map(Val) ->
  22. Val.
  23. -endif.
  24. fix_props([], Acc) ->
  25. {lists:reverse(Acc)};
  26. fix_props([{K0, V0} | Rest], Acc) ->
  27. K = fix(K0),
  28. V = fix(V0),
  29. fix_props(Rest, [{K, V} | Acc]).
  30. fix_array([], Acc) ->
  31. lists:reverse(Acc);
  32. fix_array([Val | Rest], Acc0) ->
  33. Acc = [fix(Val) | Acc0],
  34. fix_array(Rest, Acc).
  35. fix_bin(Bin) ->
  36. Dec0 = loose_decode(Bin, 0, []),
  37. Dec1 = try_combining(Dec0, []),
  38. Dec2 = replace_garbage(Dec1, []),
  39. list_to_binary(xmerl_ucs:to_utf8(Dec2)).
  40. loose_decode(Bin, O, Acc) ->
  41. case Bin of
  42. <<_:O/binary>> ->
  43. lists:reverse(Acc);
  44. <<_:O/binary, 0:1/integer, V:7/integer, _/binary>> ->
  45. loose_decode(Bin, O+1, [V | Acc]);
  46. <<_:O/binary, 6:3/integer, V0:5/integer,
  47. 2:2/integer, V1:6/integer, _/binary>> ->
  48. B = <<0:5/integer, V0:5/integer, V1:6/integer>>,
  49. <<V:16/integer>> = B,
  50. loose_decode(Bin, O+2, [V | Acc]);
  51. <<_:O/binary, 14:4/integer, V0:4/integer,
  52. 2:2/integer, V1:6/integer,
  53. 2:2/integer, V2:6/integer, _/binary>> ->
  54. B = <<V0:4/integer, V1:6/integer, V2:6/integer>>,
  55. <<V:16/integer>> = B,
  56. loose_decode(Bin, O+3, [V | Acc]);
  57. <<_:O/binary, 30:5/integer, V0:3/integer,
  58. 2:2/integer, V1:6/integer,
  59. 2:2/integer, V2:6/integer,
  60. 2:2/integer, V3:6/integer, _/binary>> ->
  61. B = <<0:11/integer, V0:3/integer, V1:6/integer,
  62. V2:6/integer, V3:6/integer>>,
  63. <<V:32/integer>> = B,
  64. loose_decode(Bin, O+4, [V | Acc]);
  65. <<_:O/binary, _:8/integer, R/binary>> ->
  66. % Broken lead or continuation byte. Discard first
  67. % byte and all broken continuations. Replace the
  68. % whole mess with a replacement code point.
  69. T = 1 + count_continuation_bytes(R, 0),
  70. loose_decode(Bin, O+T, [16#FFFD | Acc])
  71. end.
  72. count_continuation_bytes(R, O) ->
  73. case R of
  74. <<_:O/binary, 2:2/integer, _:6/integer, _/binary>> ->
  75. count_continuation_bytes(R, O+1);
  76. _ ->
  77. O
  78. end.
  79. try_combining([], Acc) ->
  80. lists:reverse(Acc);
  81. try_combining([H, L | Rest], Acc) when H >= 16#D800, H =< 16#DFFF,
  82. L >= 16#D800, L =< 16#DFFF ->
  83. Bin = <<H:16/big-unsigned-integer, L:16/big-unsigned-integer>>,
  84. try
  85. [C] = xmerl_ucs:from_utf16be(Bin),
  86. try_combining(Rest, [C | Acc])
  87. catch _:_ ->
  88. try_combining(Rest, [L, H | Acc])
  89. end;
  90. try_combining([C | Rest], Acc) ->
  91. try_combining(Rest, [C | Acc]).
  92. replace_garbage([], Acc) ->
  93. lists:reverse(Acc);
  94. replace_garbage([C | Rest], Acc) ->
  95. case xmerl_ucs:is_unicode(C) of
  96. true -> replace_garbage(Rest, [C | Acc]);
  97. false -> replace_garbage(Rest, [16#FFFD | Acc])
  98. end.