erlang自定义二进制协议
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

518 lines
20 KiB

  %% Memoizing PEG parser for protocol/type definition text.
  -module(protoParse).
  -export([parseParse/1, parseFile/1]).

  %% Switches tested by the -ifdef(...) sections of the combinator library
  %% further down in this module; only combinators named here are compiled.
  -define(p_anything, true).
  -define(p_charclass, true).
  -define(p_choose, true).
  -define(p_label, true).
  -define(p_not, true).
  -define(p_one_or_more, true).
  -define(p_optional, true).
  -define(p_scan, true).
  -define(p_seq, true).
  -define(p_string, true).
  -define(p_zero_or_more, true).
  %% Reads Filename and hands its contents to parseParse/1.
  %% Any result of file:read_file/1 other than {ok, Bin} is returned unchanged.
  -spec parseFile(file:name()) -> any().
  parseFile(Filename) ->
      parse_file_contents(file:read_file(Filename)).

  %% Dispatches on the outcome of file:read_file/1.
  parse_file_contents({ok, Bin}) -> parseParse(Bin);
  parse_file_contents(Err) -> Err.
  %% Parses definition text given as a binary or as character data.
  %% Lists are first converted with unicode:characters_to_binary/1.
  %% Returns the AST when the top-level rule consumed the whole input; otherwise
  %% returns whatever the top-level rule produced (a {fail, _} tuple or a
  %% {Partial, Rest, Index} triple).
  %% The memo table is released in an `after` section so that a crash inside a
  %% rule or transform no longer leaves the ETS table behind.
  -spec parseParse(binary() | list()) -> any().
  parseParse(List) when is_list(List) ->
      parseParse(unicode:characters_to_binary(List));
  parseParse(Input) when is_binary(Input) ->
      setup_memo(),
      try 'all'(Input, {{line, 1}, {column, 1}}) of
          {AST, <<>>, _Index} -> AST;
          Any -> Any
      after
          release_memo()
      end.
  %% Top-level rule: optional blanks, then any number of `type` or `protocol`
  %% definitions, each followed by optional blanks.
  %% Yields the parsed definitions in source order.
  -spec 'all'(input(), index()) -> parse_result().
  'all'(Input, Index) ->
      Parse = fun(I, D) ->
          Definition = p_choose([fun 'type'/2, fun 'protocol'/2]),
          Entry = p_seq([Definition, fun 'blank0'/2]),
          (p_seq([fun 'blank0'/2, p_zero_or_more(Entry)]))(I, D)
      end,
      Transform = fun(Node, _Idx) ->
          [_LeadingBlanks, Entries] = Node,
          %% Each entry is [Definition, TrailingBlanks]; keep the definition only.
          [Definition || [Definition | _] <- Entries]
      end,
      p(Input, Index, 'all', Parse, Transform).
  %% Rule for `Name Tag { subproto* }`.
  %% Yields {protocol, Name, Tag, SubProtoList} with Name as a string and Tag as
  %% an integer.
  -spec 'protocol'(input(), index()) -> parse_result().
  'protocol'(Input, Index) ->
      Parse = fun(I, D) ->
          SubEntry = p_seq([fun 'subproto'/2, fun 'blank0'/2]),
          Rule = p_seq([
              p_label('name', fun 'name'/2),
              fun 'blanks'/2,
              p_label('tag', fun 'tag'/2),
              fun 'blank0'/2,
              p_string(<<"{">>),
              fun 'blank0'/2,
              p_label('sub', p_zero_or_more(SubEntry)),
              p_string(<<"}">>)
          ]),
          Rule(I, D)
      end,
      Transform = fun(Node, _Idx) ->
          NameBin = iolist_to_binary(proplists:get_value(name, Node)),
          TagBin = iolist_to_binary(proplists:get_value(tag, Node)),
          %% Each sub entry is [SubProto, TrailingBlanks].
          Subs = [Sub || [Sub, _Blanks] <- proplists:get_value(sub, Node)],
          {protocol, binary_to_list(NameBin), binary_to_integer(TagBin), Subs}
      end,
      p(Input, Index, 'protocol', Parse, Transform).
  %% Rule for `request <payload>` or `response <payload>`, where the payload is
  %% a type name or an inline struct.
  %% Yields {Direction, Payload} with Direction as a string.
  -spec 'subproto'(input(), index()) -> parse_result().
  'subproto'(Input, Index) ->
      Parse = fun(I, D) ->
          Direction = p_choose([p_string(<<"request">>), p_string(<<"response">>)]),
          Payload = p_choose([fun 'typename'/2, fun 'struct'/2]),
          Rule = p_seq([
              p_label('parta', Direction),
              fun 'blanks'/2,
              p_label('partb', Payload)
          ]),
          Rule(I, D)
      end,
      Transform = fun(Node, _Idx) ->
          Parta = binary_to_list(iolist_to_binary(proplists:get_value(parta, Node))),
          {Parta, subproto_payload(proplists:get_value(partb, Node))}
      end,
      p(Input, Index, 'subproto', Parse, Transform).

  %% A tuple payload (the shape produced by 'typename') is reduced to its first
  %% element; any other payload is passed through unchanged.
  subproto_payload(Partb) when is_tuple(Partb) -> element(1, Partb);
  subproto_payload(Partb) -> Partb.
  %% Rule for `.Name <struct>`. Yields {Name, Tag, Struct}.
  %% Tag is read from the process-dictionary key `messageid`, which is then
  %% stored back incremented by one. The key is not initialised here, so it must
  %% already hold a number when this rule matches.
  -spec 'type'(input(), index()) -> parse_result().
  'type'(Input, Index) ->
      Parse = fun(I, D) ->
          Rule = p_seq([
              p_string(<<".">>),
              p_label('name', fun 'name'/2),
              fun 'blank0'/2,
              p_label('struct', fun 'struct'/2)
          ]),
          Rule(I, D)
      end,
      Transform = fun(Node, _Idx) ->
          NameBin = iolist_to_binary(proplists:get_value(name, Node)),
          Fields = proplists:get_value(struct, Node),
          MessageId = get(messageid),
          put(messageid, MessageId + 1),
          {binary_to_list(NameBin), MessageId, Fields}
      end,
      p(Input, Index, 'type', Parse, Transform).
  %% Rule for `{ (field | type)* }` with optional blanks between members.
  %% Yields the list of parsed members in source order.
  -spec 'struct'(input(), index()) -> parse_result().
  'struct'(Input, Index) ->
      Parse = fun(I, D) ->
          Member = p_seq([p_choose([fun 'field'/2, fun 'type'/2]), fun 'blank0'/2]),
          Rule = p_seq([
              p_string(<<"{">>),
              fun 'blank0'/2,
              p_zero_or_more(Member),
              p_string(<<"}">>)
          ]),
          Rule(I, D)
      end,
      Transform = fun(Node, _Idx) ->
          [_Open, _Blanks, Members, _Close] = Node,
          %% Each member is [Parsed, TrailingBlanks].
          [Parsed || [Parsed | _] <- Members]
      end,
      p(Input, Index, 'struct', Parse, Transform).
  %% Rule for `Name : [*]TypeName [(Key)]`.
  %% Yields {Name, FieldType, DataType}:
  %%   boolean -> {1, 1}, integer -> {2, 2}, string -> {3, 3},
  %%   any other type name -> {5, TypeName ++ SubName};
  %%   when the field is marked with exactly one `*`, FieldType is 4 instead.
  %% The optional `(Key)` suffix is parsed under the label `key` but is not part
  %% of the result.
  -spec 'field'(input(), index()) -> parse_result().
  'field'(Input, Index) ->
      Parse = fun(I, D) ->
          Rule = p_seq([
              p_label('name', fun 'name'/2),
              fun 'blanks'/2,
              p_string(<<":">>),
              fun 'blank0'/2,
              p_label('isarray', p_zero_or_more(p_string(<<"*">>))),
              p_label('datatype', fun 'typename'/2),
              p_label('key', p_zero_or_more(fun 'mainkey'/2))
          ]),
          Rule(I, D)
      end,
      Transform = fun(Node, _Idx) ->
          Name = binary_to_list(iolist_to_binary(proplists:get_value(name, Node))),
          IsArray = proplists:get_value(isarray, Node) =:= [<<"*">>],
          {ScalarType, DataType} = field_data_type(proplists:get_value(datatype, Node)),
          FieldType =
              case IsArray of
                  true -> 4;
                  false -> ScalarType
              end,
          {Name, FieldType, DataType}
      end,
      p(Input, Index, 'field', Parse, Transform).

  %% Maps a parsed type name to {FieldType for the non-array case, DataType}.
  field_data_type({"boolean", _}) -> {1, 1};
  field_data_type({"integer", _}) -> {2, 2};
  field_data_type({"string", _}) -> {3, 3};
  field_data_type({Other, SubName}) -> {5, Other ++ SubName}.
  %% Succeeds, consuming nothing, only when no character is left in the input.
  %% It was previously written as "not followed by a literal dot", which also
  %% succeeded in the middle of the input before any character other than `.`.
  -spec 'eof'(input(), index()) -> parse_result().
  'eof'(Input, Index) ->
      Parse = fun(I, D) -> (p_not(p_anything()))(I, D) end,
      p(Input, Index, 'eof', Parse, fun(Node, _Idx) -> Node end).
  %% Matches a line feed, optionally preceded by a carriage return.
  -spec 'newline'(input(), index()) -> parse_result().
  'newline'(Input, Index) ->
      Parse = fun(I, D) ->
          CarriageReturn = p_optional(p_charclass(<<"[\r]">>)),
          LineFeed = p_charclass(<<"[\n]">>),
          (p_seq([CarriageReturn, LineFeed]))(I, D)
      end,
      p(Input, Index, 'newline', Parse, fun(Node, _Idx) -> Node end).
  %% Matches a `#` comment running to the end of the line (or of the input).
  %% A comment of the form `#$errcode Name Description` additionally registers
  %% an error code in the process dictionary (see record_errcode/1).
  %% The parse node itself is returned unchanged.
  -spec 'line_comment'(input(), index()) -> parse_result().
  'line_comment'(Input, Index) ->
      Parse = fun(I, D) ->
          ErrCodeDecl = p_seq([
              p_string(<<"$errcode">>),
              fun 'blanks'/2,
              fun 'name'/2,
              fun 'blanks'/2,
              fun 'words'/2
          ]),
          RestOfLine = p_zero_or_more(p_seq([p_not(fun 'newline'/2), p_anything()])),
          Terminator = p_choose([fun 'newline'/2, fun 'eof'/2]),
          Rule = p_seq([
              p_string(<<"#">>),
              p_label('errorcode', p_zero_or_more(ErrCodeDecl)),
              RestOfLine,
              Terminator
          ]),
          Rule(I, D)
      end,
      Transform = fun(Node, _Idx) ->
          record_errcode(proplists:get_value(errorcode, Node)),
          Node
      end,
      p(Input, Index, 'line_comment', Parse, Transform).

  %% Registers one `$errcode` declaration. Reads the process-dictionary keys
  %% `errorname` (list of {Name, Code, Description}) and `errorid` (next code);
  %% a name not yet in the list is prepended with the current code and the
  %% counter is incremented. Anything else is ignored.
  record_errcode([[_Keyword, _Gap1, NameNode, _Gap2, CommentNode]]) ->
      ErrName = binary_to_list(iolist_to_binary(NameNode)),
      ComDesc = iolist_to_binary(CommentNode),
      ErrNameList = erlang:get(errorname),
      IsNew = ErrName =/= [] andalso lists:keyfind(ErrName, 1, ErrNameList) =:= false,
      case IsNew of
          true ->
              ErrCode = erlang:get(errorid),
              erlang:put(errorname, [{ErrName, ErrCode, ComDesc} | ErrNameList]),
              erlang:put(errorid, ErrCode + 1);
          _ ->
              skip
      end;
  record_errcode(_NoDeclaration) ->
      skip.
  %% One blank item: a space or tab, a newline, an `errorcode` item or a line
  %% comment, tried in that order.
  %% NOTE(review): 'errorcode'/2 is not defined in the part of the module
  %% visible here - confirm it exists, otherwise this reference cannot compile.
  -spec 'blank'(input(), index()) -> parse_result().
  'blank'(Input, Index) ->
      Parse = fun(I, D) ->
          Alternatives = [
              p_charclass(<<"[\s\t]">>),
              fun 'newline'/2,
              fun 'errorcode'/2,
              fun 'line_comment'/2
          ],
          (p_choose(Alternatives))(I, D)
      end,
      p(Input, Index, 'blank', Parse, fun(Node, _Idx) -> Node end).
  %% Zero or more blank items; the node is the list of matched items.
  -spec 'blank0'(input(), index()) -> parse_result().
  'blank0'(Input, Index) ->
      Parse = fun(I, D) ->
          Repeat = p_zero_or_more(fun 'blank'/2),
          Repeat(I, D)
      end,
      p(Input, Index, 'blank0', Parse, fun(Node, _Idx) -> Node end).
  %% One or more blank items; fails when no blank item is present.
  -spec 'blanks'(input(), index()) -> parse_result().
  'blanks'(Input, Index) ->
      Parse = fun(I, D) ->
          Repeat = p_one_or_more(fun 'blank'/2),
          Repeat(I, D)
      end,
      p(Input, Index, 'blanks', Parse, fun(Node, _Idx) -> Node end).
  %% One ASCII letter (lower or upper case) or an underscore.
  -spec 'alpha'(input(), index()) -> parse_result().
  'alpha'(Input, Index) ->
      Parse = fun(I, D) ->
          Alternatives = [
              p_charclass(<<"[a-z]">>),
              p_charclass(<<"[A-Z]">>),
              p_string(<<"_">>)
          ],
          (p_choose(Alternatives))(I, D)
      end,
      p(Input, Index, 'alpha', Parse, fun(Node, _Idx) -> Node end).
  %% One character, provided a newline does not start at the current position.
  %% The node is [[], Char]: the empty result of the negative lookahead followed
  %% by the matched character.
  -spec 'str'(input(), index()) -> parse_result().
  'str'(Input, Index) ->
      Parse = fun(I, D) ->
          NotNewline = p_not(fun 'newline'/2),
          AnyChar = p_charclass(<<".">>),
          (p_seq([NotNewline, AnyChar]))(I, D)
      end,
      p(Input, Index, 'str', Parse, fun(Node, _Idx) -> Node end).
  %% One `alpha` character or one decimal digit.
  -spec 'alnum'(input(), index()) -> parse_result().
  'alnum'(Input, Index) ->
      Parse = fun(I, D) ->
          Alternatives = [fun 'alpha'/2, p_charclass(<<"[0-9]">>)],
          (p_choose(Alternatives))(I, D)
      end,
      p(Input, Index, 'alnum', Parse, fun(Node, _Idx) -> Node end).
  %% An identifier: one `alpha` followed by any number of `alnum` characters.
  %% The node is [First, RestList].
  -spec 'word'(input(), index()) -> parse_result().
  'word'(Input, Index) ->
      Parse = fun(I, D) ->
          Tail = p_zero_or_more(fun 'alnum'/2),
          (p_seq([fun 'alpha'/2, Tail]))(I, D)
      end,
      p(Input, Index, 'word', Parse, fun(Node, _Idx) -> Node end).
  %% Free text: any number of `str` characters, with any number of `word`s as
  %% the second alternative. The node is a one-element list holding the match.
  %% Note: the zero-or-more combinator always succeeds, so the first alternative
  %% is always the one taken.
  -spec 'words'(input(), index()) -> parse_result().
  'words'(Input, Index) ->
      Parse = fun(I, D) ->
          Body = p_choose([p_zero_or_more(fun 'str'/2), p_zero_or_more(fun 'word'/2)]),
          (p_seq([Body]))(I, D)
      end,
      p(Input, Index, 'words', Parse, fun(Node, _Idx) -> Node end).
  %% A name is a single `word`; memoized separately under the rule name 'name'.
  -spec 'name'(input(), index()) -> parse_result().
  'name'(Input, Index) ->
      Parse = fun(I, D) -> 'word'(I, D) end,
      p(Input, Index, 'name', Parse, fun(Node, _Idx) -> Node end).
  %% A possibly dotted type name such as `a` or `a.b.c`.
  %% Yields {Head, SubName}, both strings: Head is the first word and SubName is
  %% the remainder including its dots ("" when there is none).
  -spec 'typename'(input(), index()) -> parse_result().
  'typename'(Input, Index) ->
      Parse = fun(I, D) ->
          Suffix = p_seq([p_string(<<".">>), fun 'word'/2]),
          (p_seq([fun 'word'/2, p_zero_or_more(Suffix)]))(I, D)
      end,
      Transform = fun(Node, _Idx) ->
          [Head, Suffixes] = Node,
          %% Suffixes is a list of [Dot, Word] pairs, i.e. already an iolist.
          SubName = binary_to_list(iolist_to_binary(Suffixes)),
          {binary_to_list(iolist_to_binary(Head)), SubName}
      end,
      p(Input, Index, 'typename', Parse, Transform).
  %% One or more decimal digits; the node is the list of matched digits.
  -spec 'tag'(input(), index()) -> parse_result().
  'tag'(Input, Index) ->
      Parse = fun(I, D) ->
          Digits = p_one_or_more(p_charclass(<<"[0-9]">>)),
          Digits(I, D)
      end,
      p(Input, Index, 'tag', Parse, fun(Node, _Idx) -> Node end).
  %% Rule for `( Name )` with optional blanks inside the parentheses.
  %% Yields the parse node of the enclosed name.
  -spec 'mainkey'(input(), index()) -> parse_result().
  'mainkey'(Input, Index) ->
      Parse = fun(I, D) ->
          Rule = p_seq([
              p_string(<<"(">>),
              fun 'blank0'/2,
              p_label('name', fun 'name'/2),
              fun 'blank0'/2,
              p_string(<<")">>)
          ]),
          Rule(I, D)
      end,
      Transform = fun(Node, _Idx) -> proplists:get_value(name, Node) end,
      p(Input, Index, 'mainkey', Parse, Transform).
  -file("peg_includes.hrl", 1).
  %% Position in the input as line and column; parsing starts at line 1, column 1.
  -type index() :: {{line, pos_integer()}, {column, pos_integer()}}.
  %% The text still to be parsed.
  -type input() :: binary().
  -type parse_failure() :: {fail, term()}.
  %% {ParsedNode, RemainingInput, IndexAfterMatch}.
  -type parse_success() :: {term(), input(), index()}.
  -type parse_result() :: parse_failure() | parse_success().
  -type parse_fun() :: fun((input(), index()) -> parse_result()).
  %% Transform callbacks receive the matched parse node (any term) and the index
  %% at which the rule started; the first argument is not the raw input.
  -type xform_fun() :: fun((term(), index()) -> term()).
  %% Runs one named rule with memoization.
  %% If a result for {StartIndex, Name} is already stored it is returned as is.
  %% Otherwise ParseFun is run; a successful match is passed through
  %% TransformFun together with StartIndex. The outcome - success or failure -
  %% is stored before being returned.
  -spec p(input(), index(), atom(), parse_fun(), xform_fun()) -> parse_result().
  p(Inp, StartIndex, Name, ParseFun, TransformFun) ->
      case get_memo(StartIndex, Name) of
          {ok, Memo} ->
              Memo;
          _ ->
              Result = p_finish(ParseFun(Inp, StartIndex), TransformFun, StartIndex),
              memoize(StartIndex, Name, Result),
              Result
      end.

  %% Failures pass through untouched; matches get their node transformed.
  p_finish({fail, _} = Failure, _TransformFun, _StartIndex) ->
      Failure;
  p_finish({Match, InpRem, NewIndex}, TransformFun, StartIndex) ->
      {TransformFun(Match, StartIndex), InpRem, NewIndex}.
  %% Creates the ETS table used for memoization, records it in the process
  %% dictionary under {parse_memo_table, ?MODULE} and returns it.
  %% (put/2 yields the previous dictionary value, so the table is returned
  %% explicitly to honour the spec.)
  -spec setup_memo() -> ets:tid().
  setup_memo() ->
      Table = ets:new(?MODULE, [set]),
      put({parse_memo_table, ?MODULE}, Table),
      Table.
  %% Deletes the memo table of the calling process.
  -spec release_memo() -> true.
  release_memo() ->
      Table = memo_table_name(),
      ets:delete(Table).
  %% Stores Result for rule Name at Index. Each table row is {Index, Plist},
  %% where Plist holds {RuleName, Result} pairs; the new pair is prepended.
  -spec memoize(index(), atom(), parse_result()) -> true.
  memoize(Index, Name, Result) ->
      Stored =
          case ets:lookup(memo_table_name(), Index) of
              [{Index, Plist}] -> Plist;
              [] -> []
          end,
      ets:insert(memo_table_name(), {Index, [{Name, Result} | Stored]}).
  %% Looks up the stored result of rule Name at Index.
  %% Returns {ok, Result}, or {error, not_found} when nothing is stored for that
  %% index or for that rule at that index.
  -spec get_memo(index(), atom()) -> {ok, term()} | {error, not_found}.
  get_memo(Index, Name) ->
      case ets:lookup(memo_table_name(), Index) of
          [{Index, Plist}] -> memo_entry(proplists:lookup(Name, Plist), Name);
          [] -> {error, not_found}
      end.

  %% Interprets the outcome of proplists:lookup/2 for rule Name.
  memo_entry({Name, Result}, Name) -> {ok, Result};
  memo_entry(_Other, _Name) -> {error, not_found}.
  %% Returns the memo table recorded in the process dictionary by setup_memo/0.
  -spec memo_table_name() -> ets:tid().
  memo_table_name() ->
      Key = {parse_memo_table, ?MODULE},
      get(Key).
  283. -ifdef(p_eof).
  %% Parser that succeeds with the node `eof` on empty input and fails otherwise.
  -spec p_eof() -> parse_fun().
  p_eof() ->
      fun(Input, Index) ->
          case Input of
              <<>> -> {eof, [], Index};
              _ -> {fail, {expected, eof, Index}}
          end
      end.
  288. -endif.
  289. -ifdef(p_optional).
  %% Makes P optional: when P fails the result is an empty node with input and
  %% index untouched; when P succeeds its result is returned as is.
  -spec p_optional(parse_fun()) -> parse_fun().
  p_optional(P) ->
      fun(Input, Index) ->
          Outcome = P(Input, Index),
          case Outcome of
              {_, _, _} -> Outcome;
              {fail, _} -> {[], Input, Index}
          end
      end.
  300. -endif.
  301. -ifdef(p_not).
  %% Negative lookahead: succeeds with an empty node, consuming nothing, when P
  %% fails; fails with {no_match, Result} when P succeeds.
  -spec p_not(parse_fun()) -> parse_fun().
  p_not(P) ->
      fun(Input, Index) ->
          case P(Input, Index) of
              {Result, _, _} -> {fail, {expected, {no_match, Result}, Index}};
              {fail, _} -> {[], Input, Index}
          end
      end.
  311. -endif.
  312. -ifdef(p_assert).
  %% Positive lookahead: P's failure is returned unchanged; on success the
  %% result is an empty node with input and index untouched.
  -spec p_assert(parse_fun()) -> parse_fun().
  p_assert(P) ->
      fun(Input, Index) ->
          Outcome = P(Input, Index),
          case Outcome of
              {fail, _} -> Outcome;
              _ -> {[], Input, Index}
          end
      end.
  321. -endif.
  322. -ifdef(p_seq).
  %% Builds a parser that runs the parsers in P one after another; see p_all/4
  %% for the result shape.
  -spec p_seq([parse_fun()]) -> parse_fun().
  p_seq(P) ->
      fun(Input, Index) -> p_all(P, Input, Index, []) end.
  %% Runs the parsers in order, threading the remaining input and index.
  %% Returns the list of their nodes (in order) once all have succeeded, or the
  %% first failure encountered.
  -spec p_all([parse_fun()], input(), index(), [term()]) -> parse_result().
  p_all([], Inp, Index, Accum) ->
      {lists:reverse(Accum), Inp, Index};
  p_all([P | Parsers], Inp, Index, Accum) ->
      Outcome = P(Inp, Index),
      case Outcome of
          {fail, _} ->
              Outcome;
          {Result, InpRem, NewIndex} ->
              p_all(Parsers, InpRem, NewIndex, [Result | Accum])
      end.
  335. -endif.
  336. -ifdef(p_choose).
  %% Builds an ordered-choice parser over Parsers; see p_attempt/4 for how the
  %% alternatives are tried.
  -spec p_choose([parse_fun()]) -> parse_fun().
  p_choose(Parsers) ->
      fun(Input, Index) -> p_attempt(Parsers, Input, Index, none) end.
  %% Tries the parsers in order on the same input and returns the first result
  %% that is not a failure. When every parser fails, the failure of the first
  %% one is returned (or the initial accumulator if the list was empty).
  -spec p_attempt([parse_fun()], input(), index(), none | parse_failure()) -> parse_result().
  p_attempt([], _Input, _Index, Failure) ->
      Failure;
  p_attempt([P | Parsers], Input, Index, FirstFailure) ->
      case P(Input, Index) of
          {fail, _} = Failure when FirstFailure =:= none ->
              p_attempt(Parsers, Input, Index, Failure);
          {fail, _} ->
              p_attempt(Parsers, Input, Index, FirstFailure);
          Result ->
              Result
      end.
  353. -endif.
  354. -ifdef(p_zero_or_more).
  %% Builds a parser that applies P repeatedly via p_scan/4 and collects the
  %% nodes in a list.
  -spec p_zero_or_more(parse_fun()) -> parse_fun().
  p_zero_or_more(P) ->
      fun(Input, Index) -> p_scan(P, Input, Index, []) end.
  360. -endif.
  361. -ifdef(p_one_or_more).
  %% Like p_zero_or_more/1 but requires at least one match.
  %% When nothing matched, P is run once more to obtain its failure reason,
  %% which is reported wrapped as {at_least_one, Reason}.
  -spec p_one_or_more(parse_fun()) -> parse_fun().
  p_one_or_more(P) ->
      fun(Input, Index) ->
          case p_scan(P, Input, Index, []) of
              {[_ | _], _, _} = Matched ->
                  Matched;
              _ ->
                  {fail, {expected, Failure, _}} = P(Input, Index),
                  {fail, {expected, {at_least_one, Failure}, Index}}
          end
      end.
  374. -endif.
  375. -ifdef(p_label).
  %% Wraps the node produced by P as {Tag, Node}; failures pass through.
  -spec p_label(atom(), parse_fun()) -> parse_fun().
  p_label(Tag, P) ->
      fun(Input, Index) ->
          Outcome = P(Input, Index),
          case Outcome of
              {fail, _} -> Outcome;
              {Result, InpRem, NewIndex} -> {{Tag, Result}, InpRem, NewIndex}
          end
      end.
  386. -endif.
  387. -ifdef(p_scan).
  %% Applies P repeatedly until it fails or the input is empty, collecting the
  %% nodes. Always succeeds; the node list is in match order and may be empty.
  -spec p_scan(parse_fun(), input(), index(), [term()]) -> {[term()], input(), index()}.
  p_scan(_, <<>>, Index, Accum) ->
      {lists:reverse(Accum), <<>>, Index};
  p_scan(P, Inp, Index, Accum) ->
      case P(Inp, Index) of
          {Result, InpRem, NewIndex} ->
              p_scan(P, InpRem, NewIndex, [Result | Accum]);
          {fail, _} ->
              {lists:reverse(Accum), Inp, Index}
      end.
  395. -endif.
  396. -ifdef(p_string).
  %% Builds a parser for the literal binary S.
  %% On a match the node is S itself and the index is advanced over it;
  %% otherwise the parser fails with {expected, {string, S}, Index}.
  %% The prefix test is an ordinary pattern match rather than a caught
  %% `badmatch`, so errors raised while advancing the index are no longer
  %% mistaken for "literal not found".
  -spec p_string(binary()) -> parse_fun().
  p_string(S) ->
      Length = erlang:byte_size(S),
      fun(Input, Index) ->
          case Input of
              %% S and Length are already bound, so this tests for the prefix S.
              <<S:Length/binary, Rest/binary>> ->
                  {S, Rest, p_advance_index(S, Index)};
              _ ->
                  {fail, {expected, {string, S}, Index}}
          end
      end.
  408. -endif.
  409. -ifdef(p_anything).
  %% Builds a parser that consumes exactly one UTF-8 encoded character.
  %% Any input that does not start with one - empty input, or bytes that are not
  %% valid UTF-8 - yields {fail, {expected, any_character, Index}} instead of
  %% crashing with `badmatch`.
  -spec p_anything() -> parse_fun().
  p_anything() ->
      fun(<<C/utf8, Rest/binary>>, Index) ->
              Char = <<C/utf8>>,
              {Char, Rest, p_advance_index(Char, Index)};
         (_Input, Index) ->
              {fail, {expected, any_character, Index}}
      end.
  417. -endif.
  418. -ifdef(p_charclass).
  %% Builds a parser from the regular expression Class (compiled with the
  %% `unicode` and `dotall` options) and matches it anchored at the current
  %% position. On a match the node is the matched binary.
  %% Class may be a binary or a string, as the spec states; the failure term
  %% always carries it as a string. Previously a string Class raised `badarg`
  %% in binary_to_list/1 on the failure path.
  -spec p_charclass(string() | binary()) -> parse_fun().
  p_charclass(Class) ->
      {ok, RE} = re:compile(Class, [unicode, dotall]),
      fun(Inp, Index) ->
          case re:run(Inp, RE, [anchored]) of
              {match, [{0, Length} | _]} ->
                  {Head, Tail} = erlang:split_binary(Inp, Length),
                  {Head, Tail, p_advance_index(Head, Index)};
              _ ->
                  {fail, {expected, {character_class, charclass_label(Class)}, Index}}
          end
      end.

  %% String form of a character class for use in failure terms.
  charclass_label(Class) when is_binary(Class) -> binary_to_list(Class);
  charclass_label(Class) when is_list(Class) -> Class.
  430. -endif.
  431. -ifdef(p_regexp).
  %% Builds a parser from the regular expression Regexp, compiled once with the
  %% `unicode`, `dotall` and `anchored` options. On a match the node is the
  %% matched binary; otherwise the parser fails with {regexp, Regexp as string}.
  -spec p_regexp(binary()) -> parse_fun().
  p_regexp(Regexp) ->
      {ok, Compiled} = re:compile(Regexp, [unicode, dotall, anchored]),
      fun(Inp, Index) ->
          case re:run(Inp, Compiled) of
              {match, [{0, Length} | _]} ->
                  {Matched, Remaining} = erlang:split_binary(Inp, Length),
                  NewIndex = p_advance_index(Matched, Index),
                  {Matched, Remaining, NewIndex};
              _ ->
                  {fail, {expected, {regexp, binary_to_list(Regexp)}, Index}}
          end
      end.
  443. -endif.
  444. -ifdef(line).
  %% Extracts the line number from an index; `undefined` for any other term.
  -spec line(index() | term()) -> pos_integer() | undefined.
  line(Index) ->
      case Index of
          {{line, L}, _} -> L;
          _ -> undefined
      end.
  448. -endif.
  449. -ifdef(column).
  %% Extracts the column number from an index; `undefined` for any other term.
  -spec column(index() | term()) -> pos_integer() | undefined.
  column(Index) ->
      case Index of
          {_, {column, C}} -> C;
          _ -> undefined
      end.
  453. -endif.
  %% Moves Index past the matched text.
  %% A binary or list is converted to code points and each one is applied in
  %% turn. A single code point moves to column 1 of the next line when it is a
  %% line feed, and one column to the right otherwise.
  -spec p_advance_index(input() | unicode:charlist() | pos_integer(), index()) -> index().
  p_advance_index(MatchedInput, Index) when is_list(MatchedInput) orelse is_binary(MatchedInput) ->
      CodePoints = unicode:characters_to_list(MatchedInput),
      lists:foldl(fun(Char, Acc) -> p_advance_index(Char, Acc) end, Index, CodePoints);
  p_advance_index(MatchedInput, Index) when is_integer(MatchedInput) ->
      {{line, Line}, {column, Col}} = Index,
      if
          MatchedInput =:= $\n -> {{line, Line + 1}, {column, 1}};
          true -> {{line, Line}, {column, Col + 1}}
      end.