|
|
@ -6,24 +6,28 @@ |
|
|
|
]). |
|
|
|
|
|
|
|
%% Entry point of the generator.
%%
%% Args must be a two-element list [SWFile, WriteDir]:
%%   SWFile   - path of the word file; it is read line by line and every line
%%              is handed to dealEveryLine/4 as one pattern.
%%   WriteDir - directory in which genErl/4 writes "acsTrees.erl".
%%
%% Returns whatever genErl/4 returns on success, or the result of io:format/2
%% when the word file cannot be opened. Any other shape of Args fails with a
%% badmatch.
main(Args) ->
    [SWFile, WriteDir] = Args,
    case file:open(SWFile, [read, raw, binary, {read_ahead, 65536}, {encoding, utf8}]) of
        {ok, IoDevice} ->
            %% Start from a trie that only contains the root state 0.
            %% The device is closed even when reading crashes, so the
            %% descriptor is never leaked.
            {Goto, Output} =
                try
                    dealEveryLine(IoDevice, #{0 => #{}}, #{}, 0)
                after
                    file:close(IoDevice)
                end,
            Failure = genFailure(Goto),
            genErl(WriteDir, Goto, Failure, Output);
        Err ->
            io:format("genAcs open the file:~p error ~p~n", [SWFile, Err])
    end.
|
|
|
|
|
|
|
dealEveryLine(IoDevice, Goto, Output, MaxState) -> |
|
|
|
case file:read_line(IoDevice) of |
|
|
|
{ok, DataStr} -> |
|
|
|
%% io:format("IMY*********** ~w ~n", [DataStr]), |
|
|
|
BinStr = binary:part(DataStr, 0, byte_size(DataStr) - 1), |
|
|
|
{NewGoto, NewState, NewMaxState} = addPattern(BinStr, Goto, 0, MaxState), |
|
|
|
NewOutput = Output#{NewState => BinStr}, |
|
|
|
dealEveryLine(IoDevice, NewGoto, NewOutput, NewMaxState); |
|
|
|
case BinStr =/= <<>> of |
|
|
|
true -> |
|
|
|
{NewGoto, NewState, NewMaxState} = addPattern(BinStr, Goto, 0, MaxState), |
|
|
|
NewOutput = Output#{NewState => BinStr}, |
|
|
|
dealEveryLine(IoDevice, NewGoto, NewOutput, NewMaxState); |
|
|
|
_ -> |
|
|
|
dealEveryLine(IoDevice, Goto, Output, MaxState) |
|
|
|
end; |
|
|
|
eof -> |
|
|
|
{Goto, Output}; |
|
|
|
_Err -> |
|
|
@ -33,15 +37,20 @@ dealEveryLine(IoDevice, Goto, Output, MaxState) -> |
|
|
|
%% Build the Aho-Corasick search tables from a list of binary patterns.
%%
%% Returns {Goto, Failure, Output}:
%%   Goto    - map State => map of transitions, as built by genGotoOutput/4
%%   Failure - map State => fallback state, as built by genFailure/1
%%   Output  - map State => the pattern that ends in that state
genTree(BinStrList) ->
    %% First build the goto and output tables, starting from the bare root
    %% state 0 (built exactly once).
    {Goto, Output} = genGotoOutput(BinStrList, #{0 => #{}}, #{}, 0),
    %% Then derive the failure table from the finished goto table.
    Failure = genFailure(Goto),
    {Goto, Failure, Output}.
|
|
|
|
|
|
|
%% Insert every pattern of the list into the goto table and record, in the
%% output table, which state each pattern ends in.
%%
%%   Goto     - goto table built so far
%%   Output   - output table built so far (State => pattern)
%%   MaxState - state counter threaded through addPattern/4
%%
%% Returns {Goto, Output} once the list is exhausted.
%%
%% Empty patterns are skipped: they add no transition, and (presumably, going
%% by addPattern/4's clause for <<>>) would otherwise register an output on
%% the state the insertion starts from, i.e. the root.
genGotoOutput([<<>> | Tail], Goto, Output, MaxState) ->
    genGotoOutput(Tail, Goto, Output, MaxState);
genGotoOutput([BinStr | Tail], Goto, Output, MaxState) ->
    %% Every insertion starts at the root state 0.
    {NewGoto, NewState, NewMaxState} = addPattern(BinStr, Goto, 0, MaxState),
    genGotoOutput(Tail, NewGoto, Output#{NewState => BinStr}, NewMaxState);
genGotoOutput([], Goto, Output, _MaxState) ->
    {Goto, Output}.
|
|
|
|
|
|
@ -60,55 +69,55 @@ addPattern(<<>>, Goto, State, MaxState) -> |
|
|
|
|
|
|
|
%% Build the failure table for a complete goto table.
%%
%% The walk is seeded with the direct children of the root state 0 and starts
%% from an empty failure map; a state without an entry in the result is
%% treated as falling back to state 0 by the lookups in this module.
%% Fails with function_clause when Goto has no root state 0.
genFailure(#{0 := Node} = Goto) ->
    genFailure(maps:values(Node), Goto, #{}).
|
|
|
|
|
|
|
%% Build the failure table with a breadth-first walk over the goto table.
%%
%%   States  - the states of the current level, in visiting order
%%   Goto    - the complete goto table (State => map of Word => ChildState)
%%   Failure - failure table built so far
%%
%% For every state of the level, the failure links of all its children are
%% computed (starting the search from the state's own failure link) and the
%% children are collected as the next level. Levels are processed one at a
%% time, so the pending queue is never re-copied per state; the visiting
%% order is the same as appending each state's children to the end of a queue.
genFailure([], _Goto, Failure) ->
    Failure;
genFailure(States, Goto, Failure) ->
    {ChildLists, NewFailure} = lists:foldl(
        fun(State, {ChildAcc, FailAcc}) ->
            #{State := Node} = Goto,
            %% Starting point of the search: this state's own failure link
            %% (state 0 when none was recorded).
            FailState = maps:get(State, FailAcc, 0),
            %% Compute the failure link of every child of this state.
            UpdFail = genFailureInner(maps:to_list(Node), FailState, Goto, FailAcc),
            %% Remember the children; they form part of the next level.
            {[maps:values(Node) | ChildAcc], UpdFail}
        end,
        {[], Failure},
        States),
    %% ChildLists was accumulated in reverse visiting order.
    genFailure(lists:append(lists:reverse(ChildLists)), Goto, NewFailure).
|
|
|
|
|
|
|
|
|
|
|
%% Compute the failure links for all children of one node.
%%
%%   Kvs       - the node's transitions as a list of {Word, ChildState}
%%   FailState - the failure link of the node itself
%%   Goto      - the complete goto table
%%   Failure   - failure table built so far
%%
%% Returns the failure table extended by findFailureNode/5 for each child.
genFailureInner(Kvs, FailState, Goto, Failure) ->
    lists:foldl(
        fun({Word, State}, FailAcc) ->
            findFailureNode(Word, State, FailState, Goto, FailAcc)
        end,
        Failure,
        Kvs).
|
|
|
|
|
|
|
%% Compute the failure link of a single child state.
%%
%%   Word      - the transition key that leads from the parent to State
%%   State     - the child state whose failure link is being computed
%%   FailState - the candidate state currently being inspected
%%   Goto      - the complete goto table
%%   Failure   - failure table built so far
%%
%% Returns Failure with State mapped to the found state, or Failure unchanged
%% when the search reaches the root (state 0) without finding a transition on
%% Word. Fails with badmatch when FailState is not a key of Goto.
findFailureNode(Word, State, FailState, Goto, Failure) ->
    #{FailState := Node} = Goto,
    case Node of
        #{Word := TheFailState} ->
            %% The candidate has a transition on Word: that target is the
            %% failure link of State.
            Failure#{State => TheFailState};
        _ when FailState =:= 0 ->
            %% No transition, and the root has already been reached: nothing
            %% is recorded for State.
            Failure;
        _ ->
            %% No transition, but not at the root yet: continue with the
            %% candidate's own failure link (state 0 when none is recorded).
            findFailureNode(Word, State, maps:get(FailState, Failure, 0), Goto, Failure)
    end.
|
|
|
|
|
|
|
%% Header of the generated module: module name, compile options and the
%% export list for the two generated lookup functions goto/1 and failOut/1.
genHead() ->
    <<"-module(acsTrees).\n\n-compile([deterministic, no_line_info]).\n\n-export([goto/1, failOut/1]).\n\n">>.
|
|
|
|
|
|
|
genGoto(Goto, StrAcc) -> |
|
|
|
Kvs = maps:to_list(Goto), |
|
|
@ -116,47 +125,51 @@ genGoto(Goto, StrAcc) -> |
|
|
|
doGenGoto(SortKvs, StrAcc). |
|
|
|
|
|
|
|
%% Append the source text of the generated goto/1 function to StrAcc.
%%
%%   first argument - list of {State, TransitionMap} pairs
%%   StrAcc         - binary holding the module source generated so far
%%
%% One clause "goto(State) -> TransitionMap;" is emitted per state that has
%% at least one transition; states with an empty transition map are skipped
%% because the closing catch-all clause "goto(_) -> undefined." covers them.
%% The transition map is rendered through eFmt:formatBin/2 with "~w".
doGenGoto([], StrAcc) ->
    <<StrAcc/binary, "goto(_) -> undefined.\n\n">>;
doGenGoto([{K, V} | SortKvs], StrAcc) ->
    case maps:size(V) > 0 of
        true ->
            NewStrAcc = <<StrAcc/binary, "goto(", (integer_to_binary(K))/binary, ") -> ", (eFmt:formatBin(<<"~w">>, [V]))/binary, ";\n">>,
            doGenGoto(SortKvs, NewStrAcc);
        _ ->
            doGenGoto(SortKvs, StrAcc)
    end.
|
|
|
|
|
|
|
%% Append the source text of the generated failOut/1 function to StrAcc.
%%
%%   first argument - list of states to emit, in the order given
%%   Failure        - map State => failure state (missing entries count as 0)
%%   Output         - map State => pattern ending in that state
%%                    (missing entries count as undefined)
%%   StrAcc         - binary holding the module source generated so far
%%
%% One clause "failOut(State) -> {FailState, Pattern};" is emitted per state
%% that has a non-zero failure state or a pattern; all other states are
%% covered by the closing catch-all "failOut(_) -> {0, undefined}.".
%% The tuple is rendered through eFmt:formatBin/2 with "~w".
genFailOut([], _Failure, _Output, StrAcc) ->
    <<StrAcc/binary, "\nfailOut(_) -> {0, undefined}.\n\n">>;
genFailOut([State], Failure, Output, StrAcc) ->
    FailState = maps:get(State, Failure, 0),
    Pattern = maps:get(State, Output, undefined),
    case FailState =/= 0 orelse Pattern =/= undefined of
        true ->
            <<StrAcc/binary, "failOut(", (integer_to_binary(State))/binary, ") -> ", (eFmt:formatBin(<<"~w">>, [{FailState, Pattern}]))/binary, ";\nfailOut(_) -> {0, undefined}.\n\n">>;
        _ ->
            %% Nothing to emit for the last state. StrAcc already ends in a
            %% complete clause separator (or holds no failOut clause at all),
            %% so only the catch-all is appended; adding another ";" here
            %% would make the generated module unparsable.
            <<StrAcc/binary, "failOut(_) -> {0, undefined}.\n\n">>
    end;
genFailOut([State | SortStates], Failure, Output, StrAcc) ->
    FailState = maps:get(State, Failure, 0),
    Pattern = maps:get(State, Output, undefined),
    case FailState =/= 0 orelse Pattern =/= undefined of
        true ->
            NewStrAcc = <<StrAcc/binary, "failOut(", (integer_to_binary(State))/binary, ") -> ", (eFmt:formatBin(<<"~w">>, [{FailState, Pattern}]))/binary, ";\n">>,
            genFailOut(SortStates, Failure, Output, NewStrAcc);
        _ ->
            genFailOut(SortStates, Failure, Output, StrAcc)
    end.
|
|
|
|
|
|
|
%% Generate the source of the acsTrees module and write it to disk.
%%
%%   WriteDir - directory that receives "acsTrees.erl"
%%   Goto     - goto table (State => transition map)
%%   Failure  - failure table (State => fallback state)
%%   Output   - output table (State => pattern)
%%
%% The source is assembled in order: module header, goto/1 clauses, then the
%% failOut/1 clauses for every state of the goto table in ascending order.
%% Returns the result of file:write_file/2.
genErl(WriteDir, Goto, Failure, Output) ->
    HeadStr = genHead(),
    GotoStr = genGoto(Goto, HeadStr),
    SortStates = lists:sort(maps:keys(Goto)),
    FailOutStr = genFailOut(SortStates, Failure, Output, GotoStr),
    FileName = filename:join([WriteDir, "acsTrees.erl"]),
    file:write_file(FileName, FailOutStr).
|
|
|
|
|
|
|
|
|
|
|
|