|
|
@ -10,8 +10,8 @@ main(Args) -> |
|
|
|
case file:open(SWFile, [read, raw, binary, {read_ahead, 65536}, {encoding, utf8}]) of |
|
|
|
{ok, IoDevice} -> |
|
|
|
{Goto, Output} = dealEveryLine(IoDevice, _Goto = #{0 => #{}}, _Output = #{}, _State = 0), |
|
|
|
Failure = genFailure(Goto), |
|
|
|
genErl(WriteDir, Goto, Failure, Output); |
|
|
|
Fail = genFail(Goto), |
|
|
|
genErl(WriteDir, Goto, Fail, Output); |
|
|
|
_Err -> |
|
|
|
io:format("genAcs open the file:~p error ~p~n", [SWFile, _Err]) |
|
|
|
end. |
|
|
@ -22,7 +22,7 @@ dealEveryLine(IoDevice, Goto, Output, MaxState) -> |
|
|
|
BinStr = binary:part(DataStr, 0, byte_size(DataStr) - 1), |
|
|
|
case BinStr =/= <<>> of |
|
|
|
true -> |
|
|
|
{NewGoto, NewState, NewMaxState} = addPattern(BinStr, Goto, 0, MaxState), |
|
|
|
{NewGoto, NewState, NewMaxState} = addGoto(BinStr, Goto, 0, MaxState), |
|
|
|
NewOutput = Output#{NewState => BinStr}, |
|
|
|
dealEveryLine(IoDevice, NewGoto, NewOutput, NewMaxState); |
|
|
|
_ -> |
|
|
@ -34,18 +34,19 @@ dealEveryLine(IoDevice, Goto, Output, MaxState) -> |
|
|
|
io:format("genAcs read the file error ~p~n", [_Err]) |
|
|
|
end. |
|
|
|
|
|
|
|
%% Build the Aho-Corasick tables from a list of binary patterns.
%%
%% BinStrList is the list of patterns (binaries) handed to genGotoOutput/4.
%% Returns {Goto, Fail, Output}:
%%   Goto   - map State => #{Char => NextState}; state 0 is the root.
%%   Fail   - map State => fallback state, as produced by genFail/1.
%%   Output - map accepting State => pattern, as produced by genGotoOutput/4.
genTree(BinStrList) ->
    %% Build the goto and output tables first, starting from an empty root.
    {Goto, Output} = genGotoOutput(BinStrList, _Goto = #{0 => #{}}, _Output = #{}, _State = 0),
    %% The fail table is derived from the finished goto table.
    Fail = genFail(Goto),
    {Goto, Fail, Output}.
|
|
|
|
|
|
|
%% 构造 goto and output table |
|
|
|
genGotoOutput([BinStr | Tail], Goto, Output, MaxState) -> |
|
|
|
case BinStr =/= <<>> of |
|
|
|
true -> |
|
|
|
{NewGoto, NewState, NewMaxState} = addPattern(BinStr, Goto, 0, MaxState), |
|
|
|
{NewGoto, NewState, NewMaxState} = addGoto(BinStr, Goto, 0, MaxState), |
|
|
|
NewOutput = Output#{NewState => BinStr}, |
|
|
|
genGotoOutput(Tail, NewGoto, NewOutput, NewMaxState); |
|
|
|
_ -> |
|
|
@ -54,65 +55,67 @@ genGotoOutput([BinStr | Tail], Goto, Output, MaxState) -> |
|
|
|
genGotoOutput([], Goto, Output, _MaxState) -> |
|
|
|
{Goto, Output}. |
|
|
|
|
|
|
|
%% Insert one pattern into the goto table, one UTF-8 code point at a time.
%%
%% Args: the remaining pattern bytes, the goto table built so far, the state
%% reached for the prefix already consumed, and the highest state number
%% allocated so far.
%% Returns {Goto, State, MaxState}: the updated table, the state at which the
%% pattern ends, and the new highest allocated state number.
%% Only the two clauses below exist, so a binary that does not decode as
%% UTF-8 fails with function_clause.
addGoto(<<Word/utf8, Tail/binary>>, Goto, State, MaxState) ->
    #{State := Node} = Goto,
    case Node of
        #{Word := NextState} ->
            %% The transition already exists: follow it.
            addGoto(Tail, Goto, NextState, MaxState);
        _ ->
            %% Allocate a fresh state and link it from the current node.
            NewMaxState = MaxState + 1,
            NewNode = Node#{Word => NewMaxState},
            addGoto(Tail, Goto#{NewMaxState => #{}, State => NewNode}, NewMaxState, NewMaxState)
    end;
addGoto(<<>>, Goto, State, MaxState) ->
    {Goto, State, MaxState}.
|
|
|
|
|
|
|
%% Build the fail table for a finished goto table.
%%
%% The traversal is seeded with the states reachable directly from the
%% root (state 0). Those states get no entry of their own, and readers of
%% the table default a missing entry to 0, so they fall back to the root.
%% Returns a map State => FailState.
genFail(#{0 := Node} = Goto) ->
    genFail(maps:values(Node), Goto, _Fail = #{}).
|
|
|
|
|
|
|
%% Build the fail table by breadth-first traversal of the goto table.
%%
%% States is the list of states still to visit, Goto the goto table and
%% Fail the fail table accumulated so far. Returns the completed fail table.
%%
%% The traversal runs one depth level at a time: children are collected
%% into a separate next-level list instead of being appended to the pending
%% queue with ++, which would copy the whole queue on every visited state.
%% Visiting order inside a level does not affect the result, because the
%% fail state of a node only depends on entries of strictly shallower nodes.
genFail(States, Goto, Fail) ->
    genFailLevel(States, [], Goto, Fail).

%% Visit every state of the current level, then move on to the next level.
genFailLevel([State | Tail], NextLevel, Goto, Fail) ->
    #{State := Node} = Goto,
    %% Fail state of the parent (defaults to the root).
    FailState = maps:get(State, Fail, 0),
    %% Compute the fail state of every child of this node.
    NewFail = addFail(maps:to_list(Node), FailState, Goto, Fail),
    %% Queue the children for the next level.
    genFailLevel(Tail, maps:values(Node) ++ NextLevel, Goto, NewFail);
genFailLevel([], [], _Goto, Fail) ->
    Fail;
genFailLevel([], NextLevel, Goto, Fail) ->
    genFailLevel(NextLevel, [], Goto, Fail).
|
|
|
|
|
|
|
%% Compute the fail state for each child of one node.
%%
%% The first argument is the node's transitions as {Word, ChildState} pairs,
%% FailState is the fail state of the node itself, Goto the goto table and
%% Fail the fail table accumulated so far. Returns the updated fail table.
addFail([{Word, State} | Tail], FailState, Goto, Fail) ->
    NewFail = findFailNode(Word, State, FailState, Goto, Fail),
    addFail(Tail, FailState, Goto, NewFail);
addFail([], _FailState, _Goto, Fail) ->
    Fail.
|
|
|
|
|
|
|
%% Compute the fail state of a single child state.
%%
%% Word is the character on the edge leading to State, and FailState is the
%% candidate fallback state currently being examined (initially the fail
%% state of State's parent). Returns Fail with State => TheFailState added
%% when a fallback is found, or Fail unchanged when the search ends at the
%% root without a match (readers default a missing entry to 0).
findFailNode(Word, State, FailState, Goto, Fail) ->
    #{FailState := Node} = Goto,
    case Node of
        #{Word := TheFailState} ->
            %% The candidate has a transition on Word: that target is the
            %% fail state for State.
            Fail#{State => TheFailState};
        _ when FailState =:= 0 ->
            %% No transition and already at the root: no entry is recorded.
            Fail;
        _ ->
            %% No transition yet: keep climbing the fail chain.
            NewFailState = maps:get(FailState, Fail, 0),
            findFailNode(Word, State, NewFailState, Goto, Fail)
    end.
|
|
|
|
|
|
@ -142,10 +145,10 @@ doGenGoto([{K, V} | SortKvs], StrAcc) -> |
|
|
|
doGenGoto(SortKvs, StrAcc) |
|
|
|
end. |
|
|
|
|
|
|
|
genFailOut([], _Failure, _Output, StrAcc) -> |
|
|
|
genFailOut([], _Fail, _Output, StrAcc) -> |
|
|
|
<<StrAcc/binary, "\nfailOut(_) -> {0, undefined}.\n\n">>; |
|
|
|
genFailOut([State], Failure, Output, StrAcc) -> |
|
|
|
FailState = maps:get(State, Failure, 0), |
|
|
|
genFailOut([State], Fail, Output, StrAcc) -> |
|
|
|
FailState = maps:get(State, Fail, 0), |
|
|
|
Pattern = maps:get(State, Output, undefined), |
|
|
|
case FailState /= 0 orelse Pattern /= undefined of |
|
|
|
true -> |
|
|
@ -153,20 +156,20 @@ genFailOut([State], Failure, Output, StrAcc) -> |
|
|
|
_ -> |
|
|
|
<<StrAcc/binary, ";\nfailOut(_) -> {0, undefined}.\n\n">> |
|
|
|
end; |
|
|
|
genFailOut([State | SortStates], Failure, Output, StrAcc) -> |
|
|
|
FailState = maps:get(State, Failure, 0), |
|
|
|
genFailOut([State | SortStates], Fail, Output, StrAcc) -> |
|
|
|
FailState = maps:get(State, Fail, 0), |
|
|
|
Pattern = maps:get(State, Output, undefined), |
|
|
|
case FailState /= 0 orelse Pattern /= undefined of |
|
|
|
true -> |
|
|
|
NewStrAcc = <<StrAcc/binary, "failOut(", (integer_to_binary(State))/binary, ") -> ", (iolist_to_binary(io_lib:format(<<"~w">>, [{FailState, Pattern}])))/binary, ";\n">>, |
|
|
|
genFailOut(SortStates, Failure, Output, NewStrAcc); |
|
|
|
genFailOut(SortStates, Fail, Output, NewStrAcc); |
|
|
|
_ -> |
|
|
|
genFailOut(SortStates, Failure, Output, StrAcc) |
|
|
|
genFailOut(SortStates, Fail, Output, StrAcc) |
|
|
|
end. |
|
|
|
|
|
|
|
%% Render the tables as Erlang source and write them to
%% <WriteDir>/acsTrees.erl.
%%
%% The text is built in stages: genHead/0 produces the header, genGoto/2
%% appends to it, and genFailOut/4 appends the failOut clauses for every
%% state of Goto in ascending order. Returns the result of file:write_file/2.
genErl(WriteDir, Goto, Fail, Output) ->
    HeadStr = genHead(),
    GotoStr = genGoto(Goto, HeadStr),
    FailStr = genFailOut(lists:sort(maps:keys(Goto)), Fail, Output, GotoStr),
    FileName = filename:join([WriteDir, "acsTrees.erl"]),
    file:write_file(FileName, FailStr).