|
|
@ -12,7 +12,7 @@ main(Args) -> |
|
|
|
[SWFile, WriteDir] -> |
|
|
|
case file:open(SWFile, [read, raw, binary, {read_ahead, 65536}, {encoding, utf8}]) of |
|
|
|
{ok, IoDevice} -> |
|
|
|
{Goto, Output} = dealEverySW(IoDevice, _Goto = #{0 => #{}}, _Output = #{}, _State = 0), |
|
|
|
{Goto, Output} = dealEverySW(IoDevice, _Goto = #{0 => #{}}, _Output = #{}, _PtrInx = 0), |
|
|
|
file:close(IoDevice), |
|
|
|
Fail = genFail(Goto), |
|
|
|
genSpw(WriteDir), |
|
|
@ -69,17 +69,23 @@ writeFilter(CurLine, Line, FilterFile, LineMap) -> |
|
|
|
writeFilter(CurLine + 1, Line, FilterFile, LineMap) |
|
|
|
end. |
|
|
|
|
|
|
|
dealEverySW(IoDevice, Goto, Output, MaxState) -> |
|
|
|
dealEverySW(IoDevice, Goto, Output, MaxPtrInx) -> |
|
|
|
case file:read_line(IoDevice) of |
|
|
|
{ok, DataStr} -> |
|
|
|
BinStr = binary:part(DataStr, 0, byte_size(DataStr) - 1), |
|
|
|
BinStr = |
|
|
|
case binary:last(DataStr) of |
|
|
|
10 -> |
|
|
|
binary:part(DataStr, 0, byte_size(DataStr) - 1); |
|
|
|
_ -> |
|
|
|
DataStr |
|
|
|
end, |
|
|
|
case BinStr =/= <<>> of |
|
|
|
true -> |
|
|
|
{NewGoto, NewState, NewMaxState} = addGoto(BinStr, Goto, 0, MaxState), |
|
|
|
NewOutput = Output#{NewState => eAcs:strSize(BinStr, 0)}, |
|
|
|
dealEverySW(IoDevice, NewGoto, NewOutput, NewMaxState); |
|
|
|
{NewGoto, EndPtrInx, NewMaxPtrInx} = addGoto(BinStr, Goto, 0, MaxPtrInx), |
|
|
|
NewOutput = Output#{EndPtrInx => eAcs:strSize(BinStr, 0)}, |
|
|
|
dealEverySW(IoDevice, NewGoto, NewOutput, NewMaxPtrInx); |
|
|
|
_ -> |
|
|
|
dealEverySW(IoDevice, Goto, Output, MaxState) |
|
|
|
dealEverySW(IoDevice, Goto, Output, MaxPtrInx) |
|
|
|
end; |
|
|
|
eof -> |
|
|
|
{Goto, Output}; |
|
|
@ -90,54 +96,54 @@ dealEverySW(IoDevice, Goto, Output, MaxState) -> |
|
|
|
%% Build the Aho-Corasick search tables from a list of binary strings.
%%
%% Returns {Goto, Fail, Output}:
%%   Goto   - map of pointer index => #{Char => next pointer index}
%%   Fail   - map produced by genFail/1 from the Goto table
%%   Output - map of end pointer index => the pattern that ends there
genTree(BinStrList) ->
    %% First build the goto and output tables, starting from an empty root (index 0).
    {Goto, Output} = genGotoOutput(BinStrList, _Goto = #{0 => #{}}, _Output = #{}, _PtrInx = 0),
    %% Then derive the fail table from the finished goto table.
    Fail = genFail(Goto),
    {Goto, Fail, Output}.
|
|
|
|
|
|
|
%% Build the goto and output tables.
%%
%% Folds every binary of the list into Goto via addGoto/4 and records, in
%% Output, the pointer index at which each pattern ends. Empty binaries are
%% skipped. MaxPtrInx is the highest pointer index allocated so far.
%% Returns {Goto, Output} once the list is exhausted.
genGotoOutput([<<>> | Tail], Goto, Output, MaxPtrInx) ->
    %% An empty pattern contributes no transitions and no output entry.
    genGotoOutput(Tail, Goto, Output, MaxPtrInx);
genGotoOutput([BinStr | Tail], Goto, Output, MaxPtrInx) ->
    %% Always insert starting from the root (pointer index 0).
    {NewGoto, EndPtrInx, NewMaxPtrInx} = addGoto(BinStr, Goto, 0, MaxPtrInx),
    NewOutput = Output#{EndPtrInx => BinStr},
    genGotoOutput(Tail, NewGoto, NewOutput, NewMaxPtrInx);
genGotoOutput([], Goto, Output, _MaxPtrInx) ->
    {Goto, Output}.
|
|
|
|
|
|
|
%% Add the goto transitions for one pattern.
%%
%% Consumes the binary one UTF-8 code point at a time, following existing
%% transitions from PtrInx and allocating a new node (MaxPtrInx + 1) whenever
%% the current node has no transition for the code point.
%% Returns {Goto, EndPtrInx, MaxPtrInx}: the updated table, the pointer index
%% reached after the last code point, and the highest index allocated.
addGoto(<<Word/utf8, Tail/binary>>, Goto, PtrInx, MaxPtrInx) ->
    #{PtrInx := Node} = Goto,
    case Node of
        #{Word := NextPtrInx} ->
            %% Transition already exists: just follow it.
            addGoto(Tail, Goto, NextPtrInx, MaxPtrInx);
        _ ->
            %% Allocate a fresh, empty node and link it from the current one.
            NewMaxPtrInx = MaxPtrInx + 1,
            NewNode = Node#{Word => NewMaxPtrInx},
            addGoto(Tail, Goto#{NewMaxPtrInx => #{}, PtrInx => NewNode}, NewMaxPtrInx, NewMaxPtrInx)
    end;
addGoto(<<>>, Goto, PtrInx, MaxPtrInx) ->
    {Goto, PtrInx, MaxPtrInx}.
|
|
|
|
|
|
|
%% Build the fail table for a goto table.
%%
%% Seeds the traversal done by genFail/3 with the pointer indexes of the
%% root's (index 0) direct children and an empty Fail map.
genFail(#{0 := RootNode} = Goto) ->
    RootChildren = maps:values(RootNode),
    genFail(RootChildren, Goto, _Fail = #{}).
|
|
|
|
|
|
|
%% 基于bfs搜索构造 Fail |
|
|
|
genFail([State | Tail], Goto, Fail) -> |
|
|
|
#{State := Node} = Goto, |
|
|
|
genFail([PtrInx | Tail], Goto, Fail) -> |
|
|
|
#{PtrInx := Node} = Goto, |
|
|
|
|
|
|
|
%% 获取父节点的失败节点 |
|
|
|
FailState = maps:get(State, Fail, 0), |
|
|
|
FatherFailPtrInx = maps:get(PtrInx, Fail, 0), |
|
|
|
|
|
|
|
%% 子节点 |
|
|
|
Kvs = maps:to_list(Node), |
|
|
|
ChildKvs = maps:to_list(Node), |
|
|
|
|
|
|
|
%% 为子节点查找失败节点 |
|
|
|
NewFail = addFail(Kvs, FailState, Goto, Fail), |
|
|
|
NewFail = addFail(ChildKvs, FatherFailPtrInx, Goto, Fail), |
|
|
|
|
|
|
|
%% 子节点入队列 |
|
|
|
NewQueue = Tail ++ maps:values(Node), |
|
|
@ -146,29 +152,29 @@ genFail([], _Goto, Fail) -> |
|
|
|
Fail. |
|
|
|
|
|
|
|
%% Build the fail pointers for the children of one node.
%%
%% The first argument is the list of {Word, PtrInx} transitions of the node;
%% FatherFailPtrInx is that node's own fail pointer. Each child is resolved
%% with findFailNode/5 and the results are accumulated into Fail, which is
%% returned once the list is exhausted.
addFail([{Word, PtrInx} | Tail], FatherFailPtrInx, Goto, Fail) ->
    NewFail = findFailNode(Word, PtrInx, FatherFailPtrInx, Goto, Fail),
    addFail(Tail, FatherFailPtrInx, Goto, NewFail);
addFail([], _FatherFailPtrInx, _Goto, Fail) ->
    Fail.
|
|
|
|
|
|
|
%% Build the fail pointer for a single child node.
%%
%% Word is the character on the edge leading to the child PtrInx, and
%% FatherFailPtrInx is the fail pointer currently being probed (initially the
%% parent's fail pointer). Returns Fail, extended with PtrInx => target when a
%% target is found; Fail is returned unchanged when the search reaches the
%% root (index 0) without a match.
findFailNode(Word, PtrInx, FatherFailPtrInx, Goto, Fail) ->
    #{FatherFailPtrInx := Node} = Goto,
    case Node of
        #{Word := MyFailPtrInx} ->
            %% The probed fail node has a transition on the same character:
            %% that transition's target is the child's fail pointer.
            Fail#{PtrInx => MyFailPtrInx};
        _ when FatherFailPtrInx =:= 0 ->
            %% No match and we are already at the root: nothing to record.
            Fail;
        _ ->
            %% No match but not at the root yet: keep climbing the fail chain
            %% (a node without a recorded fail pointer falls back to the root).
            NewFatherFailPtrInx = maps:get(FatherFailPtrInx, Fail, 0),
            findFailNode(Word, PtrInx, NewFatherFailPtrInx, Goto, Fail)
    end.
|
|
|
|
|
|
@ -206,25 +212,36 @@ doGenGoto([{K, V} | SortKvs], StrAcc) -> |
|
|
|
end. |
|
|
|
|
|
|
|
%% Render the failOut/1 clauses of the generated module as a binary.
%%
%% Walks the pointer indexes in the order given and appends one
%% "failOut(PtrInx) -> ...;" clause to StrAcc for every index that has a
%% non-zero fail pointer or an entry in Output; all other indexes are skipped.
%% The clause body is the bare fail pointer when the index has no Output
%% entry, and {FailPtrInx, Pattern} otherwise. The result always ends with the
%% catch-all clause "failOut(_) -> 0.".
genFailOut([], _Fail, _Output, StrAcc) ->
    <<StrAcc/binary, "\nfailOut(_) -> 0.">>;
genFailOut([PtrInx], Fail, Output, StrAcc) ->
    %% Last index: the catch-all clause is appended directly behind it.
    case genFailOutClause(PtrInx, Fail, Output) of
        skip ->
            %% NOTE(review): this emits a leading ";" even though every clause
            %% appended below already ends in ";\n" - confirm against the
            %% caller whether this branch is reachable / intended.
            <<StrAcc/binary, ";\nfailOut(_) -> 0.">>;
        Clause ->
            <<StrAcc/binary, Clause/binary, ";\nfailOut(_) -> 0.">>
    end;
genFailOut([PtrInx | SortPtrInxes], Fail, Output, StrAcc) ->
    case genFailOutClause(PtrInx, Fail, Output) of
        skip ->
            genFailOut(SortPtrInxes, Fail, Output, StrAcc);
        Clause ->
            NewStrAcc = <<StrAcc/binary, Clause/binary, ";\n">>,
            genFailOut(SortPtrInxes, Fail, Output, NewStrAcc)
    end.

%% Render "failOut(PtrInx) -> Body" (without a terminator) for one index, or
%% return the atom skip when the index has neither a fail pointer nor a pattern.
genFailOutClause(PtrInx, Fail, Output) ->
    FailPtrInx = maps:get(PtrInx, Fail, 0),
    case maps:get(PtrInx, Output, undefined) of
        undefined when FailPtrInx == 0 ->
            skip;
        undefined ->
            fmtFailOutClause(PtrInx, FailPtrInx);
        Pattern ->
            fmtFailOutClause(PtrInx, {FailPtrInx, Pattern})
    end.

%% Format a single clause head and body; Result is printed with ~w.
fmtFailOutClause(PtrInx, Result) ->
    ResultBin = iolist_to_binary(io_lib:format(<<"~w">>, [Result])),
    <<"failOut(", (integer_to_binary(PtrInx))/binary, ") -> ", ResultBin/binary>>.
|
|
|
|
|
|
|
-spec load(Module :: atom(), Export :: [{Fun :: atom(), Arity :: pos_integer()}], Str :: string()) -> {module, Module :: atom()} | {error, _}. |
|
|
|