瀏覽代碼

ft: 代码调整

master
SisMaker 4 年之前
父節點
當前提交
5c97b1ee9a
共有 4 個檔案被更改,包括 28 行新增和 28 行删除
  1. +4
    -4
      README.md
  2. +17
    -16
      src/eAcs.erl
  3. +4
    -4
      src/genAcs.erl
  4. +3
    -4
      src/test/test.txt

+ 4
- 4
README.md 查看文件

@ -1,17 +1,17 @@
eAcs eAcs
===== =====
基于ac算法实现的快速高效的erl版本敏感词匹配,检查,过滤代码
基于ac算法实现的快速高效的敏感词匹配,检查,过滤功能
Build Build
----- -----
$ rebar3 escriptize -> genAcs
$ rebar3 escriptize -> genAcs
$ rebar3 compile $ rebar3 compile
Uses Uses
----- -----
创建 acsTree.erl 创建 acsTree.erl
脚本生成:./genAcs SWord.txtFile OutputDir 或者函数嗲用
函数调用: genAcs:main([SWord.txtFile, OutputDir])
脚本生成:./genAcs SWordFile OutputDir
函数调用: genAcs:main([SWordFile, OutputDir])
匹配 检查 过滤 敏感词 匹配 检查 过滤 敏感词
eAcs:matchSw/1 %% 返回匹配的敏感词列表 eAcs:matchSw/1 %% 返回匹配的敏感词列表
eAcs:isHasSw/1 %% 检查是否包含敏感词 eAcs:isHasSw/1 %% 检查是否包含敏感词

+ 17
- 16
src/eAcs.erl 查看文件

@ -4,6 +4,7 @@
matchSw/1 %% matchSw/1 %%
, isHasSw/1 %% , isHasSw/1 %%
, replaceSw/1 %% , replaceSw/1 %%
, strSize/2 %% utf8字符串的长度
]). ]).
%% state 0 is the root node %% state 0 is the root node
@ -17,10 +18,10 @@ matchSw(BinStr) ->
doMatch(<<>>, _, _Index, MatchList) -> doMatch(<<>>, _, _Index, MatchList) ->
MatchList; MatchList;
doMatch(<<Word/utf8, Tail/binary>>, State, Index, MatchList) -> doMatch(<<Word/utf8, Tail/binary>>, State, Index, MatchList) ->
{NewState, NewMatchList} = matchInner(Word, State, Index, MatchList),
{NewState, NewMatchList} = matchWord(Word, State, Index, MatchList),
doMatch(Tail, NewState, Index + 1, NewMatchList). doMatch(Tail, NewState, Index + 1, NewMatchList).
matchInner(Word, State, Index, MatchList) ->
matchWord(Word, State, Index, MatchList) ->
Node = acsTree:goto(State), Node = acsTree:goto(State),
case Node of case Node of
undefined -> undefined ->
@ -29,7 +30,7 @@ matchInner(Word, State, Index, MatchList) ->
{State, MatchList}; {State, MatchList};
_ -> _ ->
{NextState, _} = acsTree:failOut(State), {NextState, _} = acsTree:failOut(State),
matchInner(Word, NextState, Index, MatchList)
matchWord(Word, NextState, Index, MatchList)
end; end;
_ -> _ ->
case Node of case Node of
@ -42,7 +43,7 @@ matchInner(Word, State, Index, MatchList) ->
{State, MatchList}; {State, MatchList};
_ -> _ ->
{NextState, _} = acsTree:failOut(State), {NextState, _} = acsTree:failOut(State),
matchInner(Word, NextState, Index, MatchList)
matchWord(Word, NextState, Index, MatchList)
end end
end end
end. end.
@ -55,7 +56,7 @@ getOutput(State, Index, MatchList) ->
undefined -> undefined ->
getOutput(FailState, Index, MatchList); getOutput(FailState, Index, MatchList);
_ -> _ ->
NewMatchList = [{Index - patternSize(Pattern, 0) + 1, Index, Pattern} | MatchList],
NewMatchList = [{Index - Pattern + 1, Pattern} | MatchList],
getOutput(FailState, Index, NewMatchList) getOutput(FailState, Index, NewMatchList)
end. end.
@ -68,14 +69,14 @@ isHasSw(BinStr) ->
doMatch(<<>>, _) -> doMatch(<<>>, _) ->
false; false;
doMatch(<<Word/utf8, Tail/binary>>, State) -> doMatch(<<Word/utf8, Tail/binary>>, State) ->
case matchInner(Word, State) of
case matchWord(Word, State) of
true -> true ->
true; true;
NewState -> NewState ->
doMatch(Tail, NewState) doMatch(Tail, NewState)
end. end.
matchInner(Word, State) ->
matchWord(Word, State) ->
Node = acsTree:goto(State), Node = acsTree:goto(State),
case Node of case Node of
undefined -> undefined ->
@ -84,7 +85,7 @@ matchInner(Word, State) ->
State; State;
_ -> _ ->
{NextState, _} = acsTree:failOut(State), {NextState, _} = acsTree:failOut(State),
matchInner(Word, NextState)
matchWord(Word, NextState)
end; end;
_ -> _ ->
case Node of case Node of
@ -101,7 +102,7 @@ matchInner(Word, State) ->
State; State;
_ -> _ ->
{NextState, _} = acsTree:failOut(State), {NextState, _} = acsTree:failOut(State),
matchInner(Word, NextState)
matchWord(Word, NextState)
end end
end end
end. end.
@ -110,11 +111,11 @@ getOutput(0) ->
false; false;
getOutput(State) -> getOutput(State) ->
{FailState, Pattern} = acsTree:failOut(State), {FailState, Pattern} = acsTree:failOut(State),
case Pattern == undefined orelse FailState == 0 of
true ->
false;
case Pattern of
undefined ->
getOutput(FailState);
_ -> _ ->
getOutput(FailState)
true
end. end.
%% *************************************** matchSw end *************************************************************** %% *************************************** matchSw end ***************************************************************
%% *************************************** replaceSw start ************************************************************* %% *************************************** replaceSw start *************************************************************
@ -122,7 +123,7 @@ replaceSw(_BinStr) ->
ok. ok.
%% *************************************** replaceSw end ************************************************************* %% *************************************** replaceSw end *************************************************************
patternSize(<<>>, Cnt) ->
strSize(<<>>, Cnt) ->
Cnt; Cnt;
patternSize(<<_Word/utf8, Left/binary>>, Cnt) ->
patternSize(Left, Cnt + 1).
strSize(<<_Word/utf8, Left/binary>>, Cnt) ->
strSize(Left, Cnt + 1).

+ 4
- 4
src/genAcs.erl 查看文件

@ -23,7 +23,7 @@ dealEveryLine(IoDevice, Goto, Output, MaxState) ->
case BinStr =/= <<>> of case BinStr =/= <<>> of
true -> true ->
{NewGoto, NewState, NewMaxState} = addGoto(BinStr, Goto, 0, MaxState), {NewGoto, NewState, NewMaxState} = addGoto(BinStr, Goto, 0, MaxState),
NewOutput = Output#{NewState => BinStr},
NewOutput = Output#{NewState => eAcs:strSize(BinStr, 0)},
dealEveryLine(IoDevice, NewGoto, NewOutput, NewMaxState); dealEveryLine(IoDevice, NewGoto, NewOutput, NewMaxState);
_ -> _ ->
dealEveryLine(IoDevice, Goto, Output, MaxState) dealEveryLine(IoDevice, Goto, Output, MaxState)
@ -146,15 +146,15 @@ doGenGoto([{K, V} | SortKvs], StrAcc) ->
end. end.
genFailOut([], _Fail, _Output, StrAcc) -> genFailOut([], _Fail, _Output, StrAcc) ->
<<StrAcc/binary, "\nfailOut(_) -> {0, undefined}.\n\n">>;
<<StrAcc/binary, "\nfailOut(_) -> {0, undefined}.">>;
genFailOut([State], Fail, Output, StrAcc) -> genFailOut([State], Fail, Output, StrAcc) ->
FailState = maps:get(State, Fail, 0), FailState = maps:get(State, Fail, 0),
Pattern = maps:get(State, Output, undefined), Pattern = maps:get(State, Output, undefined),
case FailState /= 0 orelse Pattern /= undefined of case FailState /= 0 orelse Pattern /= undefined of
true -> true ->
<<StrAcc/binary, "failOut(", (integer_to_binary(State))/binary, ") -> ", (iolist_to_binary(io_lib:format(<<"~w">>, [{FailState, Pattern}])))/binary, ";\nfailOut(_) -> {0, undefined}.\n\n">>;
<<StrAcc/binary, "failOut(", (integer_to_binary(State))/binary, ") -> ", (iolist_to_binary(io_lib:format(<<"~w">>, [{FailState, Pattern}])))/binary, ";\nfailOut(_) -> {0, undefined}.">>;
_ -> _ ->
<<StrAcc/binary, ";\nfailOut(_) -> {0, undefined}.\n\n">>
<<StrAcc/binary, ";\nfailOut(_) -> {0, undefined}.">>
end; end;
genFailOut([State | SortStates], Fail, Output, StrAcc) -> genFailOut([State | SortStates], Fail, Output, StrAcc) ->
FailState = maps:get(State, Fail, 0), FailState = maps:get(State, Fail, 0),

+ 3
- 4
src/test/test.txt 查看文件

@ -1,7 +1,6 @@
去你妈的
你妈
abcd
cd
c

Loading…
取消
儲存