Przeglądaj źródła

ft: 代码调整

master
SisMaker 4 lat temu
rodzic
commit
5c97b1ee9a
4 zmienionych plików z 28 dodań i 28 usunięć
  1. +4
    -4
      README.md
  2. +17
    -16
      src/eAcs.erl
  3. +4
    -4
      src/genAcs.erl
  4. +3
    -4
      src/test/test.txt

+ 4
- 4
README.md Wyświetl plik

@ -1,17 +1,17 @@
eAcs
=====
基于ac算法实现的快速高效的erl版本敏感词匹配,检查,过滤代码
基于ac算法实现的快速高效的敏感词匹配,检查,过滤功能
Build
-----
$ rebar3 escriptize -> genAcs
$ rebar3 escriptize -> genAcs
$ rebar3 compile
Uses
-----
创建 acsTree.erl
脚本生成:./genAcs SWord.txtFile OutputDir 或者函数调用
函数调用: genAcs:main([SWord.txtFile, OutputDir])
脚本生成:./genAcs SWordFile OutputDir
函数调用: genAcs:main([SWordFile, OutputDir])
匹配 检查 过滤 敏感词
eAcs:matchSw/1 %% 返回匹配的敏感词列表
eAcs:isHasSw/1 %% 检查是否包含敏感词

+ 17
- 16
src/eAcs.erl Wyświetl plik

@ -4,6 +4,7 @@
matchSw/1 %%
, isHasSw/1 %%
, replaceSw/1 %%
, strSize/2 %% utf8字符串的长度
]).
%% state 0 is the root node
@ -17,10 +18,10 @@ matchSw(BinStr) ->
doMatch(<<>>, _, _Index, MatchList) ->
MatchList;
doMatch(<<Word/utf8, Tail/binary>>, State, Index, MatchList) ->
{NewState, NewMatchList} = matchInner(Word, State, Index, MatchList),
{NewState, NewMatchList} = matchWord(Word, State, Index, MatchList),
doMatch(Tail, NewState, Index + 1, NewMatchList).
matchInner(Word, State, Index, MatchList) ->
matchWord(Word, State, Index, MatchList) ->
Node = acsTree:goto(State),
case Node of
undefined ->
@ -29,7 +30,7 @@ matchInner(Word, State, Index, MatchList) ->
{State, MatchList};
_ ->
{NextState, _} = acsTree:failOut(State),
matchInner(Word, NextState, Index, MatchList)
matchWord(Word, NextState, Index, MatchList)
end;
_ ->
case Node of
@ -42,7 +43,7 @@ matchInner(Word, State, Index, MatchList) ->
{State, MatchList};
_ ->
{NextState, _} = acsTree:failOut(State),
matchInner(Word, NextState, Index, MatchList)
matchWord(Word, NextState, Index, MatchList)
end
end
end.
@ -55,7 +56,7 @@ getOutput(State, Index, MatchList) ->
undefined ->
getOutput(FailState, Index, MatchList);
_ ->
NewMatchList = [{Index - patternSize(Pattern, 0) + 1, Index, Pattern} | MatchList],
NewMatchList = [{Index - Pattern + 1, Pattern} | MatchList],
getOutput(FailState, Index, NewMatchList)
end.
@ -68,14 +69,14 @@ isHasSw(BinStr) ->
doMatch(<<>>, _) ->
false;
doMatch(<<Word/utf8, Tail/binary>>, State) ->
case matchInner(Word, State) of
case matchWord(Word, State) of
true ->
true;
NewState ->
doMatch(Tail, NewState)
end.
matchInner(Word, State) ->
matchWord(Word, State) ->
Node = acsTree:goto(State),
case Node of
undefined ->
@ -84,7 +85,7 @@ matchInner(Word, State) ->
State;
_ ->
{NextState, _} = acsTree:failOut(State),
matchInner(Word, NextState)
matchWord(Word, NextState)
end;
_ ->
case Node of
@ -101,7 +102,7 @@ matchInner(Word, State) ->
State;
_ ->
{NextState, _} = acsTree:failOut(State),
matchInner(Word, NextState)
matchWord(Word, NextState)
end
end
end.
@ -110,11 +111,11 @@ getOutput(0) ->
false;
getOutput(State) ->
{FailState, Pattern} = acsTree:failOut(State),
case Pattern == undefined orelse FailState == 0 of
true ->
false;
case Pattern of
undefined ->
getOutput(FailState);
_ ->
getOutput(FailState)
true
end.
%% *************************************** matchSw end ***************************************************************
%% *************************************** replaceSw start *************************************************************
@ -122,7 +123,7 @@ replaceSw(_BinStr) ->
ok.
%% *************************************** replaceSw end *************************************************************
patternSize(<<>>, Cnt) ->
strSize(<<>>, Cnt) ->
Cnt;
patternSize(<<_Word/utf8, Left/binary>>, Cnt) ->
patternSize(Left, Cnt + 1).
strSize(<<_Word/utf8, Left/binary>>, Cnt) ->
strSize(Left, Cnt + 1).

+ 4
- 4
src/genAcs.erl Wyświetl plik

@ -23,7 +23,7 @@ dealEveryLine(IoDevice, Goto, Output, MaxState) ->
case BinStr =/= <<>> of
true ->
{NewGoto, NewState, NewMaxState} = addGoto(BinStr, Goto, 0, MaxState),
NewOutput = Output#{NewState => BinStr},
NewOutput = Output#{NewState => eAcs:strSize(BinStr, 0)},
dealEveryLine(IoDevice, NewGoto, NewOutput, NewMaxState);
_ ->
dealEveryLine(IoDevice, Goto, Output, MaxState)
@ -146,15 +146,15 @@ doGenGoto([{K, V} | SortKvs], StrAcc) ->
end.
genFailOut([], _Fail, _Output, StrAcc) ->
<<StrAcc/binary, "\nfailOut(_) -> {0, undefined}.\n\n">>;
<<StrAcc/binary, "\nfailOut(_) -> {0, undefined}.">>;
genFailOut([State], Fail, Output, StrAcc) ->
FailState = maps:get(State, Fail, 0),
Pattern = maps:get(State, Output, undefined),
case FailState /= 0 orelse Pattern /= undefined of
true ->
<<StrAcc/binary, "failOut(", (integer_to_binary(State))/binary, ") -> ", (iolist_to_binary(io_lib:format(<<"~w">>, [{FailState, Pattern}])))/binary, ";\nfailOut(_) -> {0, undefined}.\n\n">>;
<<StrAcc/binary, "failOut(", (integer_to_binary(State))/binary, ") -> ", (iolist_to_binary(io_lib:format(<<"~w">>, [{FailState, Pattern}])))/binary, ";\nfailOut(_) -> {0, undefined}.">>;
_ ->
<<StrAcc/binary, ";\nfailOut(_) -> {0, undefined}.\n\n">>
<<StrAcc/binary, ";\nfailOut(_) -> {0, undefined}.">>
end;
genFailOut([State | SortStates], Fail, Output, StrAcc) ->
FailState = maps:get(State, Fail, 0),

+ 3
- 4
src/test/test.txt Wyświetl plik

@ -1,7 +1,6 @@
去你妈的
你妈
abcd
cd
c

Ładowanie…
Anuluj
Zapisz