|
|
@ -4,6 +4,7 @@ |
|
|
|
matchSw/1 %% 返回匹配的敏感词列表 |
|
|
|
, isHasSw/1 %% 检查是否包含敏感词 |
|
|
|
, replaceSw/1 %% 替换敏感词 |
|
|
|
, strSize/2 %% 获取utf8字符串的长度 |
|
|
|
]). |
|
|
|
|
|
|
|
%% state 0 is the root node |
|
|
@ -17,10 +18,10 @@ matchSw(BinStr) -> |
|
|
|
doMatch(<<>>, _, _Index, MatchList) -> |
|
|
|
MatchList; |
|
|
|
doMatch(<<Word/utf8, Tail/binary>>, State, Index, MatchList) -> |
|
|
|
{NewState, NewMatchList} = matchInner(Word, State, Index, MatchList), |
|
|
|
{NewState, NewMatchList} = matchWord(Word, State, Index, MatchList), |
|
|
|
doMatch(Tail, NewState, Index + 1, NewMatchList). |
|
|
|
|
|
|
|
matchInner(Word, State, Index, MatchList) -> |
|
|
|
matchWord(Word, State, Index, MatchList) -> |
|
|
|
Node = acsTree:goto(State), |
|
|
|
case Node of |
|
|
|
undefined -> |
|
|
@ -29,7 +30,7 @@ matchInner(Word, State, Index, MatchList) -> |
|
|
|
{State, MatchList}; |
|
|
|
_ -> |
|
|
|
{NextState, _} = acsTree:failOut(State), |
|
|
|
matchInner(Word, NextState, Index, MatchList) |
|
|
|
matchWord(Word, NextState, Index, MatchList) |
|
|
|
end; |
|
|
|
_ -> |
|
|
|
case Node of |
|
|
@ -42,7 +43,7 @@ matchInner(Word, State, Index, MatchList) -> |
|
|
|
{State, MatchList}; |
|
|
|
_ -> |
|
|
|
{NextState, _} = acsTree:failOut(State), |
|
|
|
matchInner(Word, NextState, Index, MatchList) |
|
|
|
matchWord(Word, NextState, Index, MatchList) |
|
|
|
end |
|
|
|
end |
|
|
|
end. |
|
|
@ -55,7 +56,7 @@ getOutput(State, Index, MatchList) -> |
|
|
|
undefined -> |
|
|
|
getOutput(FailState, Index, MatchList); |
|
|
|
_ -> |
|
|
|
NewMatchList = [{Index - patternSize(Pattern, 0) + 1, Index, Pattern} | MatchList], |
|
|
|
NewMatchList = [{Index - Pattern + 1, Pattern} | MatchList], |
|
|
|
getOutput(FailState, Index, NewMatchList) |
|
|
|
end. |
|
|
|
|
|
|
@ -68,14 +69,14 @@ isHasSw(BinStr) -> |
|
|
|
doMatch(<<>>, _) -> |
|
|
|
false; |
|
|
|
doMatch(<<Word/utf8, Tail/binary>>, State) -> |
|
|
|
case matchInner(Word, State) of |
|
|
|
case matchWord(Word, State) of |
|
|
|
true -> |
|
|
|
true; |
|
|
|
NewState -> |
|
|
|
doMatch(Tail, NewState) |
|
|
|
end. |
|
|
|
|
|
|
|
matchInner(Word, State) -> |
|
|
|
matchWord(Word, State) -> |
|
|
|
Node = acsTree:goto(State), |
|
|
|
case Node of |
|
|
|
undefined -> |
|
|
@ -84,7 +85,7 @@ matchInner(Word, State) -> |
|
|
|
State; |
|
|
|
_ -> |
|
|
|
{NextState, _} = acsTree:failOut(State), |
|
|
|
matchInner(Word, NextState) |
|
|
|
matchWord(Word, NextState) |
|
|
|
end; |
|
|
|
_ -> |
|
|
|
case Node of |
|
|
@ -101,7 +102,7 @@ matchInner(Word, State) -> |
|
|
|
State; |
|
|
|
_ -> |
|
|
|
{NextState, _} = acsTree:failOut(State), |
|
|
|
matchInner(Word, NextState) |
|
|
|
matchWord(Word, NextState) |
|
|
|
end |
|
|
|
end |
|
|
|
end. |
|
|
@ -110,11 +111,11 @@ getOutput(0) -> |
|
|
|
false; |
|
|
|
getOutput(State) -> |
|
|
|
{FailState, Pattern} = acsTree:failOut(State), |
|
|
|
case Pattern == undefined orelse FailState == 0 of |
|
|
|
true -> |
|
|
|
false; |
|
|
|
case Pattern of |
|
|
|
undefined -> |
|
|
|
getOutput(FailState); |
|
|
|
_ -> |
|
|
|
getOutput(FailState) |
|
|
|
true |
|
|
|
end. |
|
|
|
%% *************************************** matchSw end *************************************************************** |
|
|
|
%% *************************************** replaceSw start ************************************************************* |
|
|
@ -122,7 +123,7 @@ replaceSw(_BinStr) -> |
|
|
|
ok. |
|
|
|
%% *************************************** replaceSw end ************************************************************* |
|
|
|
|
|
|
|
patternSize(<<>>, Cnt) -> |
|
|
|
strSize(<<>>, Cnt) -> |
|
|
|
Cnt; |
|
|
|
patternSize(<<_Word/utf8, Left/binary>>, Cnt) -> |
|
|
|
patternSize(Left, Cnt + 1). |
|
|
|
strSize(<<_Word/utf8, Left/binary>>, Cnt) -> |
|
|
|
strSize(Left, Cnt + 1). |