|
|
@ -13,15 +13,21 @@ |
|
|
|
%% *************************************** matchSw start *************************************************************** |
|
|
|
%% Scans BinStr and returns every match found by the matcher.
%%
%% Each result entry is the pair built in getOutputMs/3:
%% {Index - Pattern + 1, Pattern}, where Index is the 1-based position of the
%% character that completed the match (skipped characters are not counted) and
%% Pattern is the second element returned by acsTree:failOut/1
%% (presumably the matched word's length in characters - TODO confirm).
%% Entries are prepended as they are found, so the latest match comes first.
-spec matchSw(BinStr :: binary()) -> [{StartIndex :: integer(), PatternLen :: integer()}].
matchSw(BinStr) ->
    %% Start in automaton state 0 with the first character at index 1.
    doMatchMs(BinStr, 0, _Index = 1, _MatchList = []).
|
|
|
|
|
|
|
%% Walks the binary one UTF-8 code point at a time, threading the automaton
%% state, the current character index and the accumulated match list.
%% Returns the match list once the input is exhausted.
doMatchMs(<<>>, _, _Index, MatchList) ->
    MatchList;
doMatchMs(<<Word/utf8, Tail/binary>>, State, Index, MatchList) ->
    case acsSpw:getSpw(Word) of
        true ->
            %% Special character: skip it, and do not advance Index or State.
            doMatchMs(Tail, State, Index, MatchList);
        _ ->
            {NewState, NewMatchList} = matchWordMs(Word, State, Index, MatchList),
            doMatchMs(Tail, NewState, Index + 1, NewMatchList)
    end.
|
|
|
|
|
|
|
matchWord(Word, State, Index, MatchList) -> |
|
|
|
matchWordMs(Word, State, Index, MatchList) -> |
|
|
|
Node = acsTree:goto(State), |
|
|
|
case Node of |
|
|
|
undefined -> |
|
|
@ -30,12 +36,12 @@ matchWord(Word, State, Index, MatchList) -> |
|
|
|
{State, MatchList}; |
|
|
|
_ -> |
|
|
|
{NextState, _} = acsTree:failOut(State), |
|
|
|
matchWord(Word, NextState, Index, MatchList) |
|
|
|
matchWordMs(Word, NextState, Index, MatchList) |
|
|
|
end; |
|
|
|
_ -> |
|
|
|
case Node of |
|
|
|
#{Word := NextState} -> |
|
|
|
NewMatchList = getOutput(NextState, Index, MatchList), |
|
|
|
NewMatchList = getOutputMs(NextState, Index, MatchList), |
|
|
|
{NextState, NewMatchList}; |
|
|
|
_ -> |
|
|
|
case State of |
|
|
@ -43,40 +49,46 @@ matchWord(Word, State, Index, MatchList) -> |
|
|
|
{State, MatchList}; |
|
|
|
_ -> |
|
|
|
{NextState, _} = acsTree:failOut(State), |
|
|
|
matchWord(Word, NextState, Index, MatchList) |
|
|
|
matchWordMs(Word, NextState, Index, MatchList) |
|
|
|
end |
|
|
|
end |
|
|
|
end. |
|
|
|
|
|
|
|
%% Collects the outputs reachable from State by following the chain returned
%% by acsTree:failOut/1 until state 0 is reached.
%%
%% For every state on the chain whose Pattern is not `undefined`, the pair
%% {Index - Pattern + 1, Pattern} is prepended to MatchList.
%% Pattern is used arithmetically here, so it must be a number
%% (presumably the matched word's length in characters - TODO confirm).
getOutputMs(0, _Index, MatchList) ->
    MatchList;
getOutputMs(State, Index, MatchList) ->
    {FailState, Pattern} = acsTree:failOut(State),
    case Pattern of
        undefined ->
            getOutputMs(FailState, Index, MatchList);
        _ ->
            NewMatchList = [{Index - Pattern + 1, Pattern} | MatchList],
            getOutputMs(FailState, Index, NewMatchList)
    end.
|
|
|
|
|
|
|
%% *************************************** matchSw end *************************************************************** |
|
|
|
%% *************************************** isHasSw start *************************************************************** |
|
|
|
%% Returns true as soon as the scan reports a match in BinStr, false if the
%% whole binary is consumed without one. The scan starts in automaton state 0.
-spec isHasSw(BinStr :: binary()) -> boolean().
isHasSw(BinStr) ->
    doMatchIs(BinStr, 0).
|
|
|
|
|
|
|
%% Walks the binary one UTF-8 code point at a time and stops at the first hit.
%% Returns false when the input is exhausted, true when matchWordIs/2 reports
%% a match; any other result of matchWordIs/2 is taken as the next state.
doMatchIs(<<>>, _) ->
    false;
doMatchIs(<<Word/utf8, Tail/binary>>, State) ->
    case acsSpw:getSpw(Word) of
        true ->
            %% Special character: skip it and keep the current state.
            doMatchIs(Tail, State);
        _ ->
            case matchWordIs(Word, State) of
                true ->
                    true;
                NewState ->
                    doMatchIs(Tail, NewState)
            end
    end.
|
|
|
|
|
|
|
matchWord(Word, State) -> |
|
|
|
matchWordIs(Word, State) -> |
|
|
|
Node = acsTree:goto(State), |
|
|
|
case Node of |
|
|
|
undefined -> |
|
|
@ -85,12 +97,12 @@ matchWord(Word, State) -> |
|
|
|
State; |
|
|
|
_ -> |
|
|
|
{NextState, _} = acsTree:failOut(State), |
|
|
|
matchWord(Word, NextState) |
|
|
|
matchWordIs(Word, NextState) |
|
|
|
end; |
|
|
|
_ -> |
|
|
|
case Node of |
|
|
|
#{Word := NextState} -> |
|
|
|
case getOutput(NextState) of |
|
|
|
case getOutputIs(NextState) of |
|
|
|
false -> |
|
|
|
NextState; |
|
|
|
_ -> |
|
|
@ -102,26 +114,111 @@ matchWord(Word, State) -> |
|
|
|
State; |
|
|
|
_ -> |
|
|
|
{NextState, _} = acsTree:failOut(State), |
|
|
|
matchWord(Word, NextState) |
|
|
|
matchWordIs(Word, NextState) |
|
|
|
end |
|
|
|
end |
|
|
|
end. |
|
|
|
|
|
|
|
%% Reports whether State, or any state on the chain returned by
%% acsTree:failOut/1, carries a pattern.
%% Returns false once state 0 is reached without finding one, and true at the
%% first state whose Pattern is not `undefined`.
getOutputIs(0) ->
    false;
getOutputIs(State) ->
    {FailState, Pattern} = acsTree:failOut(State),
    case Pattern of
        undefined ->
            getOutputIs(FailState);
        _ ->
            true
    end.
|
|
|
%% *************************************** isHasSw end *************************************************************** |
|
|
|
%% *************************************** replaceSw start ************************************************************* |
|
|
|
%% Entry point for replacing matched words in BinStr.
%%
%% doMatchRs/5 receives byte_size(BinStr) - 1 as TotalSize, so the byte
%% position it records for a match (TotalSize - byte_size(Tail)) is the
%% 0-based offset of the last byte of the character that completed the match.
%% Matches come back newest-first; they are reversed so doReplaceSw/4 sees
%% them in input order.
%%
%% Arguments are passed in the order doReplaceSw/4 declares them:
%% (Matches, BinStr, StartPos, Acc).
replaceSw(BinStr) ->
    MatchBIMWs = doMatchRs(BinStr, _TotalSize = byte_size(BinStr) - 1, _Index = 1, _State = 0, _MatchList = []),
    doReplaceSw(lists:reverse(MatchBIMWs), BinStr, _StartPos = 0, <<>>).
|
|
|
|
|
|
|
%% Replace from front to back.
%%
%% Arguments: the match list ({ByteIndex, MatchWordCnt, WordIndex} tuples),
%% the source binary, the current start position and the output accumulator.
%%
%% NOTE(review): neither clause uses the match tuple, the source binary or the
%% start position yet; both simply flatten Acc with iolist_to_binary/1, so no
%% replacement is actually performed. The replacement step still has to be
%% written.
doReplaceSw([], _BinStr, _StartPos, Acc) ->
    iolist_to_binary(Acc);
doReplaceSw([{_CurByteIndex, _MatchWordCnt, _CurWordIndex} | _MatchBIMWs], _BinStr, _StartPos, Acc) ->
    iolist_to_binary(Acc).
|
|
|
|
|
|
|
%% Folds the newest match into the match list (newest entry first), checking
%% recursively whether it contains or connects to earlier matches.
%%
%% Every entry is {ByteIndex, MatchWordCnt, WordIndex}: WordIndex is the index
%% of the last matched word and MatchWordCnt the number of words covered, so
%% WordIndex - MatchWordCnt is the index just BEFORE the first matched word
%% (an exclusive start).
%%
%% Outcomes, comparing the new match with the most recent old one:
%%   * separated by at least one unmatched word -> kept as its own entry;
%%   * overlapping or directly adjacent         -> merged into one entry that
%%     starts where the old one started and ends at the new end;
%%   * new match starts before the old one      -> the old entry is swallowed
%%     and the comparison continues with the entry before it.
dealMatchList([], CurByteIndex, MatchWordCnt, CurWordIndex) ->
    [{CurByteIndex, MatchWordCnt, CurWordIndex}];
dealMatchList([{_OldByteIndex, OldMatchWordCnt, OldWordIndex} | LeftMatchList] = OldMatchList, CurByteIndex, MatchWordCnt, CurWordIndex) ->
    CurStartIndex = CurWordIndex - MatchWordCnt,
    OldStartIndex = OldWordIndex - OldMatchWordCnt,
    if
        %% CurStartIndex is exclusive, so the new match begins at
        %% CurStartIndex + 1. It is disjoint and non-adjacent exactly when
        %% CurStartIndex + 1 > OldWordIndex + 1. Comparing against
        %% OldWordIndex + 1 instead would merge matches that have one
        %% unmatched word between them.
        CurStartIndex > OldWordIndex ->
            [{CurByteIndex, MatchWordCnt, CurWordIndex} | OldMatchList];
        CurStartIndex >= OldStartIndex ->
            [{CurByteIndex, CurWordIndex - OldStartIndex, CurWordIndex} | LeftMatchList];
        true ->
            dealMatchList(LeftMatchList, CurByteIndex, MatchWordCnt, CurWordIndex)
    end.
|
|
|
|
|
|
|
%% Scans the binary one UTF-8 code point at a time and collects the matches
%% needed for replacement.
%%
%% TotalSize - byte_size(Tail) is recorded as the byte position of a match and
%% CurIndex as its word index; characters for which acsSpw:getSpw/1 returns
%% true are skipped without advancing CurIndex or the automaton state.
%% Returns the accumulated match list when the input is exhausted.
doMatchRs(<<>>, _TotalSize, _CurIndex, _State, MatchList) ->
    MatchList;
doMatchRs(<<Word/utf8, Tail/binary>>, TotalSize, CurIndex, State, MatchList) ->
    case acsSpw:getSpw(Word) of
        true ->
            %% Special character: skip it, CurIndex is not incremented.
            doMatchRs(Tail, TotalSize, CurIndex, State, MatchList);
        _ ->
            %% The longest-match counter starts from 0 for every character.
            {NewState, MatchCnt} = matchWordRs(Word, State, 0),
            UpdatedMatches =
                if
                    MatchCnt =:= 0 ->
                        %% No pattern ends at this character.
                        MatchList;
                    true ->
                        dealMatchList(MatchList, TotalSize - byte_size(Tail), MatchCnt, CurIndex)
                end,
            doMatchRs(Tail, TotalSize, CurIndex + 1, NewState, UpdatedMatches)
    end.
|
|
|
|
|
|
|
%% Advances the automaton from State on Word and returns
%% {NextState, MatchCnt}.
%%
%% acsTree:goto(State) yields either `undefined` or a map keyed by character:
%%   * if the map has an entry for Word, move to that state and let
%%     getOutputRs/2 raise MatchCnt to the longest pattern ending there;
%%   * otherwise, at state 0 nothing can match, so state and count are
%%     returned unchanged;
%%   * otherwise retry from the state given by acsTree:failOut/1.
matchWordRs(Word, State, MatchCnt) ->
    case acsTree:goto(State) of
        #{Word := NextState} ->
            {NextState, getOutputRs(NextState, MatchCnt)};
        _ when State =:= 0 ->
            {State, MatchCnt};
        _ ->
            {FailState, _} = acsTree:failOut(State),
            matchWordRs(Word, FailState, MatchCnt)
    end.
|
|
|
|
|
|
|
%% Gets the largest match count for the current character.
%%
%% Follows the chain returned by acsTree:failOut/1 from State down to state 0
%% and returns the largest Pattern value seen that is greater than the
%% incoming MatchCnt, or MatchCnt itself if none is greater. States whose
%% Pattern is `undefined` do not contribute.
getOutputRs(0, MatchCnt) ->
    MatchCnt;
getOutputRs(State, MatchCnt) ->
    {FailState, Pattern} = acsTree:failOut(State),
    Longest =
        if
            Pattern =:= undefined -> MatchCnt;
            Pattern > MatchCnt -> Pattern;
            true -> MatchCnt
        end,
    getOutputRs(FailState, Longest).
|
|
|
% *************************************** replaceSw end ************************************************************* |
|
|
|
|
|
|
|
strSize(<<>>, Cnt) -> |
|
|
|
Cnt; |
|
|
|