Browse Source

ft: 代码调整

master
SisMaker 4 years ago
parent
commit
96b7f13176
3 changed files with 81 additions and 11 deletions
  1. +67
    -6
      src/eAcs.erl
  2. +14
    -1
      src/test/acTest.erl
  3. +0
    -4
      src/test/test.txt

+ 67
- 6
src/eAcs.erl View File

@ -7,6 +7,8 @@
, strSize/2 %% utf8字符串的长度
]).
-define(RW, 42). %% replacement code point: 42 is $* ('*'), pushed in place of each filtered character by getMatchWords
%% state 0 is the root node
%% Goto: State -> #{Word -> State}
%% failOut: State -> {FailState, BinStr}
@ -132,14 +134,73 @@ getOutputIs(State) ->
%% *************************************** matchSw end ***************************************************************
%% *************************************** replaceSw start *************************************************************
%% Entry point for replacement: collects match entries for BinStr with doMatchRs/5
%% (starting at index 1, state 0, empty match list), reverses them, and hands them
%% to doReplaceSw to build the resulting binary.
%% NOTE(review): two bodies are visible under this single head. The first pair of
%% lines ends with `.` and calls doReplaceSw with four arguments; the lines after it
%% bind TotalSize once and call doReplaceSw with five arguments. This looks like the
%% removed and added sides of a diff shown together — confirm which body is current.
replaceSw(BinStr) ->
MatchBIMWs = doMatchRs(BinStr, _TotalSize = byte_size(BinStr) - 1, _Index = 1, _State = 0, _MatchList = []),
doReplaceSw(lists:reverse(MatchBIMWs), 0, BinStr, <<>>).
TotalSize = byte_size(BinStr),
MatchBIMWs = doMatchRs(BinStr, TotalSize - 1, _Index = 1, _State = 0, _MatchList = []),
%io:format("IMY******************* ~p~n", [lists:reverse(MatchBIMWs)]),
doReplaceSw(lists:reverse(MatchBIMWs), BinStr, TotalSize, _StartPos = 0, <<>>).
%%
%% Four-argument variant of doReplaceSw. Both clauses ignore the source binary and
%% the start position and simply flatten the accumulated iodata into one binary;
%% a non-empty match list is accepted but its entries are not inspected.
doReplaceSw([], _Source, _Offset, Pieces) ->
    iolist_to_binary(Pieces);
doReplaceSw([{_ByteIdx, _WordCnt, _WordIdx} | _Remaining], _Source, _Offset, Pieces) ->
    iolist_to_binary(Pieces).
%% Rebuilds BinStr with every match masked.
%%   Matches   - list of {CurByteIndex, MatchWordCnt, CurWordIndex} entries, in the
%%               order they should be applied (the word index is not used here)
%%   BinStr    - the original binary
%%   TotalSize - byte_size(BinStr) as supplied by the caller
%%   StartPos  - byte offset of the first source byte not yet emitted
%%   BinAcc    - output built so far
%% For each match, getMatchWords/6 scans backwards from CurByteIndex and returns the
%% byte index just before the first byte it consumed plus the replacement code points.
%% Source bytes StartPos..EndByteIndex are copied unchanged, then the replacement is
%% appended, and copying resumes after CurByteIndex.
doReplaceSw([], BinStr, TotalSize, StartPos, BinAcc) when TotalSize > StartPos ->
    %% No matches left: flush the untouched tail of the source.
    Tail = binary:part(BinStr, StartPos, TotalSize - StartPos),
    <<BinAcc/binary, Tail/binary>>;
doReplaceSw([], _BinStr, _TotalSize, _StartPos, BinAcc) ->
    BinAcc;
doReplaceSw([{CurByteIndex, MatchWordCnt, _CurWordIndex} | Rest], BinStr, TotalSize, StartPos, BinAcc) ->
    {EndByteIndex, FilterWs} = getMatchWords(MatchWordCnt, BinStr, CurByteIndex, 0, 0, []),
    RPStr = unicode:characters_to_binary(FilterWs, utf8),
    Untouched =
        if
            StartPos =< EndByteIndex ->
                binary:part(BinStr, StartPos, EndByteIndex - StartPos + 1);
            true ->
                %% Nothing lies between the previous output position and this match.
                <<>>
        end,
    doReplaceSw(Rest, BinStr, TotalSize, CurByteIndex + 1, <<BinAcc/binary, Untouched/binary, RPStr/binary>>).
%% Scans BinStr backwards from ByteIndex (0-based, as required by binary:at/2),
%% decoding one UTF-8 character per step, until MatchWordCnt counted characters have
%% been consumed.
%%   MatchWordCnt - number of counted characters still to consume
%%   BinStr       - source binary (expected to be valid UTF-8)
%%   ByteIndex    - index of the next byte to inspect, moving towards 0
%%   BslCnt       - bit shift for the next byte of the character being assembled
%%                  (6 per continuation byte already seen, 0 between characters)
%%   Utf8Code     - payload bits gathered so far for the character being assembled
%%   FilterWs     - replacement code points collected so far, in source order
%% Returns {ByteIndex, FilterWs}, where ByteIndex is the index just before the first
%% byte consumed. Characters for which acsSpw:getSpw/1 returns true are kept verbatim
%% and do not count towards MatchWordCnt; every other character is replaced by ?RW.
%%
%% Fix: a continuation byte is identified by its top TWO bits being 10. The previous
%% test looked only at the top bit, so lead bytes (11xxxxxx) were also treated as
%% continuation bytes, the lead-byte branch was unreachable, and multi-byte
%% characters were never completed or counted.
getMatchWords(0, _BinStr, ByteIndex, _BslCnt, _Utf8Code, FilterWs) ->
    {ByteIndex, FilterWs};
getMatchWords(MatchWordCnt, BinStr, ByteIndex, BslCnt, Utf8Code, FilterWs) ->
    Byte = binary:at(BinStr, ByteIndex),
    %io:format("IMY****************~p ~n", [Byte]),
    if
        Byte band 2#10000000 =:= 0 ->
            %% ASCII: the byte is the whole character.
            pushMatchWord(Byte, MatchWordCnt, BinStr, ByteIndex, FilterWs);
        Byte band 2#11000000 =:= 2#10000000 ->
            %% Continuation byte (10xxxxxx): stash its 6 payload bits and keep
            %% walking back towards the lead byte.
            Payload = Byte band 2#00111111,
            getMatchWords(MatchWordCnt, BinStr, ByteIndex - 1, BslCnt + 6, Payload bsl BslCnt + Utf8Code, FilterWs);
        true ->
            %% Lead byte (11xxxxxx): the number of continuation bytes already seen
            %% (BslCnt / 6) selects how many payload bits the lead byte carries.
            %% A lead byte with no preceding continuation bytes is malformed input
            %% and crashes here with case_clause.
            LeadBits =
                case BslCnt of
                    6 -> Byte band 2#00011111;
                    12 -> Byte band 2#00001111;
                    18 -> Byte band 2#00000111;
                    24 -> Byte band 2#00000011;
                    30 -> Byte band 2#00000001
                end,
            FullWord = LeadBits bsl BslCnt + Utf8Code,
            pushMatchWord(FullWord, MatchWordCnt, BinStr, ByteIndex, FilterWs)
    end.

%% Handles one fully decoded character whose first byte sits at ByteIndex: keeps it
%% uncounted when acsSpw:getSpw/1 returns true, otherwise substitutes ?RW and counts
%% it; then continues the backward scan with the decoder state reset.
pushMatchWord(Word, MatchWordCnt, BinStr, ByteIndex, FilterWs) ->
    case acsSpw:getSpw(Word) of
        true ->
            getMatchWords(MatchWordCnt, BinStr, ByteIndex - 1, 0, 0, [Word | FilterWs]);
        _ ->
            getMatchWords(MatchWordCnt - 1, BinStr, ByteIndex - 1, 0, 0, [?RW | FilterWs])
    end.
%% {CurByteIndex, MatchWordCnt, CurWordIndex}
dealMatchList([], CurByteIndex, MatchWordCnt, CurWordIndex) ->

+ 14
- 1
src/test/acTest.erl View File

@ -10,9 +10,22 @@ test1() ->
%% Hands eAcs:matchSw/1 and a fixed mixed ASCII/CJK sample to acTc:ts/4 with a
%% count of 1000000.
test2() ->
    Sample = <<"fdsfads拉法叶舰fds淫秽ffdsfdsffdddd"/utf8>>,
    acTc:ts(1000000, eAcs, matchSw, [Sample]).
%% Hands eAcs:replaceSw/1 and a fixed mixed ASCII/CJK sample to acTc:ts/4 with a
%% count of 1000000.
test21() ->
    Sample = <<"fdsfads拉法叶舰fds淫秽ffdsfdsffdddd"/utf8>>,
    acTc:ts(1000000, eAcs, replaceSw, [Sample]).
%% Hands eAcs:isHasSw/1 and a fixed mixed ASCII/CJK sample to acTc:ts/4 with a
%% count of 1000000.
test22() ->
    Sample = <<"fdsfads拉法叶舰fds淫秽ffdsfdsffdddd"/utf8>>,
    acTc:ts(1000000, eAcs, isHasSw, [Sample]).
%% Hands eAcs:matchSw/1 with the caller-supplied BinStr to acTc:ts/4, forwarding Cnt.
test3(Cnt, BinStr) ->
    Args = [BinStr],
    acTc:ts(Cnt, eAcs, matchSw, Args).
%% Hands keyword:filter/1 with the caller-supplied BinStr to acTc:ts/4, forwarding Cnt.
test31(Cnt, BinStr) ->
    Args = [BinStr],
    acTc:ts(Cnt, keyword, filter, Args).
%% Reads FileName in full and forwards Cnt and the file contents to test3/2.
%% Crashes with badmatch if file:read_file/1 does not return {ok, Data}.
test4(Cnt, FileName) ->
{ok, Data} = file:read_file(FileName),
test3(Cnt, Data).
%% NOTE(review): the line below repeats the call that already closed the clause
%% above; it looks like the old and new sides of a diff line shown together —
%% confirm and keep only one.
test3(Cnt, Data).
%% Reads FileName in full and forwards Cnt and the file contents to test31/2.
%% Crashes with badmatch if file:read_file/1 does not return {ok, Contents}.
test41(Cnt, FileName) ->
    {ok, Contents} = file:read_file(FileName),
    test31(Cnt, Contents).

+ 0
- 4
src/test/test.txt View File

@ -3,7 +3,3 @@ sssa
ssafds
sg
afd

Loading…
Cancel
Save