|
-module(utVMInfo).
|
|
|
|
-compile([export_all, nowarn_export_all]).
|
|
|
|
%% @doc Print a column header, then return one `{Table, Memory, Size}' tuple
%% per ETS table, ordered by the memory column from largest to smallest.
%% `Memory' and `Size' are whatever `ets:info/2' reports for the table.
show_cache() ->
    io:format("table name | memory | size~n", []),
    Rows = [{Tab, ets:info(Tab, memory), ets:info(Tab, size)} || Tab <- ets:all()],
    Ascending = lists:keysort(2, Rows),
    lists:reverse(Ascending).
|
|
|
|
%% @doc Return memory-related info for every process on the node.
%% Each element is `{RegisteredNameInfo, HeapSizeInfo}', i.e. the raw results
%% of `erlang:process_info/2' for `registered_name' and `heap_size'.
%% The list is ordered by the second element, largest first.
show_process() ->
    Describe =
        fun(Pid) ->
            {erlang:process_info(Pid, registered_name),
             erlang:process_info(Pid, heap_size)}
        end,
    Ascending = lists:keysort(2, lists:map(Describe, erlang:processes())),
    lists:reverse(Ascending).
|
|
|
|
%% @doc Return the number of processes currently existing on the local node.
%% Uses `erlang:system_info(process_count)', which is documented to return the
%% same value as `length(erlang:processes())' without building the pid list.
show_process_count() ->
    erlang:system_info(process_count).
|
|
|
|
%% @doc Decompile a loaded module and pretty-print its source to stdout.
%% Intended for checking that the code running in production is the expected
%% version (release upgrades occasionally go wrong).
%%
%% Returns `ok' after printing. Returns `{error, Reason}' instead of crashing when:
%%   - `code:which/1' does not yield a file name (`non_existing', `preloaded', ...);
%%   - the beam file was compiled without debug info (`no_abstract_code');
%%   - `beam_lib' cannot read the file.
decompile(Mod) ->
    case code:which(Mod) of
        Path when is_list(Path) ->
            case beam_lib:chunks(Path, [abstract_code]) of
                {ok, {_, [{abstract_code, {_, AC}}]}} ->
                    %% ~ts so that source containing non-Latin-1 characters prints.
                    io:format("~ts~n", [erl_prettypr:format(erl_syntax:form_list(AC))]);
                {ok, {_, [{abstract_code, no_abstract_code}]}} ->
                    {error, no_abstract_code};
                {error, beam_lib, Reason} ->
                    {error, Reason}
            end;
        NotAFile ->
            {error, NotAFile}
    end.
|
|
|
|
%% @doc Print the backtrace (stack) of a process, similar to Java's jstack.
%% Useful when many processes are suspended, the process count is too high,
%% or the node is slow or hanging.
%%
%% Accepts a registered name or a pid. Returns `ok' after printing, or
%% `undefined' when the name is not registered or the process no longer exists.
pstack(Reg) when is_atom(Reg) ->
    case whereis(Reg) of
        undefined -> undefined;
        Pid -> pstack(Pid)
    end;
pstack(Pid) ->
    case process_info(Pid, backtrace) of
        {backtrace, Trace} -> io:format("~s~n", [Trace]);
        %% process_info/2 returns undefined for a dead process.
        undefined -> undefined
    end.
|
|
|
|
%% ====================================================================
%% etop
%% Find the processes using the most memory / CPU. The original author notes
%% it remains usable on nodes with hundreds of thousands of processes.
%% --------------------------------------------------------------------

%% @doc Start etop in text mode in a separate process, sorted by reductions
%% (CPU ranking), with `{interval, 10}' and `{lines, 20}'.
%% Returns the pid of the spawned process.
etop() ->
    Options = [{output, text}, {interval, 10}, {lines, 20}, {sort, reductions}],
    spawn(fun() -> etop:start(Options) end).
|
|
|
|
%% @doc Start etop in text mode in a separate process, sorted by memory
%% (memory ranking), with `{interval, 10}' and `{lines, 20}'.
%% Returns the pid of the spawned process.
etop_mem() ->
    Options = [{output, text}, {interval, 10}, {lines, 20}, {sort, memory}],
    spawn(fun() -> etop:start(Options) end).
|
|
|
|
%% @doc Stop a running etop session; returns whatever `etop:stop/0' returns.
etop_stop() ->
    Result = etop:stop(),
    Result.
|
|
%% ====================================================================
|
|
|
|
%% @doc Force a garbage collection on every process of the node.
%% Handy when process memory is high: it helps tell a real memory leak apart
%% from garbage collection simply not keeping up. Always returns `ok'.
gc_all() ->
    lists:foreach(fun(Pid) -> erlang:garbage_collect(Pid) end, processes()),
    ok.
|
|
|
|
%% @doc Profile a single `M:F(A)' call with fprof and print the analysis.
%% Profiling slows execution down severely, so use it on small workloads only.
%% fprof output is detailed enough to show the hot call paths.
%%
%% Returns `ok'. If the profiled call raises, the exception propagates to the
%% caller, but the fprof server is still stopped so it is not left running.
fprof(M, F, A) ->
    fprof:start(),
    try
        fprof:apply(M, F, A),
        fprof:profile(),
        fprof:analyse(),
        ok
    after
        fprof:stop()
    end.
|
|
|
|
%% @doc Run eprof over every process of the node.
%% eprof has a noticeable impact on a production node, so use with care: the
%% original author recommends a duration below 10 s and fewer than 1000
%% processes, otherwise the node may crash.
%%
%% Result: the time actually spent in each function (time spent in calls into
%% other modules is not accumulated), i.e. only module/function, call count
%% and execution time.
%%
%% `TimeoutSec' is forwarded unchanged to `eprof/2'. Whatever `whereis(eprof)'
%% returns at call time is removed from the list of profiled processes.
eprof_all(TimeoutSec) ->
    AllPids = processes(),
    Targets = lists:delete(whereis(eprof), AllPids),
    eprof(Targets, TimeoutSec).
|
|
|
|
%% @doc Profile the given processes with eprof for `TimeoutSec' seconds, then
%% print the total analysis and stop eprof.
%%
%% `TimeoutSec' is a number of seconds (fractions allowed). It is converted to
%% milliseconds here because `timer:sleep/1' expects milliseconds; passing the
%% value through unconverted made the profiling window 1000 times shorter than
%% the parameter name says.
%% Returns the result of `eprof:stop/0'.
eprof(Pids, TimeoutSec) ->
    %% Validate/convert the duration before touching eprof, so a bad argument
    %% cannot leave the profiler running.
    SleepMs = round(TimeoutSec * 1000),
    eprof:start(),
    eprof:start_profiling(Pids),
    timer:sleep(SleepMs),
    eprof:stop_profiling(),
    eprof:analyze(total),
    eprof:stop().
|
|
|
|
%% @doc Scheduler usage over a one-second window.
%% Reports the utilisation of each scheduler measured over 1000 ms. The
%% original author notes that, because of spin-waiting and scheduling work,
%% this figure can be much lower than what `top' shows.
scheduler_usage() ->
    WindowMs = 1000,
    scheduler_usage(WindowMs).
|
|
|
|
%% @doc Measure scheduler utilisation over `RunMs' milliseconds and print it.
%%
%% Takes two `erlang:statistics(scheduler_wall_time)' samples `RunMs' apart and
%% prints `[{total, Ratio} | [{SchedulerId, Ratio}]]', where each ratio is
%% active-time delta divided by total-time delta. A zero total-time delta is
%% reported as 0.0 instead of raising `badarith'.
%% This process's `scheduler_wall_time' request is released even if sampling fails.
%% Returns `ok'.
scheduler_usage(RunMs) ->
    erlang:system_flag(scheduler_wall_time, true),
    {Ts0, Ts1} =
        try
            Before = lists:sort(erlang:statistics(scheduler_wall_time)),
            timer:sleep(RunMs),
            {Before, lists:sort(erlang:statistics(scheduler_wall_time))}
        after
            erlang:system_flag(scheduler_wall_time, false)
        end,
    Ratio = fun(_Active, 0) -> 0.0;
               (Active, Wall) -> Active / Wall
            end,
    %% One {Id, ActiveDelta, WallDelta} per scheduler; both samples are sorted,
    %% so entries pair up by position.
    Deltas = lists:zipwith(
               fun({_, A0, T0}, {I, A1, T1}) -> {I, A1 - A0, T1 - T0} end,
               Ts0, Ts1),
    Cores = [{I, Ratio(A, T)} || {I, A, T} <- Deltas],
    {SumA, SumT} = lists:foldl(
                     fun({_, A, T}, {Ai, Ti}) -> {Ai + A, Ti + T} end,
                     {0, 0}, Deltas),
    io:format("~p~n", [[{total, Ratio(SumA, SumT)} | Cores]]).
|
|
|
|
%% @doc Process scheduling statistics over a one-second window.
%% Per the original author's note, in each result tuple the first number is
%% the count of processes executed and the second the count of processes migrated.
scheduler_stat() ->
    WindowMs = 1000,
    scheduler_stat(WindowMs).
|
|
|
|
%% @doc Return the change in the VM's total scheduling statistics over `RunMs' ms.
%% Enables `scheduling_statistics', samples
%% `erlang:system_info(total_scheduling_statistics)' twice `RunMs' apart,
%% disables the flag again, and returns `{Key, Delta1, Delta2}' per entry.
scheduler_stat(RunMs) ->
    erlang:system_flag(scheduling_statistics, enable),
    Before = erlang:system_info(total_scheduling_statistics),
    timer:sleep(RunMs),
    After = erlang:system_info(total_scheduling_statistics),
    erlang:system_flag(scheduling_statistics, disable),
    Delta = fun({_Key, In0, Out0}, {Key, In1, Out1}) ->
                {Key, In1 - In0, Out1 - Out0}
            end,
    lists:zipwith(Delta, Before, After).
|
|
|
|
%% ====================================================================
%% Trace logging
%% Logs the full MFA of every call into a module; the output becomes hard to
%% read when the arguments are large.
%% --------------------------------------------------------------------

%% @doc Trace calls to every function (local and exported) of `Mod' in all processes.
%% Returns the result of `dbg:p(all, c)'.
trace(Mod) ->
    dbg:tracer(),
    AnyFunction = '_',
    dbg:tpl(Mod, AnyFunction, []),
    dbg:p(all, c).
|
|
|
|
%% @doc Trace calls to every function of `Mod' on `Node' as well; the trace
%% output is printed in the local shell.
%% Returns the result of `dbg:p(all, c)'.
trace(Node, Mod) ->
    dbg:tracer(),
    dbg:n(Node),
    AnyFunction = '_',
    dbg:tpl(Mod, AnyFunction, []),
    dbg:p(all, c).
|
|
|
|
%% @doc Stop tracing; returns whatever `dbg:stop_clear/0' returns.
trace_stop() ->
    Result = dbg:stop_clear(),
    Result.
|
|
%% ====================================================================
|
|
|
|
%% High-memory / OOM troubleshooting.
%% The original author notes that etop cannot cope with nodes of 100k+
%% processes while the code below can. Once a suspicious process is found,
%% investigate it with pstack/1 and its message_queue_len.

%% @doc Run `proc_mem/2' over every process of the node; each process is
%% labelled with the name `undefined'.
proc_mem_all(SizeLimitKb) ->
    Label = fun(Pid) -> {undefined, Pid} end,
    proc_mem(lists:map(Label, erlang:processes()), SizeLimitKb).
|
|
|
|
%% @doc Run `proc_mem/2' over the supervised processes reported by
%% `release_handler_1:get_supervised_procs/0' that are still alive.
proc_mem(SizeLimitKb) ->
    Supervised = release_handler_1:get_supervised_procs(),
    Alive = [{Name, Pid} || {_, Name, Pid, _} <- Supervised, is_process_alive(Pid)],
    proc_mem(Alive, SizeLimitKb).
|
|
|
|
%% @doc Report the processes whose total heap exceeds `SizeLimitKb' kilobytes.
%%
%% `Procs' is a list of `{Name, Pid}'. Returns `{Total, Rows}' where `Rows' is
%% a list of `{Name, Pid, Bytes}' sorted by `Bytes' descending, and `Total' is
%% the sum of `Bytes' over the reported rows only. Processes that no longer
%% exist are skipped.
%%
%% `total_heap_size' is reported in words, so it is converted with the
%% emulator's actual word size rather than assuming 8 bytes per word.
proc_mem(Procs, SizeLimitKb) ->
    SizeLimit = SizeLimitKb * 1024,
    WordSize = erlang:system_info(wordsize),
    Collect =
        fun({Name, Pid}, {Acc, TotalSize} = Unchanged) ->
            case erlang:process_info(Pid, total_heap_size) of
                {_, Words} when Words * WordSize > SizeLimit ->
                    Size = Words * WordSize,
                    {[{Name, Pid, Size} | Acc], TotalSize + Size};
                _ ->
                    Unchanged
            end
        end,
    {Rows, Total} = lists:foldl(Collect, {[], 0}, Procs),
    {Total, lists:reverse(lists:keysort(3, Rows))}.
|
|
|
|
%% @doc Print the `N' processes using the most memory, one per line, as
%% `InitialCall : Memory'. The initial call is read from the `$initial_call'
%% entry of the process dictionary; processes without one are left out.
%% Always returns `ok'.
show(N) ->
    Probe =
        fun(Pid) ->
            %% Any failure or unexpected shape becomes {null, 0} and is filtered out below.
            try process_info(Pid, [memory, dictionary]) of
                [{_, Mem}, {_, Dict}] ->
                    {util:prop_get_value('$initial_call', Dict, null), Mem};
                _ ->
                    {null, 0}
            catch
                _:_ -> {null, 0}
            end
        end,
    Known = [Entry || {Key, _} = Entry <- lists:map(Probe, processes()), Key =/= null],
    Ranked = lists:sort(fun({_, MemA}, {_, MemB}) -> MemA > MemB end, Known),
    Top = lists:sublist(Ranked, N),
    lists:foreach(fun({Key, Mem}) -> io:format("~p : ~p ~n", [Key, Mem]) end, Top),
    ok.
|
|
|
|
%% @doc Like `show/1', but leaves out every process whose `$initial_call'
%% tuple has a first element listed in `SkipNames'.
%% Processes whose initial call is missing or is not a non-empty tuple are
%% left out as well. Always returns `ok'.
show(N, SkipNames) ->
    Skipped = {null, 0},
    Classify =
        fun(InitCall, Mem) when is_tuple(InitCall), tuple_size(InitCall) > 0 ->
                case lists:member(element(1, InitCall), SkipNames) of
                    true -> Skipped;
                    false -> {InitCall, Mem}
                end;
           (_InitCall, _Mem) ->
                Skipped
        end,
    Probe =
        fun(Pid) ->
            %% Any failure or unexpected shape is treated as "skip this process".
            try process_info(Pid, [memory, dictionary]) of
                [{_, Mem}, {_, Dict}] ->
                    Classify(util:prop_get_value('$initial_call', Dict, null), Mem);
                _ ->
                    Skipped
            catch
                _:_ -> Skipped
            end
        end,
    Known = [Entry || {Key, _} = Entry <- lists:map(Probe, processes()), Key =/= null],
    Ranked = lists:sort(fun({_, MemA}, {_, MemB}) -> MemA > MemB end, Known),
    Top = lists:sublist(Ranked, N),
    lists:foreach(fun({Key, Mem}) -> io:format("~p : ~p ~n", [Key, Mem]) end, Top),
    ok.
|
|
|
|
%% @doc Group processes by their `$initial_call' and print the `N' groups
%% with the largest summed memory, showing the average memory per process
%% (integer division) and the number of processes in the group.
%% Processes without an `$initial_call' entry are left out. Always returns `ok'.
show1(N) ->
    Tally =
        fun(Pid, Acc) ->
            try process_info(Pid, [memory, dictionary]) of
                [{_, Mem}, {_, Dict}] ->
                    Key = util:prop_get_value('$initial_call', Dict, null),
                    case lists:keyfind(Key, 1, Acc) of
                        {Key, MemSoFar, Count} ->
                            lists:keyreplace(Key, 1, Acc, {Key, Mem + MemSoFar, Count + 1});
                        false ->
                            [{Key, Mem, 1} | Acc]
                    end;
                _ ->
                    Acc
            catch
                _:_ -> Acc
            end
        end,
    Groups = lists:foldl(Tally, [], processes()),
    Known = [Group || {Key, _, _} = Group <- Groups, Key =/= null],
    Ranked = lists:sort(fun({_, MemA, _}, {_, MemB, _}) -> MemA > MemB end, Known),
    Top = lists:sublist(Ranked, N),
    lists:foreach(
        fun({Key, Mem, Count}) ->
            io:format("~p : per_memory=~p process_num=~p ~n", [Key, (Mem div Count), Count])
        end,
        Top),
    ok.
|
|
|
|
%% @doc Return the number of scheduler threads, `erlang:system_info(schedulers)'.
%% The original comment describes this as the CPU core count.
coreCnt() ->
    Schedulers = erlang:system_info(schedulers),
    Schedulers.
|
|
|
|
%% @doc Return the id of the scheduler the calling process is running on,
%% `erlang:system_info(scheduler_id)'.
%% The original comment describes this as the core id.
coreIndex() ->
    SchedulerId = erlang:system_info(scheduler_id),
    SchedulerId.
|
|
|
|
|
|
%% @doc Append `erlang:process_info/1' of every process on the node to
%% "./logs/processes_infos.log", pausing 20 ms after each entry.
%%
%% If the log file disappears while dumping, it is reopened and all following
%% entries go to the new handle. Previously the new handle was dropped and
%% later writes went to the already-closed one. The handle is closed at the end.
%% Returns a list with one `ok' per process, as before.
process_infos() ->
    File = "./logs/processes_infos.log",
    OpenModes = [write, raw, binary, append],
    filelib:ensure_dir("./logs/"),
    {ok, Fd0} = file:open(File, OpenModes),
    WriteOne =
        fun(Pid, Fd) ->
            Info = io_lib:format("=>~p \n\n", [erlang:process_info(Pid)]),
            FdNow =
                case filelib:is_file(File) of
                    true ->
                        Fd;
                    false ->
                        file:close(Fd),
                        {ok, NewFd} = file:open(File, OpenModes),
                        NewFd
                end,
            file:write(FdNow, Info),
            %% The sleep result (ok) is the per-process element of the returned list.
            {timer:sleep(20), FdNow}
        end,
    {Results, FdEnd} = lists:mapfoldl(WriteOne, Fd0, erlang:processes()),
    file:close(FdEnd),
    Results.
|
|
|
|
%% @doc Format the current `os:timestamp()' with `rfc1123_local_date/1'.
rfc1123_local_date() ->
    Now = os:timestamp(),
    rfc1123_local_date(Now).
|
|
%% @doc Format a point in time as an RFC 1123 style date string, e.g.
%% "Thu, 01 Jan 1970 00:00:00 GMT". Accepts:
%%   - an integer epoch (seconds), converted with
%%     `calendar:gregorian_seconds_to_datetime/1' after adding 62167219200;
%%   - a `{Mega, Sec, Micro}' timestamp, converted with
%%     `calendar:now_to_local_time/1';
%%   - a `{{Y, M, D}, {H, Mi, S}}' datetime, formatted as given.
%% NOTE(review): the "GMT" suffix is always appended, even for values that
%% went through the local-time conversion — confirm this is intended.
rfc1123_local_date(Epoch) when erlang:is_integer(Epoch) ->
    GregorianSecs = Epoch + 62167219200,
    rfc1123_local_date(calendar:gregorian_seconds_to_datetime(GregorianSecs));
rfc1123_local_date({_Mega, _Secs, _Micro} = Timestamp) ->
    rfc1123_local_date(calendar:now_to_local_time(Timestamp));
rfc1123_local_date({{YYYY, MM, DD} = Date, {Hour, Min, Sec}}) ->
    DayName = httpd_util:day(calendar:day_of_the_week(Date)),
    MonthName = httpd_util:month(MM),
    Formatted = io_lib:format("~s, ~2.2.0w ~3.s ~4.4.0w ~2.2.0w:~2.2.0w:~2.2.0w GMT",
                              [DayName, DD, MonthName, YYYY, Hour, Min, Sec]),
    lists:flatten(Formatted).
|
|
|
|
%% @doc Write a crash-dump style report of the running node to
%% "erl_crash.dump" in the current directory.
%% The report is a header line with the current date, the system version, then
%% `erlang:system_info/1' for `info' and `procs', the ETS summary from
%% `ets_info/0', `system_info(dist)', and the loaded-modules section in which
%% every newline is followed by "=mod:".
%% Returns the result of `file:write_file/2'.
crash_dump() ->
    Date = erlang:list_to_binary(rfc1123_local_date()),
    Header = erlang:iolist_to_binary([<<"=erl_crash_dump:0.2\n">>, Date, <<"\nSystem version: ">>]),
    Ets = ets_info(),
    Version = erlang:list_to_binary(erlang:system_info(system_version)),
    Info = erlang:system_info(info),
    Procs = erlang:system_info(procs),
    Dist = erlang:system_info(dist),
    Loaded = binary:replace(erlang:system_info(loaded), <<"\n">>, <<"\n=mod:">>, [global]),
    Report = erlang:iolist_to_binary(
               [Header, Version, Info, Procs, Ets, Dist, <<"=loaded_modules\n">>, Loaded]),
    file:write_file("erl_crash.dump", Report).
|
|
|
|
%% @doc Concatenate the `ets_table_info/1' section of every ETS table into one binary.
ets_info() ->
    Sections = lists:map(fun(Tab) -> ets_table_info(Tab) end, ets:all()),
    binary:list_to_bin(Sections).
|
|
|
|
%% @doc Build the crash-dump style "=ets:" section for one ETS table.
%% The section holds the owner pid, the table name (written to both the
%% "Table:" and "Name:" fields) and the object count.
%%
%% Returns `<<>>' when the table no longer exists: `ets:info/1' then returns
%% `undefined', which can happen if the table is deleted after `ets:all/0'
%% listed it.
ets_table_info(Table) ->
    case ets:info(Table) of
        undefined ->
            <<>>;
        Info ->
            Owner = erlang:list_to_binary(erlang:pid_to_list(proplists:get_value(owner, Info))),
            Name = erlang:list_to_binary(erlang:atom_to_list(proplists:get_value(name, Info))),
            Objects = erlang:list_to_binary(
                        erlang:integer_to_list(proplists:get_value(size, Info))),
            binary:list_to_bin([<<"=ets:">>, Owner, <<"\nTable: ">>, Name, <<"\nName: ">>, Name,
                                <<"\nObjects: ">>, Objects, <<"\n">>])
    end.
|
|
|
|
%% @doc Force a garbage collection on every process whose `memory' (as
%% reported by `erlang:process_info/2') is greater than `MemLim'.
%%
%% The info is fetched in one step and matched directly, so a process that
%% exits during the scan is skipped instead of causing a `badmatch'.
%% Returns `ok'.
check_mem(MemLim) ->
    lists:foreach(
        fun(P) ->
            case erlang:process_info(P, memory) of
                {memory, Mem} when Mem > MemLim -> erlang:garbage_collect(P);
                %% Below the limit, or the process is already gone.
                _ -> ok
            end
        end,
        erlang:processes()).
|
|
|
|
%% @spec top() -> ok
%% @doc Print an overview of the node: node name, OTP release, scheduler
%% count, memory used by processes / ETS / the whole VM (in megabytes), the
%% process limit and the current process count.
top() ->
    ToMb = fun(Bytes) -> Bytes / 1024 / 1024 end,
    OtpRelease = erlang:system_info(otp_release),
    Schedulers = erlang:system_info(schedulers),
    ProcessCount = erlang:system_info(process_count),
    ProcessLimit = erlang:system_info(process_limit),
    ProcessBytes = erlang:memory(processes_used),
    EtsBytes = erlang:memory(ets),
    TotalBytes = erlang:memory(total),
    Format =
        "++++++++++++++++++++++++++++++++++++++++++~n"
        " Node: ~p~n"
        " Erlang Ver: ~p~n"
        " Free Threads: ~p~n"
        " Process Used Memory: ~pMb~n"
        " Ets Used Memory: ~pMb~n"
        " Erlang VM Used Memory: ~pMb~n"
        " Process Limit: ~p~n"
        " Process Used: ~p~n"
        "++++++++++++++++++++++++++++++++++++++++++~n",
    Args = [node(), OtpRelease, Schedulers, ToMb(ProcessBytes), ToMb(EtsBytes),
            ToMb(TotalBytes), ProcessLimit, ProcessCount],
    io:format(Format, Args),
    ok.
|
|
|
|
%% @doc Same overview as `top/0' without the node name, returned as a string
%% for the operations team instead of being printed.
%%
%% The result is a list of UTF-8 encoded bytes. The format string contains
%% non-Latin-1 characters, so the text is encoded with
%% `unicode:characters_to_binary/1'; `list_to_binary/1' raises `badarg' on
%% code points above 255 when the source is compiled as UTF-8 (the default).
top_back() ->
    Release = erlang:system_info(otp_release),
    SchedNum = erlang:system_info(schedulers),
    ProcCount = erlang:system_info(process_count),
    ProcLimit = erlang:system_info(process_limit),
    ProcMemUsed = erlang:memory(processes_used),
    EtsMemAlc = erlang:memory(ets),
    MemTot = erlang:memory(total),
    Str = io_lib:format(
            " Erlang 版本: ~p~n"
            " 可使用的调度线程: ~p~n"
            " 所有进程使用的内存: ~pMb~n"
            " 所有ets使用的内存: ~pMb~n"
            " Erlang系统占用内存: ~pMb~n"
            " 可创建进程数量上限: ~p~n"
            " 当前进程数: ~p~n"
            , [Release, SchedNum, ProcMemUsed / 1024 / 1024, EtsMemAlc / 1024 / 1024,
               MemTot / 1024 / 1024, ProcLimit, ProcCount]),
    binary_to_list(unicode:characters_to_binary(Str)).
|
|
|
|
%% @spec ets_mem() -> term()
%% @doc Print the 30 ETS tables that use the most memory, largest first.
%%
%% `ets:info(Tab, memory)' reports words, so the value is multiplied by the
%% emulator word size before being shown in Kb; the raw word count used to be
%% printed with a Kb label. A table that disappears during the scan
%% (`undefined') is listed with 0 instead of crashing.
%% Returns `ok'.
ets_mem() ->
    WordSize = erlang:system_info(wordsize),
    BytesOf =
        fun(Tab) ->
            case ets:info(Tab, memory) of
                Words when is_integer(Words) -> {Tab, Words * WordSize};
                undefined -> {Tab, 0}
            end
        end,
    Mems = lists:map(BytesOf, ets:all()),
    Top = lists:sublist(lists:reverse(lists:keysort(2, Mems)), 30),
    io:format("~n--------------------------------------------------~n"
              "~-30w ~w~n--------------------------------------------------~n"
              , [table, used_memory]),
    lists:foreach(
        fun({Tab, Bytes}) ->
            io:format("~-30w ~wKb~n", [Tab, Bytes / 1024])
        end,
        Top).
|
|
|
|
%% @spec tcp_links() -> Info
%% @doc Print the number of open ports whose name is "tcp_inet", i.e. TCP
%% sockets (the printed message notes this includes database connections).
%%
%% Only the `name' item is queried. A port that closes during the scan makes
%% `erlang:port_info/2' return `undefined' and is simply not counted, where
%% `lists:keyfind/3' on `undefined' used to crash.
%% Returns `ok'.
tcp_links() ->
    IsTcp =
        fun(Port) ->
            case erlang:port_info(Port, name) of
                {name, "tcp_inet"} -> true;
                _ -> false
            end
        end,
    TcpPorts = lists:filter(IsTcp, erlang:ports()),
    io:format("~n当前socket数量(包括链接数据库的socket): ~w~n", [length(TcpPorts)]).
|
|
|
|
|
|
%% @doc Dump `erlang:process_info/1' of `Pid' to a timestamped file named
%% "../logs/pid_info.dump-Y-M-D-H-Mi-S".
%% Each info item is written as one term per line, except non-empty `messages'
%% and `dictionary' values, which are written one element per line between a
%% "{tag," header and a closing "}".
%% Prints "not find pid info" when no info is available for `Pid'.
%% Returns the result of `file:close/1'.
dump_process_info(Pid) ->
    {{Year, Month, Day}, {Hour, Minutes, Second}} = util:local_time(),
    DumpPath = util:fbin("~s-~w-~w-~w-~w-~w-~w",
                         [<<"../logs/pid_info.dump">>, Year, Month, Day, Hour, Minutes, Second]),
    {ok, Out} = file:open(DumpPath, write),
    %% An empty value is printed as a plain term; anything else gets one element per line.
    WriteList =
        fun(Tag, [], _Header) ->
                io:format(Out, "~w~n", [{Tag, []}]);
           (_Tag, Items, Header) ->
                io:format(Out, Header, []),
                lists:foreach(fun(Item) -> io:format(Out, " ~w~n", [Item]) end, Items),
                io:format(Out, "}~n", [])
        end,
    WriteEntry =
        fun({messages, Messages}) -> WriteList(messages, Messages, "{messages,~n");
           ({dictionary, Dics}) -> WriteList(dictionary, Dics, "{dictionary,~n");
           (Other) -> io:format(Out, "~w~n", [Other])
        end,
    case erlang:process_info(Pid) of
        Info when is_list(Info) -> lists:foreach(WriteEntry, Info);
        _ -> io:format("not find pid info")
    end,
    file:close(Out).
|
|
|
|
%% @doc Collect the processes whose `InfoName' item is anything other than
%% exactly `{InfoName, 0}'. Processes for which the lookup yields no such
%% tuple, e.g. `undefined', are included.
%% Returns `[ProcessCount, InfoName, MatchCount, Rows]', where each row is
%% `{Pid, process_info(Pid, InfoName), process_info(Pid, registered_name)}'.
get_process_info_and_zero_value(InfoName) ->
    AllPids = erlang:processes(),
    NonZero = [Pid || Pid <- AllPids,
                      erlang:process_info(Pid, InfoName) =/= {InfoName, 0}],
    Rows = [{Pid, erlang:process_info(Pid, InfoName), erlang:process_info(Pid, registered_name)}
            || Pid <- NonZero],
    [length(AllPids), InfoName, length(Rows), Rows].
|
|
|
|
%% @doc Collect the processes whose `InfoName' item is greater than `Value'.
%% Processes for which the lookup does not yield `{InfoName, V}', e.g.
%% `undefined', are included.
%% Returns `[ProcessCount, InfoName, Value, MatchCount, Rows]', where each row is
%% `{Pid, process_info(Pid, InfoName), process_info(Pid, registered_name)}'.
get_process_info_and_large_than_value(InfoName, Value) ->
    AllPids = erlang:processes(),
    Exceeds =
        fun(Pid) ->
            case erlang:process_info(Pid, InfoName) of
                {InfoName, Current} -> Current > Value;
                _ -> true
            end
        end,
    Describe =
        fun(Pid) ->
            {Pid, erlang:process_info(Pid, InfoName), erlang:process_info(Pid, registered_name)}
        end,
    Rows = lists:map(Describe, lists:filter(Exceeds, AllPids)),
    [length(AllPids), InfoName, Value, length(Rows), Rows].
|
|
|
|
%% @doc Print every process whose `message_queue_len' is not 0, using
%% `get_process_info_and_zero_value/1'.
get_msg_queue() ->
    Report = get_process_info_and_zero_value(message_queue_len),
    io:fwrite("process count:~p~n~p value is not 0 count:~p~nLists:~p~n", Report).
|
|
|
|
%% @doc Print every process whose `memory' item is greater than 1048576.
get_memory() ->
    Report = get_process_info_and_large_than_value(memory, 1048576),
    io:fwrite("process count:~p~n~p value is large than ~p count:~p~nLists:~p~n", Report).
|
|
|
|
%% @doc Print every process whose `memory' item is greater than `Value'.
get_memory(Value) ->
    Report = get_process_info_and_large_than_value(memory, Value),
    io:fwrite("process count:~p~n~p value is large than ~p count:~p~nLists:~p~n", Report).
|
|
|
|
%% @doc Print every process whose `heap_size' item is greater than 1048576.
get_heap() ->
    Report = get_process_info_and_large_than_value(heap_size, 1048576),
    io:fwrite("process count:~p~n~p value is large than ~p count:~p~nLists:~p~n", Report).
|
|
|
|
%% @doc Print every process whose `heap_size' item is greater than `Value'.
get_heap(Value) ->
    Report = get_process_info_and_large_than_value(heap_size, Value),
    io:fwrite("process count:~p~n~p value is large than ~p count:~p~nLists:~p~n", Report).
|
|
|
|
%% @doc Print every process whose `memory' item is greater than 0, which in
%% practice lists the processes of the node.
get_processes() ->
    Report = get_process_info_and_large_than_value(memory, 0),
    io:fwrite("process count:~p~n~p value is large than ~p count:~p~nLists:~p~n", Report).
|
|
|
|
|