|
|
@ -0,0 +1,167 @@ |
|
|
|
-module(utVMInfo). |
|
|
|
|
|
|
|
-compile([export_all, nowarn_export_all]). |
|
|
|
|
|
|
|
%% Print a header line and return the memory usage of every ETS table,
%% largest first.
%%
%% Returns a list of {Table, Memory, Size} tuples sorted by Memory in
%% descending order. Memory is the value of ets:info(Table, memory)
%% (words allocated to the table) and Size is the number of objects.
%% Only the header is printed here; the rows are the return value.
show_cache() ->
    io:format("table name | memory | size~n", []),
    %% A table can be deleted between ets:all() and ets:info/2, in which
    %% case ets:info/2 returns 'undefined'. Such rows are dropped so they
    %% do not end up mixed into the numeric ordering.
    Rows = [{Tab, Mem, Size}
            || Tab <- ets:all(),
               Mem <- [ets:info(Tab, memory)],
               Mem =/= undefined,
               Size <- [ets:info(Tab, size)],
               Size =/= undefined],
    lists:reverse(lists:keysort(2, Rows)).
|
|
|
|
|
|
|
%% Return heap usage information for every live process, largest heap
%% first.
%%
%% Each row is {NameInfo, {heap_size, Words}} where NameInfo is the result
%% of erlang:process_info(Pid, registered_name): either
%% {registered_name, Name} or [] for a process without a registered name.
%% Rows are sorted on the {heap_size, Words} element in descending order.
show_process() ->
    %% process_info/2 returns 'undefined' for a process that has exited
    %% since erlang:processes() was taken; those rows are skipped instead
    %% of being reported as {undefined, undefined}.
    Rows = [{NameInfo, HeapInfo}
            || Pid <- erlang:processes(),
               NameInfo <- [erlang:process_info(Pid, registered_name)],
               HeapInfo <- [erlang:process_info(Pid, heap_size)],
               HeapInfo =/= undefined],
    lists:reverse(lists:keysort(2, Rows)).
|
|
|
|
|
|
|
%% Return the number of processes currently existing on the local node.
%%
%% Uses erlang:system_info(process_count), which yields the same value as
%% length(erlang:processes()) without materialising the full pid list.
show_process_count() ->
    erlang:system_info(process_count).
|
|
|
|
|
|
|
%% Decompile a module from the abstract code stored in its beam file and
%% print the reconstructed source.
%%
%% Original author's note: used to confirm that the code running in
%% production is the expected one, because release upgrades occasionally
%% went wrong.
%%
%% Mod is the module name. Returns ok after printing, or
%%   {error, non_existing | preloaded | cover_compiled} when code:which/1
%%       does not resolve to a beam file,
%%   {error, no_abstract_code} when the beam has no abstract code chunk,
%%   {error, Reason} when beam_lib cannot read the file.
decompile(Mod) ->
    case code:which(Mod) of
        BeamPath when is_list(BeamPath) ->
            case beam_lib:chunks(BeamPath, [abstract_code]) of
                {ok, {_, [{abstract_code, {_, AC}}]}} ->
                    Source = erl_prettypr:format(erl_syntax:form_list(AC)),
                    %% ~ts rather than ~s: the pretty-printed source may
                    %% contain code points above 255.
                    io:format("~ts~n", [Source]);
                {ok, {_, [{abstract_code, no_abstract_code}]}} ->
                    {error, no_abstract_code};
                {error, beam_lib, Reason} ->
                    {error, Reason}
            end;
        NotAFile ->
            %% code:which/1 returned an atom instead of a file name.
            {error, NotAFile}
    end.
|
|
|
|
|
|
|
%% Print the backtrace (process stack) of a process.
%%
%% Original author's note: similar to jstack; useful when many processes
%% are suspended, the process count is too high, the node is slow or
%% appears to hang.
%%
%% The argument is either a registered name (atom) or a pid.
%% Returns ok after printing, or 'undefined' when the name is not
%% registered or the process is no longer alive.
pstack(Reg) when is_atom(Reg) ->
    case whereis(Reg) of
        undefined -> undefined;
        Pid -> pstack(Pid)
    end;
pstack(Pid) ->
    case process_info(Pid, backtrace) of
        {backtrace, Backtrace} ->
            io:format("~s~n", [Backtrace]);
        undefined ->
            %% The process has already exited; report it the same way as
            %% an unregistered name instead of crashing with badarg.
            undefined
    end.
|
|
|
|
|
|
|
%% ====================================================================
%% etop
%% Find the processes that use the most memory / CPU. Original author's
%% note: remains usable even on a node with hundreds of thousands of
%% processes.
%% --------------------------------------------------------------------
%% Process ranking by CPU usage: starts etop in a freshly spawned process
%% with text output, an interval of 10, 20 lines per report, sorted by
%% reductions. Returns the pid of the spawned process.
etop() ->
    Options = [{output, text}, {interval, 10}, {lines, 20}, {sort, reductions}],
    spawn(fun() -> etop:start(Options) end).
|
|
|
|
|
|
|
%% Process ranking by memory usage: starts etop in a freshly spawned
%% process with text output, an interval of 10, 20 lines per report,
%% sorted by memory. Returns the pid of the spawned process.
etop_mem() ->
    Options = [{output, text}, {interval, 10}, {lines, 20}, {sort, memory}],
    spawn(fun() -> etop:start(Options) end).
|
|
|
|
|
|
|
%% Stop etop. Returns whatever etop:stop/0 returns.
etop_stop() ->
    etop:stop().
|
|
|
%% ==================================================================== |
|
|
|
|
|
|
|
%% Force a garbage collection on every process of the node.
%%
%% Original author's note: run this when process memory is too high, to
%% tell a real memory leak apart from the collector simply lagging behind.
%%
%% Returns the list of erlang:garbage_collect/1 results, one per process.
gc_all() ->
    lists:map(fun(Pid) -> erlang:garbage_collect(Pid) end, processes()).
|
|
|
|
|
|
|
%% Profile a single call M:F(A...) with fprof and print the analysis.
%%
%% Original author's notes: profiling slows execution down severely, so
%% only use it on small pieces of work. The fprof result is fairly
%% detailed and can show the hot call paths.
%%
%% M, F, A are the module, function and argument list to apply.
%% Returns the result of fprof:stop/0. An exception raised by any
%% profiling step, including the profiled call, is re-raised after the
%% fprof server has been stopped.
fprof(M, F, A) ->
    fprof:start(),
    try
        fprof:apply(M, F, A),
        fprof:profile(),
        fprof:analyse()
    catch
        Class:Reason:Stacktrace ->
            %% Do not leave the fprof server running when a step fails.
            fprof:stop(),
            erlang:raise(Class, Reason, Stacktrace)
    end,
    fprof:stop().
|
|
|
|
|
|
|
%% Run eprof over all processes of the node. eprof has a noticeable impact
%% on live traffic - use with care!
%%
%% Original author's recommendation: keep TimeoutSec below 10s and the
%% process count below 1000, otherwise the node may crash.
%%
%% Result (per the original notes):
%%   - reports the time actually spent in each function (time spent in
%%     functions of other modules called from it is not accumulated);
%%   - only gives Mod - Fun, call count and execution time.
%%
%% The process registered as 'eprof' (if any) is excluded from the target
%% list; TimeoutSec is passed unchanged to eprof/2.
eprof_all(TimeoutSec) ->
    Everyone = processes(),
    Profiler = whereis(eprof),
    Targets = Everyone -- [Profiler],
    eprof(Targets, TimeoutSec).
|
|
|
|
|
|
|
%% Profile the given processes with eprof for TimeoutSec seconds and print
%% the total analysis.
%%
%% Pids       - list of processes to profile.
%% TimeoutSec - length of the profiling window in seconds (integer or
%%              float).
%% Returns the result of eprof:stop/0.
eprof(Pids, TimeoutSec) ->
    eprof:start(),
    eprof:start_profiling(Pids),
    %% timer:sleep/1 takes milliseconds, so the seconds value must be
    %% converted; round/1 keeps fractional seconds usable.
    timer:sleep(round(TimeoutSec * 1000)),
    eprof:stop_profiling(),
    eprof:analyze(total),
    eprof:stop().
|
|
|
|
|
|
|
%% scheduler usage
%% Measure the actual utilisation of every scheduler over a 1 second
%% window. Original author's note: because of spin waiting and scheduling
%% work, the usage reported here can be much lower than what top shows.
scheduler_usage() ->
    DefaultWindowMs = 1000,
    scheduler_usage(DefaultWindowMs).
|
|
|
|
|
|
|
%% Measure scheduler utilisation over a window of RunMs milliseconds and
%% print it.
%%
%% Two scheduler_wall_time samples are taken RunMs apart. For every
%% scheduler the ratio (active time delta) / (total time delta) is
%% computed, plus one overall ratio over the summed deltas. The printed
%% term is [{total, Ratio} | [{SchedulerId, Ratio}]].
%%
%% Returns ok (the result of io:format/2). scheduler_wall_time is switched
%% off again afterwards, also when the measurement fails.
scheduler_usage(RunMs) ->
    erlang:system_flag(scheduler_wall_time, true),
    try
        Ts0 = lists:sort(erlang:statistics(scheduler_wall_time)),
        timer:sleep(RunMs),
        Ts1 = lists:sort(erlang:statistics(scheduler_wall_time)),
        %% Per-scheduler deltas, computed once and reused for both the
        %% per-scheduler ratios and the overall total.
        Deltas = lists:zipwith(
                   fun({_, A0, T0}, {I, A1, T1}) -> {I, A1 - A0, T1 - T0} end,
                   Ts0, Ts1),
        %% A zero time delta would raise badarith; report 0.0 instead.
        Ratio = fun(_Active, 0) -> 0.0;
                   (Active, Wall) -> Active / Wall
                end,
        Cores = [{I, Ratio(DA, DT)} || {I, DA, DT} <- Deltas],
        {ActiveSum, WallSum} =
            lists:foldl(fun({_, DA, DT}, {AccA, AccT}) -> {AccA + DA, AccT + DT} end,
                        {0, 0}, Deltas),
        io:format("~p~n", [[{total, Ratio(ActiveSum, WallSum)} | Cores]])
    after
        erlang:system_flag(scheduler_wall_time, false)
    end.
|
|
|
|
|
|
|
%% Process scheduling
%% Count how many processes were scheduled within 1 second. Per the
%% original author's note, in each result tuple the first number is the
%% number of processes executed and the second the number of processes
%% migrated.
scheduler_stat() ->
    DefaultWindowMs = 1000,
    scheduler_stat(DefaultWindowMs).
|
|
|
|
|
|
|
%% Sample the total scheduling statistics twice, RunMs milliseconds apart,
%% and return the difference per entry.
%%
%% Scheduling statistics are enabled before the first sample and disabled
%% after the second one. The result is a list of
%% {Key, InDelta, OutDelta} tuples, one for each entry returned by
%% erlang:system_info(total_scheduling_statistics).
scheduler_stat(RunMs) ->
    erlang:system_flag(scheduling_statistics, enable),
    Before = erlang:system_info(total_scheduling_statistics),
    timer:sleep(RunMs),
    After = erlang:system_info(total_scheduling_statistics),
    erlang:system_flag(scheduling_statistics, disable),
    Delta = fun({_Key, In0, Out0}, {Key, In1, Out1}) ->
                    {Key, In1 - In0, Out1 - Out0}
            end,
    lists:zipwith(Delta, Before, After).
|
|
|
|
|
|
|
%% ====================================================================
%% Trace logging
%% Logs the full MFA of every call into the module; original author's
%% note: the output becomes hard to read when the arguments are large.
%% --------------------------------------------------------------------
%% Trace every call to any function of Mod: starts the dbg tracer, sets a
%% trace pattern on all functions of Mod (local ones included, via
%% dbg:tpl/3) and enables call tracing for all processes.
%% Returns the result of dbg:p/2.
trace(Mod) ->
    dbg:tracer(),
    AnyFunction = '_',
    dbg:tpl(Mod, AnyFunction, []),
    dbg:p(all, c).
|
|
|
|
|
|
|
%% Trace every call to any function of Mod on Node as well; per the
%% original author's note the output is printed in the local shell.
%% Starts the dbg tracer, adds Node with dbg:n/1, sets a trace pattern on
%% all functions of Mod and enables call tracing for all processes.
%% Returns the result of dbg:p/2.
trace(Node, Mod) ->
    dbg:tracer(),
    dbg:n(Node),
    AnyFunction = '_',
    dbg:tpl(Mod, AnyFunction, []),
    dbg:p(all, c).
|
|
|
|
|
|
|
%% Stop tracing. Returns whatever dbg:stop_clear/0 returns.
trace_stop() ->
    dbg:stop_clear().
|
|
|
%% ==================================================================== |
|
|
|
|
|
|
|
%% Troubleshooting helper for high memory usage / OOM.
%% Original author's note: etop cannot cope with nodes running 100k+
%% processes, whereas this code can; once a suspicious process is found,
%% investigate it further with pstack and its message queue length.
%%
%% Scans every process of the node. Each pid is paired with the name
%% 'undefined' and the list is handed to proc_mem/2 together with
%% SizeLimitKb.
proc_mem_all(SizeLimitKb) ->
    Unnamed = lists:map(fun(Pid) -> {undefined, Pid} end, erlang:processes()),
    proc_mem(Unnamed, SizeLimitKb).
|
|
|
|
|
|
|
%% Same report as proc_mem/2, restricted to the processes returned by
%% release_handler_1:get_supervised_procs() that are still alive. The
%% name reported for each process is the second element of the tuple
%% returned by that call.
proc_mem(SizeLimitKb) ->
    Supervised = release_handler_1:get_supervised_procs(),
    Alive = [{Name, Pid} || {_, Name, Pid, _} <- Supervised, is_process_alive(Pid)],
    proc_mem(Alive, SizeLimitKb).
|
|
|
|
|
|
|
%% Report the processes whose total heap size exceeds a limit.
%%
%% Procs       - list of {Name, Pid} pairs to inspect.
%% SizeLimitKb - threshold in kilobytes; only processes strictly above it
%%               are reported.
%%
%% Returns {Total, Rows}: Rows is a list of {Name, Pid, SizeBytes} sorted
%% by SizeBytes in descending order, and Total is the sum of SizeBytes
%% over the reported rows only. Processes for which process_info/2 does
%% not return a value (already exited) are skipped.
proc_mem(Procs, SizeLimitKb) ->
    SizeLimit = SizeLimitKb * 1024,
    %% total_heap_size is reported in words; convert with the emulator's
    %% real word size instead of assuming 8 bytes.
    WordSize = erlang:system_info(wordsize),
    Collect =
        fun({Name, Pid}, {Acc, TotalSize} = Unchanged) ->
                case erlang:process_info(Pid, total_heap_size) of
                    {total_heap_size, Words} when Words * WordSize > SizeLimit ->
                        Size = Words * WordSize,
                        {[{Name, Pid, Size} | Acc], TotalSize + Size};
                    _ ->
                        Unchanged
                end
        end,
    {Hits, Total} = lists:foldl(Collect, {[], 0}, Procs),
    {Total, lists:reverse(lists:keysort(3, Hits))}.
|
|
|
|
|
|
|
%% ==================================================================== |
|
|
|
%% Internal functions |
|
|
|
%% ==================================================================== |
|
|
|
|
|
|
|
|