From 8467b00eaf4f04d20ea3e98d869605f149ca76f5 Mon Sep 17 00:00:00 2001
From: SisMaker <1713699517@qq.com>
Date: Tue, 29 Jun 2021 11:47:11 +0800
Subject: [PATCH] =?UTF-8?q?ft:=20=E9=83=A8=E5=88=86=E4=BC=98=E5=8C=96?=
 =?UTF-8?q?=E8=B0=83=E6=95=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/callgrind/tpCallGrind.erl | 637 +++++++++++++++++-----------------
 src/eTpf.erl                  |   3 +-
 src/flame/tpFlame.erl         |  28 +-
 3 files changed, 328 insertions(+), 340 deletions(-)

diff --git a/src/callgrind/tpCallGrind.erl b/src/callgrind/tpCallGrind.erl
index af689a5..22d3468 100644
--- a/src/callgrind/tpCallGrind.erl
+++ b/src/callgrind/tpCallGrind.erl
@@ -1,160 +1,145 @@
--module(tpCallGrind).
+-module(tpCallGrind).      %% 调用堆栈和时间分析
 
--export([pts/0]).
--export([pfs/2]).
--export([pfs/3]).
--export([pfm/3]).
--export([pfm/4]).
+-export([
+	pfs/2
+	, pfs/3
+	, pfm/3
+	, pfm/4
+]).
 
 -type opts() :: #{
-   scope => global | per_process,                        %% Whether we filter the output per process.
-   running => boolean()                                  %% Whether we compute and save wait times.
+scope => global | per_process,                        %% Whether we filter the output per process.
+running => boolean()                                  %% Whether we compute and save wait times.
 }.
 
 -record(call, {
-   mfa :: atom(),                                     %% The MFA for the call.
-   source :: {string(), pos_integer()},               %% The source file name.
-   ts :: pos_integer(),                               %% The timestamp for the call.
-   self_ts :: pos_integer(),                          %% The timestamp for when we last started executing this function.
-   incl :: undefined | non_neg_integer(),             %% Execution time including subcalls.
-   self = 0 :: integer(),                             %% Execution time excluding subcalls.
-   count = 1 :: pos_integer(),                        %% Number of times the function was called.
-   wait = 0 :: non_neg_integer(),                     %% Time when the process was not running in this function.
-   wait_incl = 0 :: non_neg_integer(),                %% Time when the process was not running in this function or any subcalls.
-   wait_count = 0 :: non_neg_integer(),               %% Number of times the process was scheduled out.
-   wait_count_incl = 0 :: non_neg_integer(),          %% Number of times the function or any subcall was scheduled out.
-   calls = #{} :: #{atom() => #call{}}                %% Calls done by this MFA.
+	mfa :: atom(),                                     %% The MFA for the call.
+	source :: {string(), pos_integer()},               %% The source file name.
+	ts :: pos_integer(),                               %% The timestamp for the call.
+	self_ts :: pos_integer(),                          %% The timestamp for when we last started executing this function.
+	incl :: undefined | non_neg_integer(),             %% Execution time including subcalls.
+	self = 0 :: integer(),                             %% Execution time excluding subcalls.
+	count = 1 :: pos_integer(),                        %% Number of times the function was called.
+	wait = 0 :: non_neg_integer(),                     %% Time when the process was not running in this function.
+	waitIncl = 0 :: non_neg_integer(),                 %% Time when the process was not running in this function or any subcalls.
+	waitCnt = 0 :: non_neg_integer(),                  %% Number of times the process was scheduled out.
+	waitCntIncl = 0 :: non_neg_integer(),              %% Number of times the function or any subcall was scheduled out.
+	calls = #{} :: #{atom() => #call{}}                %% Calls done by this MFA.
 }).
 
 -record(proc, {
-   stack = [] :: [#call{}],                           %% Call stack.
-   mfas = #{} :: #{atom() => #call{}},                %% Profile information waiting to be written to file.
-   out = undefined :: undefined | non_neg_integer()   %% Timestamp the process got scheduled out.
+	stack = [] :: [#call{}],                           %% Call stack.
+	mfas = #{} :: #{atom() => #call{}},                %% Profile information waiting to be written to file.
+	out = undefined :: undefined | non_neg_integer()   %% Timestamp the process got scheduled out.
 }).
 
 -record(state, {
-   input :: file:filename_all(),                      %% Input file name.
-   output :: file:filename_all(),                     %% Output file name.
-   output_device :: file:io_device(),                 %% Output fd.
-   opts :: opts(),                                    %% Options.
-   processes = #{} :: #{pid() => #proc{}},            %% List of processes.
-   sources = #{} :: #{mfa() => {string(), pos_integer()}}    %% Cache of source file information.
+	input :: file:filename_all(),                      %% Input file name.
+	outputFile :: file:filename_all(),                     %% Output file name.
+	ioDevice :: file:io_device(),                      %% Output fd.
+	opts :: opts(),                                    %% Options.
+	procs = #{} :: #{pid() => #proc{}},                 %% List of processes.
+	sources = #{} :: #{mfa() => {string(), pos_integer()}}    %% Cache of source file information.
 }).
 
--spec pts() -> eTpf:input().
-pts() ->
-   [{app, kernel}, {app, stdlib}, {app, looking_glass}].
-
 -spec pfs(file:filename_all(), file:filename_all()) -> ok.
-pfs(InputFile, Output) ->
-   pfs(InputFile, Output, #{}).
+pfs(InputFile, OutputFile) ->
+	pfs(InputFile, OutputFile, #{}).
 
 -spec pfs(file:filename_all(), file:filename_all(), opts()) -> ok.
-pfs(InputFile, Output, Opts) ->
-   {ok, OutDevice} = file:open(Output, [write]),
-   State = #state{input = InputFile, output = Output, output_device = OutDevice, opts = Opts},
-   writeHeader(State),
-   {ok, FinalState} = tpFReader:fold(fun handleEvent/2, State, InputFile),
-   flush(FinalState),
-   _ = file:close(OutDevice),
-   ok.
-
+pfs(InputFile, OutputFile, Opts) ->
+	{ok, IoDevice} = file:open(OutputFile, [write]),
+	State = #state{input = InputFile, outputFile = OutputFile, ioDevice = IoDevice, opts = Opts},
+	writeHeader(State),
+	{ok, FinalState} = tpFReader:fold(fun handleEvent/2, State, InputFile),
+	flush(FinalState),
+	_ = file:close(IoDevice),
+	ok.
 
 -spec pfm(file:filename(), filelib:dirname(), file:filename()) -> ok.
 pfm(Wildcard, Cwd, Prefix) ->
-   pfm(Wildcard, Cwd, Prefix, #{}).
+	pfm(Wildcard, Cwd, Prefix, #{}).
 
 -spec pfm(file:filename(), filelib:dirname(), file:filename(), opts()) -> ok.
-pfm(Wildcard, Cwd, Prefix, Opts) ->
-   Files = lists:sort(filelib:wildcard(Wildcard, Cwd)),
-   Seq = lists:seq(1, length(Files)),
-   OutFiles = [Prefix ++ "." ++ integer_to_list(N) || N <- Seq],
-   Many = lists:zip(Files, OutFiles),
-   Refs = [monitor(process, spawn_link(?MODULE, pfs, [Input, Output, Opts])) || {Input, Output} <- Many],
-   waitForProcs(Refs).
-
-flush(State = #state{processes = Procs}) ->
-   maps:fold(
-      fun(Pid, #proc{mfas = MFAs}, _) ->
-         writeMfas(Pid, MFAs, State)
-      end,
-      undefined, Procs),
-   ok.
-
-%% We do not need to worry about failure because we are linked.
-waitForProcs([]) ->
-   ok;
-waitForProcs(Refs) ->
-   receive
-   %% We purposefully ignore any stray messages.
-      {'DOWN', R, process, _, _} ->
-         waitForProcs(Refs -- [R])
-   end.
+pfm(InputFiles, Cwd, OutputPrefix, Opts) ->
+	PfFiles = lists:sort(filelib:wildcard(InputFiles, Cwd)),
+	SeqList = lists:seq(1, length(PfFiles)),
+	OutputFiles = [OutputPrefix ++ "." ++ integer_to_list(Index) || Index <- SeqList],
+	ManyFiles = lists:zip(PfFiles, OutputFiles),
+	Refs = [monitor(process, spawn_link(?MODULE, pfs, [InputFile, OutputFile, Opts])) || {InputFile, OutputFile} <- ManyFiles],
+	waitForProc(Refs).
+
+waitForProc([]) ->
+	ok;
+waitForProc(Refs) ->
+	receive
+		{'DOWN', R, process, _, _} ->
+			waitForProc(Refs -- [R])
+	end.
 
 %% We handle trace events one by one, keeping track of the execution stack for each process.
 %% 我们一一处理跟踪事件，跟踪每个进程的执行堆栈。
 
 %% 我们不关心匹配规范结果 for callgrind.
 handleEvent({call, Pid, Ts, MFA, _MSpec}, State) ->
-   handleEvent({call, Pid, Ts, MFA}, State);
-handleEvent({call, Pid, Ts, MFA}, State0) ->
-   Proc =
-      case isProcProfiled(Pid, State0) of
-         {true, P} ->
-            P;
-         {empty, P} ->
-            P;
-         false ->
-            #proc{}
-      end,
-   {Source, State} = findSource(MFA, State0),
-   handleCall(Pid, convertMfa(MFA), Source, Ts, Proc, State);
+	handleEvent({call, Pid, Ts, MFA}, State);
+handleEvent({call, Pid, Ts, MFA}, State) ->
+	case isProcProfiled(Pid, State) of
+		{true, P} ->
+			Proc = P;
+		{empty, P} ->
+			Proc = P;
+		false ->
+			Proc = #proc{}
+	end,
+	{Source, NewState} = findSource(MFA, State),
+	handleCall(Pid, convertMfa(MFA), Source, Ts, Proc, NewState);
 handleEvent({return_to, Pid, Ts, MFA}, State) ->
-   case isProcProfiled(Pid, State) of
-      {true, Proc} ->
-         handleReturnTo(Pid, convertMfa(MFA), Ts, Proc, State);
-      _ ->
-         State
-   end;
+	case isProcProfiled(Pid, State) of
+		{true, Proc} ->
+			handleReturnTo(Pid, convertMfa(MFA), Ts, Proc, State);
+		_ ->
+			State
+	end;
 %% Process exited. Unfold the stacktrace entirely.
 %%
 %% We use the atom exit because we know it will not match
 %% a function call and will therefore unfold everything.
 handleEvent({exit, Pid, Ts, _Reason}, State0) ->
-   case isProcProfiled(Pid, State0) of
-      {true, Proc} ->
-         State = #state{processes = Procs} = handleReturnTo(Pid, exit, Ts, Proc, State0),
-         %% Remove the pid from the state to save memory.
-         State#state{processes = maps:remove(Pid, Procs)};
-      _ ->
-         State0
-   end;
+	case isProcProfiled(Pid, State0) of
+		{true, Proc} ->
+			State = #state{procs = Procs} = handleReturnTo(Pid, exit, Ts, Proc, State0),
+			%% Remove the pid from the state to save memory.
+			State#state{procs = maps:remove(Pid, Procs)};
+		_ ->
+			State0
+	end;
 handleEvent({in, Pid, Ts, _MFA}, State = #state{opts = #{running := true}}) ->
-   case isProcProfiled(Pid, State) of
-      {true, Proc} -> handleIn(Pid, Ts, Proc, State);
-      _ -> State
-   end;
+	case isProcProfiled(Pid, State) of
+		{true, Proc} -> handleIn(Pid, Ts, Proc, State);
+		_ -> State
+	end;
 handleEvent({out, Pid, Ts, _MFA}, State = #state{opts = #{running := true}}) ->
-   case isProcProfiled(Pid, State) of
-      {true, Proc} -> handleOut(Pid, Ts, Proc, State);
-      _ -> State
-   end;
+	case isProcProfiled(Pid, State) of
+		{true, Proc} -> handleOut(Pid, Ts, Proc, State);
+		_ -> State
+	end;
 %% Ignore all other events. We do not need them for building the callgrind file.
 handleEvent(_, State) ->
-   State.
-
-isProcProfiled(Pid, #state{processes = Procs}) ->
-   case maps:get(Pid, Procs, undefined) of
-      %% We never received events for this process. Ignore.
-      undefined ->
-         false;
-      %% We received events but are not in a known function currently. Ignore.
-      #proc{stack = []} = Proc ->
-         {empty, Proc};
-      %% All good!
-      Proc ->
-         {true, Proc}
-   end.
+	State.
+
+isProcProfiled(Pid, #state{procs = Procs}) ->
+	case maps:get(Pid, Procs, undefined) of
+		undefined ->
+			%% We never received events for this process. Ignore.
+			false;
+		#proc{stack = []} = Proc ->
+			%% We received events but are not in a known function currently. Ignore.
+			{empty, Proc};
+		Proc ->
+			%% All good!
+			{true, Proc}
+	end.
 
 %% We track a number of different things:
 %% - how much time was spent in the different function calls
@@ -175,15 +160,15 @@ isProcProfiled(Pid, #state{processes = Procs}) ->
 %% compared to an imperative language.
 
 %% Recursive call. Just increase the call count.
-handleCall(Pid, MFA, _Source, _Ts, Proc0 = #proc{stack = [Call = #call{mfa = MFA, count = Count} | Stack0]}, State = #state{processes = Procs}) ->
-   Stack = [Call#call{count = Count + 1} | Stack0],
-   Proc = Proc0#proc{stack = Stack},
-   State#state{processes = Procs#{Pid => Proc}};
+handleCall(Pid, MFA, _Source, _Ts, #proc{stack = [Call = #call{mfa = MFA, count = Count} | Stack0]} = Proc, #state{procs = Procs} = State) ->
+	Stack = [Call#call{count = Count + 1} | Stack0],
+	NewProc = Proc#proc{stack = Stack},
+	State#state{procs = Procs#{Pid => NewProc}};
 %% Non-recursive call.
-handleCall(Pid, MFA, Source, Ts, Proc0 = #proc{stack = Stack0}, State = #state{processes = Procs}) ->
-   Stack = [#call{mfa = MFA, source = Source, ts = Ts, self_ts = Ts} | Stack0],
-   Proc = Proc0#proc{stack = Stack},
-   State#state{processes = Procs#{Pid => Proc}}.
+handleCall(Pid, MFA, Source, Ts, #proc{stack = Stack0} = Proc0, #state{procs = Procs} = State) ->
+	Stack = [#call{mfa = MFA, source = Source, ts = Ts, self_ts = Ts} | Stack0],
+	Proc = Proc0#proc{stack = Stack},
+	State#state{procs = Procs#{Pid => Proc}}.
 
 %% We return from the current call, so the current call
 %% ends regardless of what it was doing. We stop as soon
@@ -216,98 +201,98 @@ handleCall(Pid, MFA, Source, Ts, Proc0 = #proc{stack = Stack0}, State = #state{p
 %% and the return time for the end. Here again we also
 %% update the sub call times.
 
-handleReturnTo(Pid, MFA, Ts, Proc0 = #proc{stack = [Current0 | Stack0], mfas = MFAs0}, State = #state{processes = Procs}) ->
-   {Returned0, Stack1} = lists:splitwith(fun(#call{mfa = E}) -> E =/= MFA end, Stack0),
-   #call{ts = CurrentTs, self_ts = CurrentSelfTs, self = CurrentSelf} = Current0,
-   Current = Current0#call{incl = Ts - CurrentTs, self = CurrentSelf + Ts - CurrentSelfTs},
-   Returned = updateTailCalls([Current | Returned0], Ts),
-   Stack = updateStack(Returned, Stack1, Ts),
-   %% Save the profile information in the state, potentially flushing it
-   %% to disk if the stack is empty.
-   MFAs1 = updateMfas(Returned, MFAs0),
-   MFAs =
-      case Stack of
-         [] ->
-            writeMfas(Pid, MFAs1, State),
-            #{};
-         _ ->
-            MFAs1
-      end,
-   Proc = Proc0#proc{stack = Stack, mfas = MFAs},
-   State#state{processes = Procs#{Pid => Proc}}.
+handleReturnTo(Pid, MFA, Ts, Proc0 = #proc{stack = [Current0 | Stack0], mfas = MFAs0}, State = #state{procs = Procs}) ->
+	{Returned0, Stack1} = lists:splitwith(fun(#call{mfa = E}) -> E =/= MFA end, Stack0),
+	#call{ts = CurrentTs, self_ts = CurrentSelfTs, self = CurrentSelf} = Current0,
+	Current = Current0#call{incl = Ts - CurrentTs, self = CurrentSelf + Ts - CurrentSelfTs},
+	Returned = updateTailCalls([Current | Returned0], Ts),
+	Stack = updateStack(Returned, Stack1, Ts),
+	%% Save the profile information in the state, potentially flushing it
+	%% to disk if the stack is empty.
+	MFAs1 = updateMfas(Returned, MFAs0),
+	MFAs =
+		case Stack of
+			[] ->
+				writeMfas(Pid, MFAs1, State),
+				#{};
+			_ ->
+				MFAs1
+		end,
+	Proc = Proc0#proc{stack = Stack, mfas = MFAs},
+	State#state{procs = Procs#{Pid => Proc}}.
 
 updateTailCalls([Call], _) ->
-   [Call];
+	[Call];
 updateTailCalls([Callee = #call{ts = CalleeTs}, Caller0 = #call{ts = CallerTs, self_ts = CallerSelfTs, self = CallerSelf} | Tail], ReturnToTs) ->
-   Caller1 = Caller0#call{incl = ReturnToTs - CallerTs, self = CallerSelf + CalleeTs - CallerSelfTs},
-   Caller = updateSubCalls(Callee, Caller1),
-   [Callee | updateTailCalls([Caller | Tail], ReturnToTs)].
+	Caller1 = Caller0#call{incl = ReturnToTs - CallerTs, self = CallerSelf + CalleeTs - CallerSelfTs},
+	Caller = updateSubCalls(Callee, Caller1),
+	[Callee | updateTailCalls([Caller | Tail], ReturnToTs)].
 
 %% Update nothing; there's nothing in the stack.
 updateStack(_, [], _) ->
-   [];
+	[];
 %% Update the incl/self value based on the top-level function we return from,
 %% but only update the sub calls based on the function we directly called.
 updateStack(Returned, [Caller0 = #call{self_ts = CallerSelfTs, self = CallerSelf} | Stack], ReturnToTs) ->
-   Callee = #call{ts = CalleeTs} = hd(lists:reverse(Returned)),
-   Caller = Caller0#call{self_ts = ReturnToTs, self = CallerSelf + CalleeTs - CallerSelfTs},
-   [updateSubCalls(Callee, Caller) | Stack].
-
-updateSubCalls(Callee = #call{mfa = MFA, incl = CallerIncl, count = CallerCount, wait_incl = CallerWaitIncl}, Caller = #call{calls = SubCalls}) ->
-   case maps:get(MFA, SubCalls, undefined) of
-      %% Add the callee to the subcalls but remove the callee's subcalls
-      %% since we don't need those here.
-      undefined ->
-         Caller#call{calls = SubCalls#{MFA => Callee#call{calls = #{}}}};
-      %% Same as above, except we add to the existing values.
-      Sub = #call{incl = SubIncl, count = SubCount, wait_incl = SubWaitIncl} ->
-         Caller#call{calls = SubCalls#{MFA => Sub#call{
-            %% We do not care about self/wait here as we will be using incl/wait_incl in the output.
-            incl = SubIncl + CallerIncl,
-            count = SubCount + CallerCount,
-            wait_incl = SubWaitIncl + CallerWaitIncl
-         }}}
-   end.
+	Callee = #call{ts = CalleeTs} = hd(lists:reverse(Returned)),
+	Caller = Caller0#call{self_ts = ReturnToTs, self = CallerSelf + CalleeTs - CallerSelfTs},
+	[updateSubCalls(Callee, Caller) | Stack].
+
+updateSubCalls(Callee = #call{mfa = MFA, incl = CallerIncl, count = CallerCount, waitIncl = CallerWaitIncl}, Caller = #call{calls = SubCalls}) ->
+	case maps:get(MFA, SubCalls, undefined) of
+		%% Add the callee to the subcalls but remove the callee's subcalls
+		%% since we don't need those here.
+		undefined ->
+			Caller#call{calls = SubCalls#{MFA => Callee#call{calls = #{}}}};
+		%% Same as above, except we add to the existing values.
+		Sub = #call{incl = SubIncl, count = SubCount, waitIncl = SubWaitIncl} ->
+			Caller#call{calls = SubCalls#{MFA => Sub#call{
+				%% We do not care about self/wait here as we will be using incl/wait_incl in the output.
+				incl = SubIncl + CallerIncl,
+				count = SubCount + CallerCount,
+				waitIncl = SubWaitIncl + CallerWaitIncl
+			}}}
+	end.
 
 %% Processes get scheduled in and out. We get the corresponding
 %% in and out events when the 'running' option is set to true.
 %% We keep track of how many times the process was scheduled out
 %% per function, and how long.
 
-handleIn(Pid, InTs, Proc0 = #proc{stack = [Current0 | Stack0], out = OutTs}, State = #state{processes = Procs}) ->
-   #call{wait = Wait, wait_incl = WaitIncl, wait_count = WaitCount, wait_count_incl = WaitCountIncl} = Current0,
-   ThisWait = InTs - OutTs,
-   %% We increase the wait time for self first.
-   Current = Current0#call{wait = Wait + ThisWait, wait_incl = WaitIncl + ThisWait, wait_count = WaitCount + 1, wait_count_incl = WaitCountIncl + 1},
-   %% And then for the parent calls to include wait time of subcalls.
-   Stack = [
-      Call#call{wait_incl = ParentWaitIncl + ThisWait, wait_count_incl = ParentWaitCount + 1}
-      || Call = #call{wait_incl = ParentWaitIncl, wait_count_incl = ParentWaitCount} <- Stack0
-   ],
-   Proc = Proc0#proc{stack = [Current | Stack], out = undefined},
-   State#state{processes = Procs#{Pid => Proc}}.
-
-handleOut(Pid, Ts, Proc0 = #proc{out = undefined}, State = #state{processes = Procs}) ->
-   Proc = Proc0#proc{out = Ts},
-   State#state{processes = Procs#{Pid => Proc}}.
+handleIn(Pid, InTs, Proc0 = #proc{stack = [Current0 | Stack0], out = OutTs}, State = #state{procs = Procs}) ->
+	#call{wait = Wait, waitIncl = WaitIncl, waitCnt = WaitCount, waitCntIncl = WaitCountIncl} = Current0,
+	ThisWait = InTs - OutTs,
+	%% We increase the wait time for self first.
+	Current = Current0#call{wait = Wait + ThisWait, waitIncl = WaitIncl + ThisWait, waitCnt = WaitCount + 1, waitCntIncl = WaitCountIncl + 1},
+	%% And then for the parent calls to include wait time of subcalls.
+	Stack = [
+		Call#call{waitIncl = ParentWaitIncl + ThisWait, waitCntIncl = ParentWaitCount + 1}
+		|| Call = #call{waitIncl = ParentWaitIncl, waitCntIncl = ParentWaitCount} <- Stack0
+	],
+	Proc = Proc0#proc{stack = [Current | Stack], out = undefined},
+	State#state{procs = Procs#{Pid => Proc}}.
+
+handleOut(Pid, Ts, Proc0 = #proc{out = undefined}, State = #state{procs = Procs}) ->
+	Proc = Proc0#proc{out = Ts},
+	State#state{procs = Procs#{Pid => Proc}}.
 
 %% Update the profiling information we currently hold.
 updateMfas([], MFAs) ->
-   MFAs;
-updateMfas([Call = #call{mfa = MFA, incl = Incl, self = Self, wait = Wait, wait_incl = WaitIncl, wait_count = WaitCount, wait_count_incl = WaitCountIncl, count = Count, calls = SubCalls} | Tail], MFAs) ->
-   case MFAs of
-      #{MFA := AggCall0 = #call{incl = AggIncl, self = AggSelf, wait = AggWait, wait_incl = AggWaitIncl, wait_count = AggWaitCount, wait_count_incl = AggWaitCountIncl, count = AggCount, calls = AggSubCalls0}} ->
-         AggSubCalls = updateMfas(maps:values(SubCalls), AggSubCalls0),
-         AggCall = AggCall0#call{incl = Incl + AggIncl, self = Self + AggSelf,
-            wait = Wait + AggWait, wait_incl = WaitIncl + AggWaitIncl,
-            wait_count = WaitCount + AggWaitCount,
-            wait_count_incl = WaitCountIncl + AggWaitCountIncl,
-            count = Count + AggCount, calls = AggSubCalls
-         },
-         updateMfas(Tail, MFAs#{MFA => AggCall});
-      _ ->
-         updateMfas(Tail, MFAs#{MFA => Call})
-   end.
+	MFAs;
+updateMfas([Call = #call{mfa = MFA, incl = Incl, self = Self, wait = Wait, waitIncl = WaitIncl, waitCnt = WaitCount, waitCntIncl = WaitCountIncl, count = Count, calls = SubCalls} | Tail], MFAs) ->
+	case MFAs of
+		#{MFA := AggCall0 = #call{incl = AggIncl, self = AggSelf, wait = AggWait, waitIncl = AggWaitIncl, waitCnt = AggWaitCount, waitCntIncl = AggWaitCountIncl, count = AggCount, calls = AggSubCalls0}} ->
+			AggSubCalls = updateMfas(maps:values(SubCalls), AggSubCalls0),
+			AggCall = AggCall0#call{incl = Incl + AggIncl, self = Self + AggSelf,
+				wait = Wait + AggWait, waitIncl = WaitIncl + AggWaitIncl,
+				waitCnt = WaitCount + AggWaitCount,
+				waitCntIncl = WaitCountIncl + AggWaitCountIncl,
+				count = Count + AggCount, calls = AggSubCalls
+			},
+			updateMfas(Tail, MFAs#{MFA => AggCall});
+		_ ->
+			updateMfas(Tail, MFAs#{MFA => Call})
+	end.
 
 %% The callgrind format is documented at http://valgrind.org/docs/manual/cl-format.html
 %%
@@ -316,101 +301,109 @@ updateMfas([Call = #call{mfa = MFA, incl = Incl, self = Self, wait = Wait, wait_
 %%
 %% The option 'scope' can be used to enable per process tracking.
 
-writeHeader(#state{output_device = OutDevice, opts = #{running := true}}) ->
-   ok = file:write(OutDevice,
-      "# callgrind format\n"
-      "events: Total Active Wait WaitCount\n"
-      "event: Total : Total time in microseconds\n"
-      "event: Active : Active time in microseconds\n"
-      "event: Wait : Wait time in microseconds (scheduled out)\n"
-      "event: WaitCount : Number of times the process was scheduled out\n"
-      "\n");
-writeHeader(#state{output_device = OutDevice}) ->
-   ok = file:write(OutDevice,
-      "# callgrind format\n"
-      "events: Total\n"
-      "event: Total : Total time in microseconds\n"
-      "\n").
+writeHeader(#state{ioDevice = OutDevice, opts = #{running := true}}) ->
+	ok = file:write(OutDevice,
+		<<"# callgrind format\n"
+		"events: Total Active Wait WaitCount\n"
+		"event: Total : Total time in microseconds\n"
+		"event: Active : Active time in microseconds\n"
+		"event: Wait : Wait time in microseconds (scheduled out)\n"
+		"event: WaitCount : Number of times the process was scheduled out\n"
+		"\n">>);
+writeHeader(#state{ioDevice = OutDevice}) ->
+	ok = file:write(OutDevice,
+		<<"# callgrind format\n"
+		"events: Total\n"
+		"event: Total : Total time in microseconds\n"
+		"\n">>).
+
+flush(State = #state{procs = Procs}) ->
+	maps:fold(
+		fun(Pid, #proc{mfas = MFAs}, _) ->
+			writeMfas(Pid, MFAs, State)
+		end,
+		undefined, Procs),
+	ok.
 
 writeMfas(Pid, MFAs, State) ->
-   _ = [writeCall(Pid, Call, State) || Call <- maps:values(MFAs)],
-   ok.
-
-writeCall(Pid, #call{mfa = MFA, source = {Source, LN}, self = Self, wait = Wait, wait_count = WaitCount, calls = Calls0}, #state{output_device = OutDevice, opts = Opts}) ->
-   Calls = maps:values(Calls0),
-   Ob =
-      case Opts of
-         #{scope := per_process} ->
-            ["ob=", io_lib:write(Pid), "\n"];
-         _ ->
-            []
-      end,
-   RunningCosts =
-      case Opts of
-         #{running := true} ->
-            [
-               " ", integer_to_list(Self - Wait),
-               " ", integer_to_list(Wait),
-               " ", integer_to_list(WaitCount)
-            ];
-         _ ->
-            []
-      end,
-   file:write(OutDevice,
-      [
-         Ob,
-         "fl=", Source, "\n"
-      "fn=", atom_to_list(MFA), "\n",
-         integer_to_list(LN), " ", integer_to_list(Self), RunningCosts, "\n",
-         formatSubcalls(LN, Calls, Opts),
-         "\n"
-      ]).
+	_ = [writeCall(Pid, Call, State) || Call <- maps:values(MFAs)],
+	ok.
+
+writeCall(Pid, #call{mfa = MFA, source = {Source, LN}, self = Self, wait = Wait, waitCnt = WaitCount, calls = Calls0}, #state{ioDevice = OutDevice, opts = Opts}) ->
+	Calls = maps:values(Calls0),
+	Ob =
+		case Opts of
+			#{scope := per_process} ->
+				["ob=", io_lib:write(Pid), "\n"];
+			_ ->
+				[]
+		end,
+	RunningCosts =
+		case Opts of
+			#{running := true} ->
+				[
+					" ", integer_to_list(Self - Wait),
+					" ", integer_to_list(Wait),
+					" ", integer_to_list(WaitCount)
+				];
+			_ ->
+				[]
+		end,
+	file:write(OutDevice,
+		[
+			Ob,
+			"fl=", Source, "\n"
+		   "fn=", atom_to_list(MFA), "\n",
+			integer_to_list(LN), " ", integer_to_list(Self), RunningCosts, "\n",
+			formatSubcalls(LN, Calls, Opts),
+			"\n"
+		]).
 
 formatSubcalls(_, [], _) ->
-   [];
+	[];
 %% @todo We don't need to write the filename for functions in the same module.
 %% @todo We also don't want to put the full file name; instead we should remove
 %% the prefix (path to the release).
 %%
 %% We only look at where the function is defined, we can't really get
 %% the actual line number where the call happened, unfortunately.
-formatSubcalls(LN, [#call{mfa = MFA, source = {Source, TargetLN}, incl = Incl, wait_incl = Wait, wait_count_incl = WaitCount, count = Count, calls = _Calls} | Tail], Opts) ->
-   RunningCosts =
-      case Opts of
-         #{running := true} ->
-            [
-               " ", integer_to_list(Incl - Wait),
-               " ", integer_to_list(Wait),
-               " ", integer_to_list(WaitCount)
-            ];
-         _ ->
-            []
-      end,
-   [
-      [
-         "cfi=", Source, "\n"
-      "cfn=", atom_to_list(MFA), "\n"
-      "calls=", integer_to_list(Count), " ", integer_to_list(TargetLN), "\n",
-         integer_to_list(LN), " ", integer_to_list(Incl), RunningCosts, "\n"
-      ] | formatSubcalls(LN, Tail, Opts)
-   ].
+formatSubcalls(LN, [#call{mfa = MFA, source = {Source, TargetLN}, incl = Incl, waitIncl = Wait, waitCntIncl = WaitCount, count = Count, calls = _Calls} | Tail], Opts) ->
+	RunningCosts =
+		case Opts of
+			#{running := true} ->
+				[
+					" ", integer_to_list(Incl - Wait),
+					" ", integer_to_list(Wait),
+					" ", integer_to_list(WaitCount)
+				];
+			_ ->
+				[]
+		end,
+	[
+		[
+			"cfi=", Source, "\n"
+		   "cfn=", atom_to_list(MFA), "\n"
+		   "calls=", integer_to_list(Count), " ", integer_to_list(TargetLN), "\n",
+			integer_to_list(LN), " ", integer_to_list(Incl), RunningCosts, "\n"
+		] | formatSubcalls(LN, Tail, Opts)
+	].
 
 convertMfa(undefined) ->
-   undefined;
-convertMfa({M0, F0, A0}) ->
-   M = atom_to_binary(M0, latin1),
-   F = atom_to_binary(F0, latin1),
-   A = integer_to_binary(A0),
-   binary_to_atom(<<M/binary, $:, F/binary, $/, A/binary>>, latin1).
-
-findSource(MFA, State0 = #state{sources = Cache}) ->
-   case Cache of
-      #{MFA := Source} ->
-         {Source, State0};
-      _ ->
-         State = #state{sources = #{MFA := Source}} = cacheModule(MFA, State0),
-         {Source, State}
-   end.
+	undefined;
+convertMfa({M, F, A}) ->
+	MBin = atom_to_binary(M, latin1),
+	FBin = atom_to_binary(F, latin1),
+	ABin = integer_to_binary(A),
+	binary_to_atom(<<MBin/binary, $:, FBin/binary, $/, ABin/binary>>, latin1).
+
+findSource(MFA, #state{sources = Cache} = State) ->
+	case Cache of
+		#{MFA := Source} ->
+			{Source, State};
+		_ ->
+			NewState = #state{sources = #{MFA := Source}} = cacheModule(MFA, State),
+			{Source, NewState}
+	end.
 
 %% We extract the line number of the functions by loading the
 %% beam file (which is already loaded when we reach this function)
@@ -426,41 +419,41 @@ findSource(MFA, State0 = #state{sources = Cache}) ->
 %%
 %% While this is an expensive operation, we cache the result
 %% and therefore this function will only be called once per module.
-cacheModule(MFA = {Module, _, _}, State0 = #state{sources = Cache}) ->
-   try
-      %% If the module is in the path, we can simply query
-      %% it for the source file.
-      Info = Module:module_info(compile),
-      %% @todo We don't want to return an absolute path,
-      %% but rather the app/src/file.erl path if it's in
-      %% an application, or just folder/file.erl if not.
-      %% This allows different users to point to the
-      %% same source at different locations on their machine.
-      {_, Src} = lists:keyfind(source, 1, Info),
-      cacheModule(MFA, State0, Src)
-   catch _:_ ->
-      %% Either the module was not found, or it doesn't
-      %% have a 'source' key in the compile info.
-      %%
-      %% We can't cache the module; on the other hand
-      %% we can cache the result of this operation.
-      %% Just append .erl to the module name and set the
-      %% line number to 1, which is of course incorrect.
-      State0#state{sources = Cache#{MFA => {atom_to_list(Module) ++ ".erl", 1}}}
-   end.
-
-cacheModule(MFA = {Module, _, _}, State = #state{sources = Cache0}, Src) ->
-   {Module, Beam, _} = code:get_object_code(Module),
-   {ok, {Module, Chunks}} = beam_lib:chunks(Beam, [abstract_code]),
-   [{abstract_code, {raw_abstract_v1, Forms}}] = Chunks,
-   Funcs = [{{Module, F, A}, LN} || {function, LN, F, A, _} <- Forms],
-   Cache1 = lists:foldl(fun({Key, LN}, Acc) -> Acc#{Key => {Src, LN}} end, Cache0, Funcs),
-   %% We cannot currently retrieve line number information
-   %% for list comprehensions and funs. We therefore
-   %% cache the correct file with line number set to 1.
-   Cache =
-      case Cache1 of
-         #{MFA := _} -> Cache1;
-         _ -> Cache1#{MFA => {Src, 1}}
-      end,
-   State#state{sources = Cache}.
+cacheModule({Module, _, _} = MFA, #state{sources = Cache} = State) ->
+	try
+		%% If the module is in the path, we can simply query
+		%% it for the source file.
+		Info = Module:module_info(compile),
+		%% @todo We don't want to return an absolute path,
+		%% but rather the app/src/file.erl path if it's in
+		%% an application, or just folder/file.erl if not.
+		%% This allows different users to point to the
+		%% same source at different locations on their machine.
+		{_, Src} = lists:keyfind(source, 1, Info),
+		cacheModule(MFA, State, Src)
+	catch _:_ ->
+		%% Either the module was not found, or it doesn't
+		%% have a 'source' key in the compile info.
+		%%
+		%% We can't cache the module; on the other hand
+		%% we can cache the result of this operation.
+		%% Just append .erl to the module name and set the
+		%% line number to 1, which is of course incorrect.
+		State#state{sources = Cache#{MFA => {atom_to_list(Module) ++ ".erl", 1}}}
+	end.
+
+cacheModule({Module, _, _} = MFA, #state{sources = Cache} = State, Src) ->
+	{Module, Beam, _} = code:get_object_code(Module),
+	{ok, {Module, Chunks}} = beam_lib:chunks(Beam, [abstract_code]),
+	[{abstract_code, {raw_abstract_v1, Forms}}] = Chunks,
+	Funcs = [{{Module, F, A}, LN} || {function, LN, F, A, _} <- Forms],
+	NewCache = lists:foldl(fun({Key, LN}, Acc) -> Acc#{Key => {Src, LN}} end, Cache, Funcs),
+	%% We cannot currently retrieve line number information
+	%% for list comprehensions and funs. We therefore
+	%% cache the correct file with line number set to 1.
+	LastCache =
+		case NewCache of
+			#{MFA := _} -> NewCache;
+			_ -> NewCache#{MFA => {Src, 1}}
+		end,
+	State#state{sources = LastCache}.
diff --git a/src/eTpf.erl b/src/eTpf.erl
index feb72ee..c12d266 100644
--- a/src/eTpf.erl
+++ b/src/eTpf.erl
@@ -60,8 +60,9 @@ stop() ->
 stop(TracerId) ->
 	supervisor:terminate_child(eTpf_sup, TracerId).
 
+-spec pts() -> pattern().
 pts() ->
-	ok.
+	[{app, kernel}, {app, stdlib}, {app, looking_glass}].
 
 ptl() ->
 	ok.
diff --git a/src/flame/tpFlame.erl b/src/flame/tpFlame.erl
index 3d5ffcf..4f238e1 100644
--- a/src/flame/tpFlame.erl
+++ b/src/flame/tpFlame.erl
@@ -6,7 +6,7 @@
 ]).
 
 -record(state, {
-   outputPath = "",
+   outputFile = "",
    pid,
    lastTs,
    count = 0,
@@ -14,14 +14,14 @@
 }).
 
 -spec pfs(file:filename_all(), file:filename_all()) -> ok.
-pfs(InputFile, OutputPath) ->
-   {ok, FinalState} = tpFReader:fold(fun handleEvent/2, #state{outputPath = OutputPath}, InputFile),
+pfs(InputFile, OutputFile) ->
+   {ok, FinalState} = tpFReader:fold(fun handleEvent/2, #state{outputFile = OutputFile}, InputFile),
    flush(FinalState).
 
 -spec pfm(file:filename(), filelib:dirname(), file:filename()) -> ok.
-pfm(InputFiles, Cwd, OutputPath) ->
+pfm(InputFiles, Cwd, OutputFile) ->
    PfFiles = lists:sort(filelib:wildcard(InputFiles, Cwd)),
-   doPfm(PfFiles, #state{outputPath = OutputPath}).
+   doPfm(PfFiles, #state{outputFile = OutputFile}).
 
 doPfm([], State) ->
    flush(State);
@@ -36,12 +36,6 @@ handleEvent(Trace, State) ->
    setPidState(Pid, NewPidState),
    State.
 
-%% in & out, without call context, don't help us
-doExpInner({InOut, _Pid, _TS, _MFA}, #state{lastTs = undefined} = PS) when InOut == in; InOut == out ->
-   PS;
-%% return_from and return_to, without call context, don't help us
-doExpInner({Return, _Pid, _TS, _MFA}, #state{lastTs = undefined} = PS) when Return == return_from; Return == return_to ->
-   PS;
 doExpInner({call, Pid, TS, MFA, BIN}, #state{lastTs = LastTS, acc = Acc, count = Count} = PS) ->
    try
       %% Calculate time elapsed, TS-LastTs. 计算经过的时间，TS-LastTs。
@@ -68,7 +62,7 @@ doExpInner({call, Pid, TS, MFA, BIN}, #state{lastTs = LastTS, acc = Acc, count =
       io:format(user, "~p: ~p:~p @ ~p\n", [?LINE, Class, Reason, StackTrace]),
       PS
    end;
-doExpInner({return_to, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) ->
+doExpInner({return_to, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) when LastTS =/= undefined ->
    try
       %% Calculate time elapsed, TS-LastTs.
       %% 1. Credit elapsed time to the stack on the top of Acc.
@@ -112,7 +106,7 @@ doExpInner({gc_end, _Pid, TS, _Info}, #state{lastTs = LastTS, acc = Acc} = PS) -
       io:format(user, "~p: ~p:~p @ ~p\n", [?LINE, Class, Reason, StackTrace]),
       PS
    end;
-doExpInner({out, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) ->
+doExpInner({out, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) when LastTS =/= undefined ->
    try
       %% Push a 0 usec item onto Acc.
       %% The MFA reported here probably doesn't appear in the stacktrace
@@ -127,7 +121,7 @@ doExpInner({out, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) ->
       io:format(user, "~p: ~p:~p @ ~p\n", [?LINE, Class, Reason, StackTrace]),
       PS
    end;
-doExpInner({in, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) ->
+doExpInner({in, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) when LastTS =/= undefined ->
    try
       %% Push the Sleep time onto Acc, then push 0 usec item from last
       %% exec stack onto Acc.
@@ -210,10 +204,10 @@ mfaf(I) ->
    [_, C | _] = string:tokens(I, "()+"),
    string:strip(C).
 
-flush(#state{outputPath = OutputPath}) ->
+flush(#state{outputFile = OutputFile}) ->
    PidStates = get(),
-   {ok, FH} = file:open(OutputPath, [write, raw, binary, delayed_write]),
-   io:format("\n\nWriting to ~s for ~w processes...", [OutputPath, length(PidStates)]),
+   {ok, FH} = file:open(OutputFile, [write, raw, binary, delayed_write]),
+   io:format("\n\nWriting to ~s for ~w processes...", [OutputFile, length(PidStates)]),
    [
       [
          begin