ft: 部分优化调整

4 年前 · 8467b00eaf
--- a/src/callgrind/tpCallGrind.erl
+++ b/src/callgrind/tpCallGrind.erl
@ -1,160 +1,145 @@
 -module(tpCallGrind).
 -module(tpCallGrind).      %% 调用堆栈和时间分析

 -export([pts/0]).
 -export([pfs/2]).
 -export([pfs/3]).
 -export([pfm/3]).
 -export([pfm/4]).
 -export([
 	pfs/2
 	, pfs/3
 	, pfm/3
 	, pfm/4
 ]).

 -type opts() :: #{
   scope => global | per_process,                        %% Whether we filter the output per process.
   running => boolean()                                  %% Whether we compute and save wait times.
 scope => global | per_process,                        %% Whether we filter the output per process.
 running => boolean()                                  %% Whether we compute and save wait times.
 }.

 -record(call, {
   mfa :: atom(),                                     %% The MFA for the call.
   source :: {string(), pos_integer()},               %% The source file name.
   ts :: pos_integer(),                               %% The timestamp for the call.
   self_ts :: pos_integer(),                          %% The timestamp for when we last started executing this function.
   incl :: undefined | non_neg_integer(),             %% Execution time including subcalls.
   self = 0 :: integer(),                             %% Execution time excluding subcalls.
   count = 1 :: pos_integer(),                        %% Number of times the function was called.
   wait = 0 :: non_neg_integer(),                     %% Time when the process was not running in this function.
   wait_incl = 0 :: non_neg_integer(),                %% Time when the process was not running in this function or any subcalls.
   wait_count = 0 :: non_neg_integer(),               %% Number of times the process was scheduled out.
   wait_count_incl = 0 :: non_neg_integer(),          %% Number of times the function or any subcall was scheduled out.
   calls = #{} :: #{atom() => #call{}}                %% Calls done by this MFA.
 	mfa :: atom(),                                     %% The MFA for the call.
 	source :: {string(), pos_integer()},               %% The source file name.
 	ts :: pos_integer(),                               %% The timestamp for the call.
 	self_ts :: pos_integer(),                          %% The timestamp for when we last started executing this function.
 	incl :: undefined | non_neg_integer(),             %% Execution time including subcalls.
 	self = 0 :: integer(),                             %% Execution time excluding subcalls.
 	count = 1 :: pos_integer(),                        %% Number of times the function was called.
 	wait = 0 :: non_neg_integer(),                     %% Time when the process was not running in this function.
 	waitIncl = 0 :: non_neg_integer(),                 %% Time when the process was not running in this function or any subcalls.
 	waitCnt = 0 :: non_neg_integer(),                  %% Number of times the process was scheduled out.
 	waitCntIncl = 0 :: non_neg_integer(),              %% Number of times the function or any subcall was scheduled out.
 	calls = #{} :: #{atom() => #call{}}                %% Calls done by this MFA.
 }).

 -record(proc, {
   stack = [] :: [#call{}],                           %% Call stack.
   mfas = #{} :: #{atom() => #call{}},                %% Profile information waiting to be written to file.
   out = undefined :: undefined | non_neg_integer()   %% Timestamp the process got scheduled out.
 	stack = [] :: [#call{}],                           %% Call stack.
 	mfas = #{} :: #{atom() => #call{}},                %% Profile information waiting to be written to file.
 	out = undefined :: undefined | non_neg_integer()   %% Timestamp the process got scheduled out.
 }).

 -record(state, {
   input :: file:filename_all(),                      %% Input file name.
   output :: file:filename_all(),                     %% Output file name.
   output_device :: file:io_device(),                 %% Output fd.
   opts :: opts(),                                    %% Options.
   processes = #{} :: #{pid() => #proc{}},            %% List of processes.
   sources = #{} :: #{mfa() => {string(), pos_integer()}}    %% Cache of source file information.
 	input :: file:filename_all(),                      %% Input file name.
 	outputFile :: file:filename_all(),                     %% Output file name.
 	ioDevice :: file:io_device(),                      %% Output fd.
 	opts :: opts(),                                    %% Options.
 	procs = #{} :: #{pid() => #proc{}},                 %% List of processes.
 	sources = #{} :: #{mfa() => {string(), pos_integer()}}    %% Cache of source file information.
 }).

 -spec pts() -> eTpf:input().
 pts() ->
   [{app, kernel}, {app, stdlib}, {app, looking_glass}].

 -spec pfs(file:filename_all(), file:filename_all()) -> ok.
 pfs(InputFile, Output) ->
   pfs(InputFile, Output, #{}).
 pfs(InputFile, OutputFile) ->
 	pfs(InputFile, OutputFile, #{}).

 -spec pfs(file:filename_all(), file:filename_all(), opts()) -> ok.
 pfs(InputFile, Output, Opts) ->
   {ok, OutDevice} = file:open(Output, [write]),
   State = #state{input = InputFile, output = Output, output_device = OutDevice, opts = Opts},
   writeHeader(State),
   {ok, FinalState} = tpFReader:fold(fun handleEvent/2, State, InputFile),
   flush(FinalState),
   _ = file:close(OutDevice),
   ok.

 pfs(InputFile, OutputFile, Opts) ->
 	{ok, IoDevice} = file:open(OutputFile, [write]),
 	State = #state{input = InputFile, outputFile = OutputFile, ioDevice = IoDevice, opts = Opts},
 	writeHeader(State),
 	{ok, FinalState} = tpFReader:fold(fun handleEvent/2, State, InputFile),
 	flush(FinalState),
 	_ = file:close(IoDevice),
 	ok.

 -spec pfm(file:filename(), filelib:dirname(), file:filename()) -> ok.
 pfm(Wildcard, Cwd, Prefix) ->
   pfm(Wildcard, Cwd, Prefix, #{}).
 	pfm(Wildcard, Cwd, Prefix, #{}).

 -spec pfm(file:filename(), filelib:dirname(), file:filename(), opts()) -> ok.
 pfm(Wildcard, Cwd, Prefix, Opts) ->
   Files = lists:sort(filelib:wildcard(Wildcard, Cwd)),
   Seq = lists:seq(1, length(Files)),
   OutFiles = [Prefix ++ "." ++ integer_to_list(N) || N <- Seq],
   Many = lists:zip(Files, OutFiles),
   Refs = [monitor(process, spawn_link(?MODULE, pfs, [Input, Output, Opts])) || {Input, Output} <- Many],
   waitForProcs(Refs).

 flush(State = #state{processes = Procs}) ->
   maps:fold(
      fun(Pid, #proc{mfas = MFAs}, _) ->
         writeMfas(Pid, MFAs, State)
      end,
      undefined, Procs),
   ok.

 %% We do not need to worry about failure because we are linked.
 waitForProcs([]) ->
   ok;
 waitForProcs(Refs) ->
   receive
   %% We purposefully ignore any stray messages.
      {'DOWN', R, process, _, _} ->
         waitForProcs(Refs -- [R])
   end.
 pfm(InputFiles, Cwd, OutputPrefix, Opts) ->
 	PfFiles = lists:sort(filelib:wildcard(InputFiles, Cwd)),
 	SeqList = lists:seq(1, length(PfFiles)),
 	OutputFiles = [OutputPrefix ++ "." ++ integer_to_list(Index) || Index <- SeqList],
 	ManyFiles = lists:zip(PfFiles, OutputFiles),
 	Refs = [monitor(process, spawn_link(?MODULE, pfs, [InputFile, OutputFile, Opts])) || {InputFile, OutputFile} <- ManyFiles],
 	waitForProc(Refs).

 waitForProc([]) ->
 	ok;
 waitForProc(Refs) ->
 	receive
 		{'DOWN', R, process, _, _} ->
 			waitForProc(Refs -- [R])
 	end.

 %% We handle trace events one by one, keeping track of the execution stack for each process.
 %% 我们一一处理跟踪事件，跟踪每个进程的执行堆栈。

 %% 我们不关心匹配规范结果 for callgrind.
 handleEvent({call, Pid, Ts, MFA, _MSpec}, State) ->
   handleEvent({call, Pid, Ts, MFA}, State);
 handleEvent({call, Pid, Ts, MFA}, State0) ->
   Proc =
      case isProcProfiled(Pid, State0) of
         {true, P} ->
            P;
         {empty, P} ->
            P;
         false ->
            #proc{}
      end,
   {Source, State} = findSource(MFA, State0),
   handleCall(Pid, convertMfa(MFA), Source, Ts, Proc, State);
 	handleEvent({call, Pid, Ts, MFA}, State);
 handleEvent({call, Pid, Ts, MFA}, State) ->
 	case isProcProfiled(Pid, State) of
 		{true, P} ->
 			Proc = P;
 		{empty, P} ->
 			Proc = P;
 		false ->
 			Proc = #proc{}
 	end,
 	{Source, NewState} = findSource(MFA, State),
 	handleCall(Pid, convertMfa(MFA), Source, Ts, Proc, NewState);
 handleEvent({return_to, Pid, Ts, MFA}, State) ->
   case isProcProfiled(Pid, State) of
      {true, Proc} ->
         handleReturnTo(Pid, convertMfa(MFA), Ts, Proc, State);
      _ ->
         State
   end;
 	case isProcProfiled(Pid, State) of
 		{true, Proc} ->
 			handleReturnTo(Pid, convertMfa(MFA), Ts, Proc, State);
 		_ ->
 			State
 	end;
 %% Process exited. Unfold the stacktrace entirely.
 %%
 %% We use the atom exit because we know it will not match
 %% a function call and will therefore unfold everything.
 handleEvent({exit, Pid, Ts, _Reason}, State0) ->
   case isProcProfiled(Pid, State0) of
      {true, Proc} ->
         State = #state{processes = Procs} = handleReturnTo(Pid, exit, Ts, Proc, State0),
         %% Remove the pid from the state to save memory.
         State#state{processes = maps:remove(Pid, Procs)};
      _ ->
         State0
   end;
 	case isProcProfiled(Pid, State0) of
 		{true, Proc} ->
 			State = #state{procs = Procs} = handleReturnTo(Pid, exit, Ts, Proc, State0),
 			%% Remove the pid from the state to save memory.
 			State#state{procs = maps:remove(Pid, Procs)};
 		_ ->
 			State0
 	end;
 handleEvent({in, Pid, Ts, _MFA}, State = #state{opts = #{running := true}}) ->
   case isProcProfiled(Pid, State) of
      {true, Proc} -> handleIn(Pid, Ts, Proc, State);
      _ -> State
   end;
 	case isProcProfiled(Pid, State) of
 		{true, Proc} -> handleIn(Pid, Ts, Proc, State);
 		_ -> State
 	end;
 handleEvent({out, Pid, Ts, _MFA}, State = #state{opts = #{running := true}}) ->
   case isProcProfiled(Pid, State) of
      {true, Proc} -> handleOut(Pid, Ts, Proc, State);
      _ -> State
   end;
 	case isProcProfiled(Pid, State) of
 		{true, Proc} -> handleOut(Pid, Ts, Proc, State);
 		_ -> State
 	end;
 %% Ignore all other events. We do not need them for building the callgrind file.
 handleEvent(_, State) ->
   State.

 isProcProfiled(Pid, #state{processes = Procs}) ->
   case maps:get(Pid, Procs, undefined) of
      %% We never received events for this process. Ignore.
      undefined ->
         false;
      %% We received events but are not in a known function currently. Ignore.
      #proc{stack = []} = Proc ->
         {empty, Proc};
      %% All good!
      Proc ->
         {true, Proc}
   end.
 	State.

 isProcProfiled(Pid, #state{procs = Procs}) ->
 	case maps:get(Pid, Procs, undefined) of
 		undefined ->
 			%% We never received events for this process. Ignore.
 			false;
 		#proc{stack = []} = Proc ->
 			%% We received events but are not in a known function currently. Ignore.
 			{empty, Proc};
 		Proc ->
 			%% All good!
 			{true, Proc}
 	end.

 %% We track a number of different things:
 %% - how much time was spent in the different function calls
@ -175,15 +160,15 @@ isProcProfiled(Pid, #state{processes = Procs}) ->
 %% compared to an imperative language.

 %% Recursive call. Just increase the call count.
 handleCall(Pid, MFA, _Source, _Ts, Proc0 = #proc{stack = [Call = #call{mfa = MFA, count = Count} | Stack0]}, State = #state{processes = Procs}) ->
   Stack = [Call#call{count = Count + 1} | Stack0],
   Proc = Proc0#proc{stack = Stack},
   State#state{processes = Procs#{Pid => Proc}};
 handleCall(Pid, MFA, _Source, _Ts, #proc{stack = [Call = #call{mfa = MFA, count = Count} | Stack0]} = Proc, #state{procs = Procs} = State) ->
 	Stack = [Call#call{count = Count + 1} | Stack0],
 	NewProc = Proc#proc{stack = Stack},
 	State#state{procs = Procs#{Pid => NewProc}};
 %% Non-recursive call.
 handleCall(Pid, MFA, Source, Ts, Proc0 = #proc{stack = Stack0}, State = #state{processes = Procs}) ->
   Stack = [#call{mfa = MFA, source = Source, ts = Ts, self_ts = Ts} | Stack0],
   Proc = Proc0#proc{stack = Stack},
   State#state{processes = Procs#{Pid => Proc}}.
 handleCall(Pid, MFA, Source, Ts, #proc{stack = Stack0} = Proc0, #state{procs = Procs} = State) ->
 	Stack = [#call{mfa = MFA, source = Source, ts = Ts, self_ts = Ts} | Stack0],
 	Proc = Proc0#proc{stack = Stack},
 	State#state{procs = Procs#{Pid => Proc}}.

 %% We return from the current call, so the current call
 %% ends regardless of what it was doing. We stop as soon
@ -216,98 +201,98 @@ handleCall(Pid, MFA, Source, Ts, Proc0 = #proc{stack = Stack0}, State = #state{p
 %% and the return time for the end. Here again we also
 %% update the sub call times.

 handleReturnTo(Pid, MFA, Ts, Proc0 = #proc{stack = [Current0 | Stack0], mfas = MFAs0}, State = #state{processes = Procs}) ->
   {Returned0, Stack1} = lists:splitwith(fun(#call{mfa = E}) -> E =/= MFA end, Stack0),
   #call{ts = CurrentTs, self_ts = CurrentSelfTs, self = CurrentSelf} = Current0,
   Current = Current0#call{incl = Ts - CurrentTs, self = CurrentSelf + Ts - CurrentSelfTs},
   Returned = updateTailCalls([Current | Returned0], Ts),
   Stack = updateStack(Returned, Stack1, Ts),
   %% Save the profile information in the state, potentially flushing it
   %% to disk if the stack is empty.
   MFAs1 = updateMfas(Returned, MFAs0),
   MFAs =
      case Stack of
         [] ->
            writeMfas(Pid, MFAs1, State),
            #{};
         _ ->
            MFAs1
      end,
   Proc = Proc0#proc{stack = Stack, mfas = MFAs},
   State#state{processes = Procs#{Pid => Proc}}.
 handleReturnTo(Pid, MFA, Ts, Proc0 = #proc{stack = [Current0 | Stack0], mfas = MFAs0}, State = #state{procs = Procs}) ->
 	{Returned0, Stack1} = lists:splitwith(fun(#call{mfa = E}) -> E =/= MFA end, Stack0),
 	#call{ts = CurrentTs, self_ts = CurrentSelfTs, self = CurrentSelf} = Current0,
 	Current = Current0#call{incl = Ts - CurrentTs, self = CurrentSelf + Ts - CurrentSelfTs},
 	Returned = updateTailCalls([Current | Returned0], Ts),
 	Stack = updateStack(Returned, Stack1, Ts),
 	%% Save the profile information in the state, potentially flushing it
 	%% to disk if the stack is empty.
 	MFAs1 = updateMfas(Returned, MFAs0),
 	MFAs =
 		case Stack of
 			[] ->
 				writeMfas(Pid, MFAs1, State),
 				#{};
 			_ ->
 				MFAs1
 		end,
 	Proc = Proc0#proc{stack = Stack, mfas = MFAs},
 	State#state{procs = Procs#{Pid => Proc}}.

 updateTailCalls([Call], _) ->
   [Call];
 	[Call];
 updateTailCalls([Callee = #call{ts = CalleeTs}, Caller0 = #call{ts = CallerTs, self_ts = CallerSelfTs, self = CallerSelf} | Tail], ReturnToTs) ->
   Caller1 = Caller0#call{incl = ReturnToTs - CallerTs, self = CallerSelf + CalleeTs - CallerSelfTs},
   Caller = updateSubCalls(Callee, Caller1),
   [Callee | updateTailCalls([Caller | Tail], ReturnToTs)].
 	Caller1 = Caller0#call{incl = ReturnToTs - CallerTs, self = CallerSelf + CalleeTs - CallerSelfTs},
 	Caller = updateSubCalls(Callee, Caller1),
 	[Callee | updateTailCalls([Caller | Tail], ReturnToTs)].

 %% Update nothing; there's nothing in the stack.
 updateStack(_, [], _) ->
   [];
 	[];
 %% Update the incl/self value based on the top-level function we return from,
 %% but only update the sub calls based on the function we directly called.
 updateStack(Returned, [Caller0 = #call{self_ts = CallerSelfTs, self = CallerSelf} | Stack], ReturnToTs) ->
   Callee = #call{ts = CalleeTs} = hd(lists:reverse(Returned)),
   Caller = Caller0#call{self_ts = ReturnToTs, self = CallerSelf + CalleeTs - CallerSelfTs},
   [updateSubCalls(Callee, Caller) | Stack].

 updateSubCalls(Callee = #call{mfa = MFA, incl = CallerIncl, count = CallerCount, wait_incl = CallerWaitIncl}, Caller = #call{calls = SubCalls}) ->
   case maps:get(MFA, SubCalls, undefined) of
      %% Add the callee to the subcalls but remove the callee's subcalls
      %% since we don't need those here.
      undefined ->
         Caller#call{calls = SubCalls#{MFA => Callee#call{calls = #{}}}};
      %% Same as above, except we add to the existing values.
      Sub = #call{incl = SubIncl, count = SubCount, wait_incl = SubWaitIncl} ->
         Caller#call{calls = SubCalls#{MFA => Sub#call{
            %% We do not care about self/wait here as we will be using incl/wait_incl in the output.
            incl = SubIncl + CallerIncl,
            count = SubCount + CallerCount,
            wait_incl = SubWaitIncl + CallerWaitIncl
         }}}
   end.
 	Callee = #call{ts = CalleeTs} = hd(lists:reverse(Returned)),
 	Caller = Caller0#call{self_ts = ReturnToTs, self = CallerSelf + CalleeTs - CallerSelfTs},
 	[updateSubCalls(Callee, Caller) | Stack].

 updateSubCalls(Callee = #call{mfa = MFA, incl = CallerIncl, count = CallerCount, waitIncl = CallerWaitIncl}, Caller = #call{calls = SubCalls}) ->
 	case maps:get(MFA, SubCalls, undefined) of
 		%% Add the callee to the subcalls but remove the callee's subcalls
 		%% since we don't need those here.
 		undefined ->
 			Caller#call{calls = SubCalls#{MFA => Callee#call{calls = #{}}}};
 		%% Same as above, except we add to the existing values.
 		Sub = #call{incl = SubIncl, count = SubCount, waitIncl = SubWaitIncl} ->
 			Caller#call{calls = SubCalls#{MFA => Sub#call{
 				%% We do not care about self/wait here as we will be using incl/wait_incl in the output.
 				incl = SubIncl + CallerIncl,
 				count = SubCount + CallerCount,
 				waitIncl = SubWaitIncl + CallerWaitIncl
 			}}}
 	end.

 %% Processes get scheduled in and out. We get the corresponding
 %% in and out events when the 'running' option is set to true.
 %% We keep track of how many times the process was scheduled out
 %% per function, and how long.

 handleIn(Pid, InTs, Proc0 = #proc{stack = [Current0 | Stack0], out = OutTs}, State = #state{processes = Procs}) ->
   #call{wait = Wait, wait_incl = WaitIncl, wait_count = WaitCount, wait_count_incl = WaitCountIncl} = Current0,
   ThisWait = InTs - OutTs,
   %% We increase the wait time for self first.
   Current = Current0#call{wait = Wait + ThisWait, wait_incl = WaitIncl + ThisWait, wait_count = WaitCount + 1, wait_count_incl = WaitCountIncl + 1},
   %% And then for the parent calls to include wait time of subcalls.
   Stack = [
      Call#call{wait_incl = ParentWaitIncl + ThisWait, wait_count_incl = ParentWaitCount + 1}
      || Call = #call{wait_incl = ParentWaitIncl, wait_count_incl = ParentWaitCount} <- Stack0
   ],
   Proc = Proc0#proc{stack = [Current | Stack], out = undefined},
   State#state{processes = Procs#{Pid => Proc}}.

 handleOut(Pid, Ts, Proc0 = #proc{out = undefined}, State = #state{processes = Procs}) ->
   Proc = Proc0#proc{out = Ts},
   State#state{processes = Procs#{Pid => Proc}}.
 handleIn(Pid, InTs, Proc0 = #proc{stack = [Current0 | Stack0], out = OutTs}, State = #state{procs = Procs}) ->
 	#call{wait = Wait, waitIncl = WaitIncl, waitCnt = WaitCount, waitCntIncl = WaitCountIncl} = Current0,
 	ThisWait = InTs - OutTs,
 	%% We increase the wait time for self first.
 	Current = Current0#call{wait = Wait + ThisWait, waitIncl = WaitIncl + ThisWait, waitCnt = WaitCount + 1, waitCntIncl = WaitCountIncl + 1},
 	%% And then for the parent calls to include wait time of subcalls.
 	Stack = [
 		Call#call{waitIncl = ParentWaitIncl + ThisWait, waitCntIncl = ParentWaitCount + 1}
 		|| Call = #call{waitIncl = ParentWaitIncl, waitCntIncl = ParentWaitCount} <- Stack0
 	],
 	Proc = Proc0#proc{stack = [Current | Stack], out = undefined},
 	State#state{procs = Procs#{Pid => Proc}}.

 handleOut(Pid, Ts, Proc0 = #proc{out = undefined}, State = #state{procs = Procs}) ->
 	Proc = Proc0#proc{out = Ts},
 	State#state{procs = Procs#{Pid => Proc}}.

 %% Update the profiling information we currently hold.
 updateMfas([], MFAs) ->
   MFAs;
 updateMfas([Call = #call{mfa = MFA, incl = Incl, self = Self, wait = Wait, wait_incl = WaitIncl, wait_count = WaitCount, wait_count_incl = WaitCountIncl, count = Count, calls = SubCalls} | Tail], MFAs) ->
   case MFAs of
      #{MFA := AggCall0 = #call{incl = AggIncl, self = AggSelf, wait = AggWait, wait_incl = AggWaitIncl, wait_count = AggWaitCount, wait_count_incl = AggWaitCountIncl, count = AggCount, calls = AggSubCalls0}} ->
         AggSubCalls = updateMfas(maps:values(SubCalls), AggSubCalls0),
         AggCall = AggCall0#call{incl = Incl + AggIncl, self = Self + AggSelf,
            wait = Wait + AggWait, wait_incl = WaitIncl + AggWaitIncl,
            wait_count = WaitCount + AggWaitCount,
            wait_count_incl = WaitCountIncl + AggWaitCountIncl,
            count = Count + AggCount, calls = AggSubCalls
         },
         updateMfas(Tail, MFAs#{MFA => AggCall});
      _ ->
         updateMfas(Tail, MFAs#{MFA => Call})
   end.
 	MFAs;
 updateMfas([Call = #call{mfa = MFA, incl = Incl, self = Self, wait = Wait, waitIncl = WaitIncl, waitCnt = WaitCount, waitCntIncl = WaitCountIncl, count = Count, calls = SubCalls} | Tail], MFAs) ->
 	case MFAs of
 		#{MFA := AggCall0 = #call{incl = AggIncl, self = AggSelf, wait = AggWait, waitIncl = AggWaitIncl, waitCnt = AggWaitCount, waitCntIncl = AggWaitCountIncl, count = AggCount, calls = AggSubCalls0}} ->
 			AggSubCalls = updateMfas(maps:values(SubCalls), AggSubCalls0),
 			AggCall = AggCall0#call{incl = Incl + AggIncl, self = Self + AggSelf,
 				wait = Wait + AggWait, waitIncl = WaitIncl + AggWaitIncl,
 				waitCnt = WaitCount + AggWaitCount,
 				waitCntIncl = WaitCountIncl + AggWaitCountIncl,
 				count = Count + AggCount, calls = AggSubCalls
 			},
 			updateMfas(Tail, MFAs#{MFA => AggCall});
 		_ ->
 			updateMfas(Tail, MFAs#{MFA => Call})
 	end.

 %% The callgrind format is documented at http://valgrind.org/docs/manual/cl-format.html
 %%
@ -316,101 +301,109 @@ updateMfas([Call = #call{mfa = MFA, incl = Incl, self = Self, wait = Wait, wait_
 %%
 %% The option 'scope' can be used to enable per process tracking.

 writeHeader(#state{output_device = OutDevice, opts = #{running := true}}) ->
   ok = file:write(OutDevice,
      "# callgrind format\n"
      "events: Total Active Wait WaitCount\n"
      "event: Total : Total time in microseconds\n"
      "event: Active : Active time in microseconds\n"
      "event: Wait : Wait time in microseconds (scheduled out)\n"
      "event: WaitCount : Number of times the process was scheduled out\n"
      "\n");
 writeHeader(#state{output_device = OutDevice}) ->
   ok = file:write(OutDevice,
      "# callgrind format\n"
      "events: Total\n"
      "event: Total : Total time in microseconds\n"
      "\n").
 writeHeader(#state{ioDevice = OutDevice, opts = #{running := true}}) ->
 	ok = file:write(OutDevice,
 		<<"# callgrind format\n"
 		"events: Total Active Wait WaitCount\n"
 		"event: Total : Total time in microseconds\n"
 		"event: Active : Active time in microseconds\n"
 		"event: Wait : Wait time in microseconds (scheduled out)\n"
 		"event: WaitCount : Number of times the process was scheduled out\n"
 		"\n">>);
 writeHeader(#state{ioDevice = OutDevice}) ->
 	ok = file:write(OutDevice,
 		<<"# callgrind format\n"
 		"events: Total\n"
 		"event: Total : Total time in microseconds\n"
 		"\n">>).

 flush(State = #state{procs = Procs}) ->
 	maps:fold(
 		fun(Pid, #proc{mfas = MFAs}, _) ->
 			writeMfas(Pid, MFAs, State)
 		end,
 		undefined, Procs),
 	ok.

 writeMfas(Pid, MFAs, State) ->
   _ = [writeCall(Pid, Call, State) || Call <- maps:values(MFAs)],
   ok.

 writeCall(Pid, #call{mfa = MFA, source = {Source, LN}, self = Self, wait = Wait, wait_count = WaitCount, calls = Calls0}, #state{output_device = OutDevice, opts = Opts}) ->
   Calls = maps:values(Calls0),
   Ob =
      case Opts of
         #{scope := per_process} ->
            ["ob=", io_lib:write(Pid), "\n"];
         _ ->
            []
      end,
   RunningCosts =
      case Opts of
         #{running := true} ->
            [
               " ", integer_to_list(Self - Wait),
               " ", integer_to_list(Wait),
               " ", integer_to_list(WaitCount)
            ];
         _ ->
            []
      end,
   file:write(OutDevice,
      [
         Ob,
         "fl=", Source, "\n"
      "fn=", atom_to_list(MFA), "\n",
         integer_to_list(LN), " ", integer_to_list(Self), RunningCosts, "\n",
         formatSubcalls(LN, Calls, Opts),
         "\n"
      ]).
 	_ = [writeCall(Pid, Call, State) || Call <- maps:values(MFAs)],
 	ok.

 writeCall(Pid, #call{mfa = MFA, source = {Source, LN}, self = Self, wait = Wait, waitCnt = WaitCount, calls = Calls0}, #state{ioDevice = OutDevice, opts = Opts}) ->
 	Calls = maps:values(Calls0),
 	Ob =
 		case Opts of
 			#{scope := per_process} ->
 				["ob=", io_lib:write(Pid), "\n"];
 			_ ->
 				[]
 		end,
 	RunningCosts =
 		case Opts of
 			#{running := true} ->
 				[
 					" ", integer_to_list(Self - Wait),
 					" ", integer_to_list(Wait),
 					" ", integer_to_list(WaitCount)
 				];
 			_ ->
 				[]
 		end,
 	file:write(OutDevice,
 		[
 			Ob,
 			"fl=", Source, "\n"
 		   "fn=", atom_to_list(MFA), "\n",
 			integer_to_list(LN), " ", integer_to_list(Self), RunningCosts, "\n",
 			formatSubcalls(LN, Calls, Opts),
 			"\n"
 		]).

 formatSubcalls(_, [], _) ->
   [];
 	[];
 %% @todo We don't need to write the filename for functions in the same module.
 %% @todo We also don't want to put the full file name; instead we should remove
 %% the prefix (path to the release).
 %%
 %% We only look at where the function is defined, we can't really get
 %% the actual line number where the call happened, unfortunately.
 formatSubcalls(LN, [#call{mfa = MFA, source = {Source, TargetLN}, incl = Incl, wait_incl = Wait, wait_count_incl = WaitCount, count = Count, calls = _Calls} | Tail], Opts) ->
   RunningCosts =
      case Opts of
         #{running := true} ->
            [
               " ", integer_to_list(Incl - Wait),
               " ", integer_to_list(Wait),
               " ", integer_to_list(WaitCount)
            ];
         _ ->
            []
      end,
   [
      [
         "cfi=", Source, "\n"
      "cfn=", atom_to_list(MFA), "\n"
      "calls=", integer_to_list(Count), " ", integer_to_list(TargetLN), "\n",
         integer_to_list(LN), " ", integer_to_list(Incl), RunningCosts, "\n"
      ] | formatSubcalls(LN, Tail, Opts)
   ].
 formatSubcalls(LN, [#call{mfa = MFA, source = {Source, TargetLN}, incl = Incl, waitIncl = Wait, waitCntIncl = WaitCount, count = Count, calls = _Calls} | Tail], Opts) ->
 	RunningCosts =
 		case Opts of
 			#{running := true} ->
 				[
 					" ", integer_to_list(Incl - Wait),
 					" ", integer_to_list(Wait),
 					" ", integer_to_list(WaitCount)
 				];
 			_ ->
 				[]
 		end,
 	[
 		[
 			"cfi=", Source, "\n"
 		   "cfn=", atom_to_list(MFA), "\n"
 		   "calls=", integer_to_list(Count), " ", integer_to_list(TargetLN), "\n",
 			integer_to_list(LN), " ", integer_to_list(Incl), RunningCosts, "\n"
 		] | formatSubcalls(LN, Tail, Opts)
 	].

 convertMfa(undefined) ->
   undefined;
 convertMfa({M0, F0, A0}) ->
   M = atom_to_binary(M0, latin1),
   F = atom_to_binary(F0, latin1),
   A = integer_to_binary(A0),
   binary_to_atom(<<M/binary, $:, F/binary, $/, A/binary>>, latin1).

 findSource(MFA, State0 = #state{sources = Cache}) ->
   case Cache of
      #{MFA := Source} ->
         {Source, State0};
      _ ->
         State = #state{sources = #{MFA := Source}} = cacheModule(MFA, State0),
         {Source, State}
   end.
 	undefined;
 convertMfa({M, F, A}) ->
 	MBin = atom_to_binary(M, latin1),
 	FBin = atom_to_binary(F, latin1),
 	ABin = integer_to_binary(A),
 	binary_to_atom(<<MBin/binary, $:, FBin/binary, $/, ABin/binary>>, latin1).

 findSource(MFA, #state{sources = Cache} = State) ->
 	case Cache of
 		#{MFA := Source} ->
 			{Source, State};
 		_ ->
 			NewState = #state{sources = #{MFA := Source}} = cacheModule(MFA, State),
 			{Source, NewState}
 	end.

 %% We extract the line number of the functions by loading the
 %% beam file (which is already loaded when we reach this function)
@ -426,41 +419,41 @@ findSource(MFA, State0 = #state{sources = Cache}) ->
 %%
 %% While this is an expensive operation, we cache the result
 %% and therefore this function will only be called once per module.
 cacheModule(MFA = {Module, _, _}, State0 = #state{sources = Cache}) ->
   try
      %% If the module is in the path, we can simply query
      %% it for the source file.
      Info = Module:module_info(compile),
      %% @todo We don't want to return an absolute path,
      %% but rather the app/src/file.erl path if it's in
      %% an application, or just folder/file.erl if not.
      %% This allows different users to point to the
      %% same source at different locations on their machine.
      {_, Src} = lists:keyfind(source, 1, Info),
      cacheModule(MFA, State0, Src)
   catch _:_ ->
      %% Either the module was not found, or it doesn't
      %% have a 'source' key in the compile info.
      %%
      %% We can't cache the module; on the other hand
      %% we can cache the result of this operation.
      %% Just append .erl to the module name and set the
      %% line number to 1, which is of course incorrect.
      State0#state{sources = Cache#{MFA => {atom_to_list(Module) ++ ".erl", 1}}}
   end.

 cacheModule(MFA = {Module, _, _}, State = #state{sources = Cache0}, Src) ->
   {Module, Beam, _} = code:get_object_code(Module),
   {ok, {Module, Chunks}} = beam_lib:chunks(Beam, [abstract_code]),
   [{abstract_code, {raw_abstract_v1, Forms}}] = Chunks,
   Funcs = [{{Module, F, A}, LN} || {function, LN, F, A, _} <- Forms],
   Cache1 = lists:foldl(fun({Key, LN}, Acc) -> Acc#{Key => {Src, LN}} end, Cache0, Funcs),
   %% We cannot currently retrieve line number information
   %% for list comprehensions and funs. We therefore
   %% cache the correct file with line number set to 1.
   Cache =
      case Cache1 of
         #{MFA := _} -> Cache1;
         _ -> Cache1#{MFA => {Src, 1}}
      end,
   State#state{sources = Cache}.
 cacheModule({Module, _, _} = MFA, #state{sources = Cache} = State) ->
 	try
 		%% If the module is in the path, we can simply query
 		%% it for the source file.
 		Info = Module:module_info(compile),
 		%% @todo We don't want to return an absolute path,
 		%% but rather the app/src/file.erl path if it's in
 		%% an application, or just folder/file.erl if not.
 		%% This allows different users to point to the
 		%% same source at different locations on their machine.
 		{_, Src} = lists:keyfind(source, 1, Info),
 		cacheModule(MFA, State, Src)
 	catch _:_ ->
 		%% Either the module was not found, or it doesn't
 		%% have a 'source' key in the compile info.
 		%%
 		%% We can't cache the module; on the other hand
 		%% we can cache the result of this operation.
 		%% Just append .erl to the module name and set the
 		%% line number to 1, which is of course incorrect.
 		State#state{sources = Cache#{MFA => {atom_to_list(Module) ++ ".erl", 1}}}
 	end.

 cacheModule({Module, _, _} = MFA, #state{sources = Cache} = State, Src) ->
 	{Module, Beam, _} = code:get_object_code(Module),
 	{ok, {Module, Chunks}} = beam_lib:chunks(Beam, [abstract_code]),
 	[{abstract_code, {raw_abstract_v1, Forms}}] = Chunks,
 	Funcs = [{{Module, F, A}, LN} || {function, LN, F, A, _} <- Forms],
 	NewCache = lists:foldl(fun({Key, LN}, Acc) -> Acc#{Key => {Src, LN}} end, Cache, Funcs),
 	%% We cannot currently retrieve line number information
 	%% for list comprehensions and funs. We therefore
 	%% cache the correct file with line number set to 1.
 	LastCache =
 		case NewCache of
 			#{MFA := _} -> NewCache;
 			_ -> NewCache#{MFA => {Src, 1}}
 		end,
 	State#state{sources = LastCache}.
--- a/src/eTpf.erl
+++ b/src/eTpf.erl
@ -60,8 +60,9 @@ stop() ->
 stop(TracerId) ->
 	supervisor:terminate_child(eTpf_sup, TracerId).

 -spec pts() -> pattern().
 pts() ->
 	ok.
 	[{app, kernel}, {app, stdlib}, {app, looking_glass}].

 ptl() ->
 	ok.
--- a/src/flame/tpFlame.erl
+++ b/src/flame/tpFlame.erl
@ -6,7 +6,7 @@
 ]).

 -record(state, {
   outputPath = "",
   outputFile = "",
   pid,
   lastTs,
   count = 0,
@ -14,14 +14,14 @@
 }).

 -spec pfs(file:filename_all(), file:filename_all()) -> ok.
 pfs(InputFile, OutputPath) ->
   {ok, FinalState} = tpFReader:fold(fun handleEvent/2, #state{outputPath = OutputPath}, InputFile),
 pfs(InputFile, OutputFile) ->
   {ok, FinalState} = tpFReader:fold(fun handleEvent/2, #state{outputFile = OutputFile}, InputFile),
   flush(FinalState).

 -spec pfm(file:filename(), filelib:dirname(), file:filename()) -> ok.
 pfm(InputFiles, Cwd, OutputPath) ->
 pfm(InputFiles, Cwd, OutputFile) ->
   PfFiles = lists:sort(filelib:wildcard(InputFiles, Cwd)),
   doPfm(PfFiles, #state{outputPath = OutputPath}).
   doPfm(PfFiles, #state{outputFile = OutputFile}).

 doPfm([], State) ->
   flush(State);
@ -36,12 +36,6 @@ handleEvent(Trace, State) ->
   setPidState(Pid, NewPidState),
   State.

 %% in & out, without call context, don't help us
 doExpInner({InOut, _Pid, _TS, _MFA}, #state{lastTs = undefined} = PS) when InOut == in; InOut == out ->
   PS;
 %% return_from and return_to, without call context, don't help us
 doExpInner({Return, _Pid, _TS, _MFA}, #state{lastTs = undefined} = PS) when Return == return_from; Return == return_to ->
   PS;
 doExpInner({call, Pid, TS, MFA, BIN}, #state{lastTs = LastTS, acc = Acc, count = Count} = PS) ->
   try
      %% Calculate time elapsed, TS-LastTs. 计算经过的时间，TS-LastTs。
@ -68,7 +62,7 @@ doExpInner({call, Pid, TS, MFA, BIN}, #state{lastTs = LastTS, acc = Acc, count =
      io:format(user, "~p: ~p:~p @ ~p\n", [?LINE, Class, Reason, StackTrace]),
      PS
   end;
 doExpInner({return_to, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) ->
 doExpInner({return_to, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) when LastTS =/= undefined ->
   try
      %% Calculate time elapsed, TS-LastTs.
      %% 1. Credit elapsed time to the stack on the top of Acc.
@ -112,7 +106,7 @@ doExpInner({gc_end, _Pid, TS, _Info}, #state{lastTs = LastTS, acc = Acc} = PS) -
      io:format(user, "~p: ~p:~p @ ~p\n", [?LINE, Class, Reason, StackTrace]),
      PS
   end;
 doExpInner({out, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) ->
 doExpInner({out, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) when LastTS =/= undefined ->
   try
      %% Push a 0 usec item onto Acc.
      %% The MFA reported here probably doesn't appear in the stacktrace
@ -127,7 +121,7 @@ doExpInner({out, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) ->
      io:format(user, "~p: ~p:~p @ ~p\n", [?LINE, Class, Reason, StackTrace]),
      PS
   end;
 doExpInner({in, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) ->
 doExpInner({in, _Pid, TS, MFA}, #state{lastTs = LastTS, acc = Acc} = PS) when LastTS =/= undefined ->
   try
      %% Push the Sleep time onto Acc, then push 0 usec item from last
      %% exec stack onto Acc.
@ -210,10 +204,10 @@ mfaf(I) ->
   [_, C | _] = string:tokens(I, "()+"),
   string:strip(C).

 flush(#state{outputPath = OutputPath}) ->
 flush(#state{outputFile = OutputFile}) ->
   PidStates = get(),
   {ok, FH} = file:open(OutputPath, [write, raw, binary, delayed_write]),
   io:format("\n\nWriting to ~s for ~w processes...", [OutputPath, length(PidStates)]),
   {ok, FH} = file:open(OutputFile, [write, raw, binary, delayed_write]),
   io:format("\n\nWriting to ~s for ~w processes...", [OutputFile, length(PidStates)]),
   [
      [
         begin