Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

426 linhas
17 KiB

14 anos atrás
14 anos atrás
14 anos atrás
14 anos atrás
  1. %% Copyright (c) 2011-2012 Basho Technologies, Inc. All Rights Reserved.
  2. %%
  3. %% This file is provided to you under the Apache License,
  4. %% Version 2.0 (the "License"); you may not use this file
  5. %% except in compliance with the License. You may obtain
  6. %% a copy of the License at
  7. %%
  8. %% http://www.apache.org/licenses/LICENSE-2.0
  9. %%
  10. %% Unless required by applicable law or agreed to in writing,
  11. %% software distributed under the License is distributed on an
  12. %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  13. %% KIND, either express or implied. See the License for the
  14. %% specific language governing permissions and limitations
  15. %% under the License.
  16. %% @doc A error_logger backend for redirecting events into lager.
  17. %% Error messages and crash logs are also optionally written to a crash log.
  18. %% @see lager_crash_log
  19. %% @private
  20. -module(error_logger_lager_h).
  21. -include("lager.hrl").
  22. -behaviour(gen_event).
  23. -export([set_high_water/1]).
  24. -export([init/1, handle_call/2, handle_event/2, handle_info/2, terminate/2,
  25. code_change/3]).
  26. -export([format_reason/1]).
  27. -record(state, {
  28. %% how many messages per second we try to deliver
  29. hwm = undefined :: 'undefined' | pos_integer(),
  30. %% how many messages we've received this second
  31. mps = 0 :: non_neg_integer(),
  32. %% the current second
  33. lasttime = os:timestamp() :: erlang:timestamp(),
  34. %% count of dropped messages this second
  35. dropped = 0 :: non_neg_integer()
  36. }).
  37. -define(LOGMSG(Level, Pid, Msg),
  38. case ?SHOULD_LOG(Level) of
  39. true ->
  40. _ =lager:log(Level, Pid, Msg),
  41. ok;
  42. _ -> ok
  43. end).
  44. -define(LOGFMT(Level, Pid, Fmt, Args),
  45. case ?SHOULD_LOG(Level) of
  46. true ->
  47. _ = lager:log(Level, Pid, Fmt, Args),
  48. ok;
  49. _ -> ok
  50. end).
  51. -ifdef(TEST).
  52. -compile(export_all).
  53. %% Make CRASH synchronous when testing, to avoid timing headaches
  54. -define(CRASH_LOG(Event),
  55. catch(gen_server:call(lager_crash_log, {log, Event}))).
  56. -else.
  57. -define(CRASH_LOG(Event),
  58. gen_server:cast(lager_crash_log, {log, Event})).
  59. -endif.
  60. set_high_water(N) ->
  61. gen_event:call(error_logger, ?MODULE, {set_high_water, N}, infinity).
  62. -spec init(any()) -> {ok, #state{}}.
  63. init([HighWaterMark]) ->
  64. {ok, #state{hwm=HighWaterMark}}.
  65. handle_call({set_high_water, N}, State) ->
  66. {ok, ok, State#state{hwm = N}};
  67. handle_call(_Request, State) ->
  68. {ok, unknown_call, State}.
  69. handle_event(Event, State) ->
  70. case check_hwm(State) of
  71. {true, NewState} ->
  72. log_event(Event, NewState);
  73. {false, NewState} ->
  74. {ok, NewState}
  75. end.
  76. handle_info(_Info, State) ->
  77. {ok, State}.
  78. terminate(_Reason, _State) ->
  79. ok.
  80. code_change(_OldVsn, State, _Extra) ->
  81. {ok, State}.
  82. %% internal functions
  83. check_hwm(State = #state{hwm = undefined}) ->
  84. {true, State};
  85. check_hwm(State = #state{mps = Mps, hwm = Hwm}) when Mps < Hwm ->
  86. %% haven't hit high water mark yet, just log it
  87. {true, State#state{mps=Mps+1}};
  88. check_hwm(State = #state{hwm = Hwm, lasttime = Last, dropped = Drop}) ->
  89. %% are we still in the same second?
  90. {M, S, _} = Now = os:timestamp(),
  91. case Last of
  92. {M, S, _} ->
  93. %% still in same second, but have exceeded the high water mark
  94. NewDrops = discard_messages(Now, 0),
  95. {false, State#state{dropped=Drop+NewDrops}};
  96. _ ->
  97. %% different second, reset all counters and allow it
  98. case Drop > 0 of
  99. true ->
  100. ?LOGFMT(warning, self(), "lager_error_logger_h dropped ~p messages in the last second that exceeded the limit of ~p messages/sec",
  101. [Drop, Hwm]);
  102. false ->
  103. ok
  104. end,
  105. {true, State#state{dropped = 0, mps=1, lasttime = Now}}
  106. end.
  107. discard_messages(Second, Count) ->
  108. {M, S, _} = os:timestamp(),
  109. case Second of
  110. {M, S, _} ->
  111. receive
  112. _Msg ->
  113. discard_messages(Second, Count+1)
  114. after 0 ->
  115. Count
  116. end;
  117. _ ->
  118. Count
  119. end.
  120. log_event(Event, State) ->
  121. case Event of
  122. {error, _GL, {Pid, Fmt, Args}} ->
  123. case Fmt of
  124. "** Generic server "++_ ->
  125. %% gen_server terminate
  126. [Name, _Msg, _State, Reason] = Args,
  127. ?CRASH_LOG(Event),
  128. ?LOGFMT(error, Pid, "gen_server ~w terminated with reason: ~s",
  129. [Name, format_reason(Reason)]);
  130. "** State machine "++_ ->
  131. %% gen_fsm terminate
  132. [Name, _Msg, StateName, _StateData, Reason] = Args,
  133. ?CRASH_LOG(Event),
  134. ?LOGFMT(error, Pid, "gen_fsm ~w in state ~w terminated with reason: ~s",
  135. [Name, StateName, format_reason(Reason)]);
  136. "** gen_event handler"++_ ->
  137. %% gen_event handler terminate
  138. [ID, Name, _Msg, _State, Reason] = Args,
  139. ?CRASH_LOG(Event),
  140. ?LOGFMT(error, Pid, "gen_event ~w installed in ~w terminated with reason: ~s",
  141. [ID, Name, format_reason(Reason)]);
  142. "** Cowboy handler"++_ ->
  143. %% Cowboy HTTP server error
  144. ?CRASH_LOG(Event),
  145. case Args of
  146. [Module, Function, Arity, _Request, _State] ->
  147. %% we only get the 5-element list when its a non-exported function
  148. ?LOGFMT(error, Pid,
  149. "Cowboy handler ~p terminated with reason: call to undefined function ~p:~p/~p",
  150. [Module, Module, Function, Arity]);
  151. [Module, Function, Arity, _Class, Reason | Tail] ->
  152. %% any other cowboy error_format list *always* ends with the stacktrace
  153. StackTrace = lists:last(Tail),
  154. ?LOGFMT(error, Pid,
  155. "Cowboy handler ~p terminated in ~p:~p/~p with reason: ~s",
  156. [Module, Module, Function, Arity, format_reason({Reason, StackTrace})])
  157. end;
  158. "webmachine error"++_ ->
  159. %% Webmachine HTTP server error
  160. ?CRASH_LOG(Event),
  161. [Path, Error] = Args,
  162. %% webmachine likes to mangle the stack, for some reason
  163. StackTrace = case Error of
  164. {error, {error, Reason, Stack}} ->
  165. {Reason, Stack};
  166. _ ->
  167. Error
  168. end,
  169. ?LOGFMT(error, Pid, "Webmachine error at path ~p : ~s", [Path, format_reason(StackTrace)]);
  170. _ ->
  171. ?CRASH_LOG(Event),
  172. ?LOGMSG(error, Pid, lager:safe_format(Fmt, Args, ?DEFAULT_TRUNCATION))
  173. end;
  174. {error_report, _GL, {Pid, std_error, D}} ->
  175. ?CRASH_LOG(Event),
  176. ?LOGMSG(error, Pid, print_silly_list(D));
  177. {error_report, _GL, {Pid, supervisor_report, D}} ->
  178. ?CRASH_LOG(Event),
  179. case lists:sort(D) of
  180. [{errorContext, Ctx}, {offender, Off}, {reason, Reason}, {supervisor, Name}] ->
  181. Offender = format_offender(Off),
  182. ?LOGFMT(error, Pid,
  183. "Supervisor ~w had child ~s exit with reason ~s in context ~w",
  184. [supervisor_name(Name), Offender, format_reason(Reason), Ctx]);
  185. _ ->
  186. ?LOGMSG(error, Pid, "SUPERVISOR REPORT " ++ print_silly_list(D))
  187. end;
  188. {error_report, _GL, {Pid, crash_report, [Self, Neighbours]}} ->
  189. ?CRASH_LOG(Event),
  190. ?LOGMSG(error, Pid, "CRASH REPORT " ++ format_crash_report(Self, Neighbours));
  191. {warning_msg, _GL, {Pid, Fmt, Args}} ->
  192. ?LOGMSG(warning, Pid, lager:safe_format(Fmt, Args, ?DEFAULT_TRUNCATION));
  193. {warning_report, _GL, {Pid, std_warning, Report}} ->
  194. ?LOGMSG(warning, Pid, print_silly_list(Report));
  195. {info_msg, _GL, {Pid, Fmt, Args}} ->
  196. ?LOGMSG(info, Pid, lager:safe_format(Fmt, Args, ?DEFAULT_TRUNCATION));
  197. {info_report, _GL, {Pid, std_info, D}} when is_list(D) ->
  198. Details = lists:sort(D),
  199. case Details of
  200. [{application, App}, {exited, Reason}, {type, _Type}] ->
  201. ?LOGFMT(info, Pid, "Application ~w exited with reason: ~s",
  202. [App, format_reason(Reason)]);
  203. _ ->
  204. ?LOGMSG(info, Pid, print_silly_list(D))
  205. end;
  206. {info_report, _GL, {Pid, std_info, D}} ->
  207. ?LOGFMT(info, Pid, "~w", [D]);
  208. {info_report, _GL, {P, progress, D}} ->
  209. Details = lists:sort(D),
  210. case Details of
  211. [{application, App}, {started_at, Node}] ->
  212. ?LOGFMT(info, P, "Application ~w started on node ~w",
  213. [App, Node]);
  214. [{started, Started}, {supervisor, Name}] ->
  215. MFA = format_mfa(proplists:get_value(mfargs, Started)),
  216. Pid = proplists:get_value(pid, Started),
  217. ?LOGFMT(debug, P, "Supervisor ~w started ~s at pid ~w",
  218. [supervisor_name(Name), MFA, Pid]);
  219. _ ->
  220. ?LOGMSG(info, P, "PROGRESS REPORT " ++ print_silly_list(D))
  221. end;
  222. _ ->
  223. ?LOGFMT(warning, self(), "Unexpected error_logger event ~w", [Event])
  224. end,
  225. {ok, State}.
  226. format_crash_report(Report, Neighbours) ->
  227. Name = case proplists:get_value(registered_name, Report, []) of
  228. [] ->
  229. %% process_info(Pid, registered_name) returns [] for unregistered processes
  230. proplists:get_value(pid, Report);
  231. Atom -> Atom
  232. end,
  233. {Class, Reason, Trace} = proplists:get_value(error_info, Report),
  234. ReasonStr = format_reason({Reason, Trace}),
  235. Type = case Class of
  236. exit -> "exited";
  237. _ -> "crashed"
  238. end,
  239. io_lib:format("Process ~w with ~w neighbours ~s with reason: ~s",
  240. [Name, length(Neighbours), Type, ReasonStr]).
  241. format_offender(Off) ->
  242. case proplists:get_value(mfargs, Off) of
  243. undefined ->
  244. %% supervisor_bridge
  245. io_lib:format("at module ~w at ~w",
  246. [proplists:get_value(mod, Off), proplists:get_value(pid, Off)]);
  247. MFArgs ->
  248. %% regular supervisor
  249. MFA = format_mfa(MFArgs),
  250. Name = proplists:get_value(name, Off),
  251. io_lib:format("~p started with ~s at ~w",
  252. [Name, MFA, proplists:get_value(pid, Off)])
  253. end.
  254. format_reason({'function not exported', [{M, F, A},MFA|_]}) ->
  255. ["call to undefined function ", format_mfa({M, F, length(A)}),
  256. " from ", format_mfa(MFA)];
  257. format_reason({'function not exported', [{M, F, A, _Props},MFA|_]}) ->
  258. %% R15 line numbers
  259. ["call to undefined function ", format_mfa({M, F, length(A)}),
  260. " from ", format_mfa(MFA)];
  261. format_reason({undef, [MFA|_]}) ->
  262. ["call to undefined function ", format_mfa(MFA)];
  263. format_reason({bad_return, {_MFA, {'EXIT', Reason}}}) ->
  264. format_reason(Reason);
  265. format_reason({bad_return, {MFA, Val}}) ->
  266. ["bad return value ", print_val(Val), " from ", format_mfa(MFA)];
  267. format_reason({bad_return_value, Val}) ->
  268. ["bad return value: ", print_val(Val)];
  269. format_reason({{bad_return_value, Val}, MFA}) ->
  270. ["bad return value: ", print_val(Val), " in ", format_mfa(MFA)];
  271. format_reason({{badrecord, Record}, [MFA|_]}) ->
  272. ["bad record ", print_val(Record), " in ", format_mfa(MFA)];
  273. format_reason({{case_clause, Val}, [MFA|_]}) ->
  274. ["no case clause matching ", print_val(Val), " in ", format_mfa(MFA)];
  275. format_reason({function_clause, [MFA|_]}) ->
  276. ["no function clause matching ", format_mfa(MFA)];
  277. format_reason({if_clause, [MFA|_]}) ->
  278. ["no true branch found while evaluating if expression in ", format_mfa(MFA)];
  279. format_reason({{try_clause, Val}, [MFA|_]}) ->
  280. ["no try clause matching ", print_val(Val), " in ", format_mfa(MFA)];
  281. format_reason({badarith, [MFA|_]}) ->
  282. ["bad arithmetic expression in ", format_mfa(MFA)];
  283. format_reason({{badmatch, Val}, [MFA|_]}) ->
  284. ["no match of right hand value ", print_val(Val), " in ", format_mfa(MFA)];
  285. format_reason({emfile, _Trace}) ->
  286. "maximum number of file descriptors exhausted, check ulimit -n";
  287. format_reason({system_limit, [{M, F, _}|_] = Trace}) ->
  288. Limit = case {M, F} of
  289. {erlang, open_port} ->
  290. "maximum number of ports exceeded";
  291. {erlang, spawn} ->
  292. "maximum number of processes exceeded";
  293. {erlang, spawn_opt} ->
  294. "maximum number of processes exceeded";
  295. {erlang, list_to_atom} ->
  296. "tried to create an atom larger than 255, or maximum atom count exceeded";
  297. {ets, new} ->
  298. "maximum number of ETS tables exceeded";
  299. _ ->
  300. {Str, _} = lager_trunc_io:print(Trace, 500),
  301. Str
  302. end,
  303. ["system limit: ", Limit];
  304. format_reason({badarg, [MFA,MFA2|_]}) ->
  305. case MFA of
  306. {_M, _F, A, _Props} when is_list(A) ->
  307. %% R15 line numbers
  308. ["bad argument in call to ", format_mfa(MFA), " in ", format_mfa(MFA2)];
  309. {_M, _F, A} when is_list(A) ->
  310. ["bad argument in call to ", format_mfa(MFA), " in ", format_mfa(MFA2)];
  311. _ ->
  312. %% seems to be generated by a bad call to a BIF
  313. ["bad argument in ", format_mfa(MFA)]
  314. end;
  315. format_reason({{badarity, {Fun, Args}}, [MFA|_]}) ->
  316. {arity, Arity} = lists:keyfind(arity, 1, erlang:fun_info(Fun)),
  317. [io_lib:format("fun called with wrong arity of ~w instead of ~w in ",
  318. [length(Args), Arity]), format_mfa(MFA)];
  319. format_reason({noproc, MFA}) ->
  320. ["no such process or port in call to ", format_mfa(MFA)];
  321. format_reason({{badfun, Term}, [MFA|_]}) ->
  322. ["bad function ", print_val(Term), " in ", format_mfa(MFA)];
  323. format_reason({Reason, [{M, F, A}|_]}) when is_atom(M), is_atom(F), is_integer(A) ->
  324. [format_reason(Reason), " in ", format_mfa({M, F, A})];
  325. format_reason({Reason, [{M, F, A, Props}|_]}) when is_atom(M), is_atom(F), is_integer(A), is_list(Props) ->
  326. %% line numbers
  327. [format_reason(Reason), " in ", format_mfa({M, F, A, Props})];
  328. format_reason(Reason) ->
  329. {Str, _} = lager_trunc_io:print(Reason, 500),
  330. Str.
  331. format_mfa({M, F, A}) when is_list(A) ->
  332. {FmtStr, Args} = format_args(A, [], []),
  333. io_lib:format("~w:~w("++FmtStr++")", [M, F | Args]);
  334. format_mfa({M, F, A}) when is_integer(A) ->
  335. io_lib:format("~w:~w/~w", [M, F, A]);
  336. format_mfa({M, F, A, Props}) when is_list(Props) ->
  337. case proplists:get_value(line, Props) of
  338. undefined ->
  339. format_mfa({M, F, A});
  340. Line ->
  341. [format_mfa({M, F, A}), io_lib:format(" line ~w", [Line])]
  342. end;
  343. format_mfa([{M, F, A}, _]) ->
  344. %% this kind of weird stacktrace can be generated by a uncaught throw in a gen_server
  345. format_mfa({M, F, A});
  346. format_mfa([{M, F, A, Props}, _]) when is_list(Props) ->
  347. %% this kind of weird stacktrace can be generated by a uncaught throw in a gen_server
  348. format_mfa({M, F, A, Props});
  349. format_mfa(Other) ->
  350. io_lib:format("~w", [Other]).
  351. format_args([], FmtAcc, ArgsAcc) ->
  352. {string:join(lists:reverse(FmtAcc), ", "), lists:reverse(ArgsAcc)};
  353. format_args([H|T], FmtAcc, ArgsAcc) ->
  354. {Str, _} = lager_trunc_io:print(H, 100),
  355. format_args(T, ["~s"|FmtAcc], [Str|ArgsAcc]).
  356. print_silly_list(L) when is_list(L) ->
  357. case lager_stdlib:string_p(L) of
  358. true ->
  359. lager_trunc_io:format("~s", [L], ?DEFAULT_TRUNCATION);
  360. _ ->
  361. print_silly_list(L, [], [])
  362. end;
  363. print_silly_list(L) ->
  364. {Str, _} = lager_trunc_io:print(L, ?DEFAULT_TRUNCATION),
  365. Str.
  366. print_silly_list([], Fmt, Acc) ->
  367. lager_trunc_io:format(string:join(lists:reverse(Fmt), ", "),
  368. lists:reverse(Acc), ?DEFAULT_TRUNCATION);
  369. print_silly_list([{K,V}|T], Fmt, Acc) ->
  370. print_silly_list(T, ["~p: ~p" | Fmt], [V, K | Acc]);
  371. print_silly_list([H|T], Fmt, Acc) ->
  372. print_silly_list(T, ["~p" | Fmt], [H | Acc]).
  373. print_val(Val) ->
  374. {Str, _} = lager_trunc_io:print(Val, 500),
  375. Str.
  376. supervisor_name({local, Name}) -> Name;
  377. supervisor_name(Name) -> Name.
  378. -ifdef(TEST).
  379. %% Not intended to be a fully paranoid EUnit test....
  380. t0() ->
  381. application:stop(lager),
  382. application:stop(sasl),
  383. lager:start(),
  384. set_high_water(5),
  385. [error_logger:warning_msg("Foo ~p!", [X]) || X <- lists:seq(1,10)],
  386. timer:sleep(1000),
  387. [error_logger:warning_msg("Bar ~p!", [X]) || X <- lists:seq(1,10)],
  388. timer:sleep(1000),
  389. error_logger:warning_msg("Baz!"),
  390. ok.
  391. -endif. % TEST