You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

426 lines
17 KiB

преди 12 години
преди 14 години
преди 12 години
преди 12 години
преди 12 години
преди 12 години
преди 14 години
преди 14 години
преди 13 години
преди 14 години
преди 14 години
преди 14 години
преди 14 години
преди 14 години
преди 14 години
преди 14 години
преди 14 години
преди 14 години
преди 13 години
преди 11 години
  1. %% Copyright (c) 2011-2012 Basho Technologies, Inc. All Rights Reserved.
  2. %%
  3. %% This file is provided to you under the Apache License,
  4. %% Version 2.0 (the "License"); you may not use this file
  5. %% except in compliance with the License. You may obtain
  6. %% a copy of the License at
  7. %%
  8. %% http://www.apache.org/licenses/LICENSE-2.0
  9. %%
  10. %% Unless required by applicable law or agreed to in writing,
  11. %% software distributed under the License is distributed on an
  12. %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  13. %% KIND, either express or implied. See the License for the
  14. %% specific language governing permissions and limitations
  15. %% under the License.
  16. %% @doc A error_logger backend for redirecting events into lager.
  17. %% Error messages and crash logs are also optionally written to a crash log.
  18. %% @see lager_crash_log
  19. %% @private
  20. -module(error_logger_lager_h).
  21. -include("lager.hrl").
  22. -behaviour(gen_event).
  23. -export([set_high_water/1]).
  24. -export([init/1, handle_call/2, handle_event/2, handle_info/2, terminate/2,
  25. code_change/3]).
  26. -export([format_reason/1]).
  27. -record(state, {
  28. %% how many messages per second we try to deliver
  29. hwm = undefined :: 'undefined' | pos_integer(),
  30. %% how many messages we've received this second
  31. mps = 0 :: non_neg_integer(),
  32. %% the current second
  33. lasttime = os:timestamp() :: erlang:timestamp(),
  34. %% count of dropped messages this second
  35. dropped = 0 :: non_neg_integer()
  36. }).
  37. -define(LOGMSG(Level, Pid, Msg),
  38. case ?SHOULD_LOG(Level) of
  39. true ->
  40. _ =lager:log(Level, Pid, Msg),
  41. ok;
  42. _ -> ok
  43. end).
  44. -define(LOGFMT(Level, Pid, Fmt, Args),
  45. case ?SHOULD_LOG(Level) of
  46. true ->
  47. _ = lager:log(Level, Pid, Fmt, Args),
  48. ok;
  49. _ -> ok
  50. end).
  51. -ifdef(TEST).
  52. -compile(export_all).
  53. %% Make CRASH synchronous when testing, to avoid timing headaches
  54. -define(CRASH_LOG(Event),
  55. catch(gen_server:call(lager_crash_log, {log, Event}))).
  56. -else.
  57. -define(CRASH_LOG(Event),
  58. gen_server:cast(lager_crash_log, {log, Event})).
  59. -endif.
  60. set_high_water(N) ->
  61. gen_event:call(error_logger, ?MODULE, {set_high_water, N}, infinity).
  62. -spec init(any()) -> {ok, #state{}}.
  63. init([HighWaterMark]) ->
  64. {ok, #state{hwm=HighWaterMark}}.
  65. handle_call({set_high_water, N}, State) ->
  66. {ok, ok, State#state{hwm = N}};
  67. handle_call(_Request, State) ->
  68. {ok, unknown_call, State}.
  69. handle_event(Event, State) ->
  70. case check_hwm(State) of
  71. {true, NewState} ->
  72. log_event(Event, NewState);
  73. {false, NewState} ->
  74. {ok, NewState}
  75. end.
  76. handle_info(_Info, State) ->
  77. {ok, State}.
  78. terminate(_Reason, _State) ->
  79. ok.
  80. code_change(_OldVsn, State, _Extra) ->
  81. {ok, State}.
  82. %% internal functions
  83. check_hwm(State = #state{hwm = undefined}) ->
  84. {true, State};
  85. check_hwm(State = #state{mps = Mps, hwm = Hwm}) when Mps < Hwm ->
  86. %% haven't hit high water mark yet, just log it
  87. {true, State#state{mps=Mps+1}};
  88. check_hwm(State = #state{hwm = Hwm, lasttime = Last, dropped = Drop}) ->
  89. %% are we still in the same second?
  90. {M, S, _} = Now = os:timestamp(),
  91. case Last of
  92. {M, S, _} ->
  93. %% still in same second, but have exceeded the high water mark
  94. NewDrops = discard_messages(Now, 0),
  95. {false, State#state{dropped=Drop+NewDrops}};
  96. _ ->
  97. %% different second, reset all counters and allow it
  98. case Drop > 0 of
  99. true ->
  100. ?LOGFMT(warning, self(), "lager_error_logger_h dropped ~p messages in the last second that exceeded the limit of ~p messages/sec",
  101. [Drop, Hwm]);
  102. false ->
  103. ok
  104. end,
  105. {true, State#state{dropped = 0, mps=1, lasttime = Now}}
  106. end.
  107. discard_messages(Second, Count) ->
  108. {M, S, _} = os:timestamp(),
  109. case Second of
  110. {M, S, _} ->
  111. receive
  112. _Msg ->
  113. discard_messages(Second, Count+1)
  114. after 0 ->
  115. Count
  116. end;
  117. _ ->
  118. Count
  119. end.
  120. log_event(Event, State) ->
  121. case Event of
  122. {error, _GL, {Pid, Fmt, Args}} ->
  123. case Fmt of
  124. "** Generic server "++_ ->
  125. %% gen_server terminate
  126. [Name, _Msg, _State, Reason] = Args,
  127. ?CRASH_LOG(Event),
  128. ?LOGFMT(error, Pid, "gen_server ~w terminated with reason: ~s",
  129. [Name, format_reason(Reason)]);
  130. "** State machine "++_ ->
  131. %% gen_fsm terminate
  132. [Name, _Msg, StateName, _StateData, Reason] = Args,
  133. ?CRASH_LOG(Event),
  134. ?LOGFMT(error, Pid, "gen_fsm ~w in state ~w terminated with reason: ~s",
  135. [Name, StateName, format_reason(Reason)]);
  136. "** gen_event handler"++_ ->
  137. %% gen_event handler terminate
  138. [ID, Name, _Msg, _State, Reason] = Args,
  139. ?CRASH_LOG(Event),
  140. ?LOGFMT(error, Pid, "gen_event ~w installed in ~w terminated with reason: ~s",
  141. [ID, Name, format_reason(Reason)]);
  142. "** Cowboy handler"++_ ->
  143. %% Cowboy HTTP server error
  144. ?CRASH_LOG(Event),
  145. case Args of
  146. [Module, Function, Arity, _Request, _State] ->
  147. %% we only get the 5-element list when its a non-exported function
  148. ?LOGFMT(error, Pid,
  149. "Cowboy handler ~p terminated with reason: call to undefined function ~p:~p/~p",
  150. [Module, Module, Function, Arity]);
  151. [Module, Function, Arity, _Class, Reason | Tail] ->
  152. %% any other cowboy error_format list *always* ends with the stacktrace
  153. StackTrace = lists:last(Tail),
  154. ?LOGFMT(error, Pid,
  155. "Cowboy handler ~p terminated in ~p:~p/~p with reason: ~s",
  156. [Module, Module, Function, Arity, format_reason({Reason, StackTrace})])
  157. end;
  158. "webmachine error"++_ ->
  159. %% Webmachine HTTP server error
  160. ?CRASH_LOG(Event),
  161. [Path, Error] = Args,
  162. %% webmachine likes to mangle the stack, for some reason
  163. StackTrace = case Error of
  164. {error, {error, Reason, Stack}} ->
  165. {Reason, Stack};
  166. _ ->
  167. Error
  168. end,
  169. ?LOGFMT(error, Pid, "Webmachine error at path ~p : ~s", [Path, format_reason(StackTrace)]);
  170. _ ->
  171. ?CRASH_LOG(Event),
  172. ?LOGMSG(error, Pid, lager:safe_format(Fmt, Args, ?DEFAULT_TRUNCATION))
  173. end;
  174. {error_report, _GL, {Pid, std_error, D}} ->
  175. ?CRASH_LOG(Event),
  176. ?LOGMSG(error, Pid, print_silly_list(D));
  177. {error_report, _GL, {Pid, supervisor_report, D}} ->
  178. ?CRASH_LOG(Event),
  179. case lists:sort(D) of
  180. [{errorContext, Ctx}, {offender, Off}, {reason, Reason}, {supervisor, Name}] ->
  181. Offender = format_offender(Off),
  182. ?LOGFMT(error, Pid,
  183. "Supervisor ~w had child ~s exit with reason ~s in context ~w",
  184. [supervisor_name(Name), Offender, format_reason(Reason), Ctx]);
  185. _ ->
  186. ?LOGMSG(error, Pid, "SUPERVISOR REPORT " ++ print_silly_list(D))
  187. end;
  188. {error_report, _GL, {Pid, crash_report, [Self, Neighbours]}} ->
  189. ?CRASH_LOG(Event),
  190. ?LOGMSG(error, Pid, "CRASH REPORT " ++ format_crash_report(Self, Neighbours));
  191. {warning_msg, _GL, {Pid, Fmt, Args}} ->
  192. ?LOGMSG(warning, Pid, lager:safe_format(Fmt, Args, ?DEFAULT_TRUNCATION));
  193. {warning_report, _GL, {Pid, std_warning, Report}} ->
  194. ?LOGMSG(warning, Pid, print_silly_list(Report));
  195. {info_msg, _GL, {Pid, Fmt, Args}} ->
  196. ?LOGMSG(info, Pid, lager:safe_format(Fmt, Args, ?DEFAULT_TRUNCATION));
  197. {info_report, _GL, {Pid, std_info, D}} when is_list(D) ->
  198. Details = lists:sort(D),
  199. case Details of
  200. [{application, App}, {exited, Reason}, {type, _Type}] ->
  201. ?LOGFMT(info, Pid, "Application ~w exited with reason: ~s",
  202. [App, format_reason(Reason)]);
  203. _ ->
  204. ?LOGMSG(info, Pid, print_silly_list(D))
  205. end;
  206. {info_report, _GL, {Pid, std_info, D}} ->
  207. ?LOGFMT(info, Pid, "~w", [D]);
  208. {info_report, _GL, {P, progress, D}} ->
  209. Details = lists:sort(D),
  210. case Details of
  211. [{application, App}, {started_at, Node}] ->
  212. ?LOGFMT(info, P, "Application ~w started on node ~w",
  213. [App, Node]);
  214. [{started, Started}, {supervisor, Name}] ->
  215. MFA = format_mfa(proplists:get_value(mfargs, Started)),
  216. Pid = proplists:get_value(pid, Started),
  217. ?LOGFMT(debug, P, "Supervisor ~w started ~s at pid ~w",
  218. [supervisor_name(Name), MFA, Pid]);
  219. _ ->
  220. ?LOGMSG(info, P, "PROGRESS REPORT " ++ print_silly_list(D))
  221. end;
  222. _ ->
  223. ?LOGFMT(warning, self(), "Unexpected error_logger event ~w", [Event])
  224. end,
  225. {ok, State}.
  226. format_crash_report(Report, Neighbours) ->
  227. Name = case proplists:get_value(registered_name, Report, []) of
  228. [] ->
  229. %% process_info(Pid, registered_name) returns [] for unregistered processes
  230. proplists:get_value(pid, Report);
  231. Atom -> Atom
  232. end,
  233. {Class, Reason, Trace} = proplists:get_value(error_info, Report),
  234. ReasonStr = format_reason({Reason, Trace}),
  235. Type = case Class of
  236. exit -> "exited";
  237. _ -> "crashed"
  238. end,
  239. io_lib:format("Process ~w with ~w neighbours ~s with reason: ~s",
  240. [Name, length(Neighbours), Type, ReasonStr]).
  241. format_offender(Off) ->
  242. case proplists:get_value(mfargs, Off) of
  243. undefined ->
  244. %% supervisor_bridge
  245. io_lib:format("at module ~w at ~w",
  246. [proplists:get_value(mod, Off), proplists:get_value(pid, Off)]);
  247. MFArgs ->
  248. %% regular supervisor
  249. MFA = format_mfa(MFArgs),
  250. Name = proplists:get_value(name, Off),
  251. io_lib:format("~p started with ~s at ~w",
  252. [Name, MFA, proplists:get_value(pid, Off)])
  253. end.
  254. format_reason({'function not exported', [{M, F, A},MFA|_]}) ->
  255. ["call to undefined function ", format_mfa({M, F, length(A)}),
  256. " from ", format_mfa(MFA)];
  257. format_reason({'function not exported', [{M, F, A, _Props},MFA|_]}) ->
  258. %% R15 line numbers
  259. ["call to undefined function ", format_mfa({M, F, length(A)}),
  260. " from ", format_mfa(MFA)];
  261. format_reason({undef, [MFA|_]}) ->
  262. ["call to undefined function ", format_mfa(MFA)];
  263. format_reason({bad_return, {_MFA, {'EXIT', Reason}}}) ->
  264. format_reason(Reason);
  265. format_reason({bad_return, {MFA, Val}}) ->
  266. ["bad return value ", print_val(Val), " from ", format_mfa(MFA)];
  267. format_reason({bad_return_value, Val}) ->
  268. ["bad return value: ", print_val(Val)];
  269. format_reason({{bad_return_value, Val}, MFA}) ->
  270. ["bad return value: ", print_val(Val), " in ", format_mfa(MFA)];
  271. format_reason({{badrecord, Record}, [MFA|_]}) ->
  272. ["bad record ", print_val(Record), " in ", format_mfa(MFA)];
  273. format_reason({{case_clause, Val}, [MFA|_]}) ->
  274. ["no case clause matching ", print_val(Val), " in ", format_mfa(MFA)];
  275. format_reason({function_clause, [MFA|_]}) ->
  276. ["no function clause matching ", format_mfa(MFA)];
  277. format_reason({if_clause, [MFA|_]}) ->
  278. ["no true branch found while evaluating if expression in ", format_mfa(MFA)];
  279. format_reason({{try_clause, Val}, [MFA|_]}) ->
  280. ["no try clause matching ", print_val(Val), " in ", format_mfa(MFA)];
  281. format_reason({badarith, [MFA|_]}) ->
  282. ["bad arithmetic expression in ", format_mfa(MFA)];
  283. format_reason({{badmatch, Val}, [MFA|_]}) ->
  284. ["no match of right hand value ", print_val(Val), " in ", format_mfa(MFA)];
  285. format_reason({emfile, _Trace}) ->
  286. "maximum number of file descriptors exhausted, check ulimit -n";
  287. format_reason({system_limit, [{M, F, _}|_] = Trace}) ->
  288. Limit = case {M, F} of
  289. {erlang, open_port} ->
  290. "maximum number of ports exceeded";
  291. {erlang, spawn} ->
  292. "maximum number of processes exceeded";
  293. {erlang, spawn_opt} ->
  294. "maximum number of processes exceeded";
  295. {erlang, list_to_atom} ->
  296. "tried to create an atom larger than 255, or maximum atom count exceeded";
  297. {ets, new} ->
  298. "maximum number of ETS tables exceeded";
  299. _ ->
  300. {Str, _} = lager_trunc_io:print(Trace, 500),
  301. Str
  302. end,
  303. ["system limit: ", Limit];
  304. format_reason({badarg, [MFA,MFA2|_]}) ->
  305. case MFA of
  306. {_M, _F, A, _Props} when is_list(A) ->
  307. %% R15 line numbers
  308. ["bad argument in call to ", format_mfa(MFA), " in ", format_mfa(MFA2)];
  309. {_M, _F, A} when is_list(A) ->
  310. ["bad argument in call to ", format_mfa(MFA), " in ", format_mfa(MFA2)];
  311. _ ->
  312. %% seems to be generated by a bad call to a BIF
  313. ["bad argument in ", format_mfa(MFA)]
  314. end;
  315. format_reason({{badarity, {Fun, Args}}, [MFA|_]}) ->
  316. {arity, Arity} = lists:keyfind(arity, 1, erlang:fun_info(Fun)),
  317. [io_lib:format("fun called with wrong arity of ~w instead of ~w in ",
  318. [length(Args), Arity]), format_mfa(MFA)];
  319. format_reason({noproc, MFA}) ->
  320. ["no such process or port in call to ", format_mfa(MFA)];
  321. format_reason({{badfun, Term}, [MFA|_]}) ->
  322. ["bad function ", print_val(Term), " in ", format_mfa(MFA)];
  323. format_reason({Reason, [{M, F, A}|_]}) when is_atom(M), is_atom(F), is_integer(A) ->
  324. [format_reason(Reason), " in ", format_mfa({M, F, A})];
  325. format_reason({Reason, [{M, F, A, Props}|_]}) when is_atom(M), is_atom(F), is_integer(A), is_list(Props) ->
  326. %% line numbers
  327. [format_reason(Reason), " in ", format_mfa({M, F, A, Props})];
  328. format_reason(Reason) ->
  329. {Str, _} = lager_trunc_io:print(Reason, 500),
  330. Str.
  331. format_mfa({M, F, A}) when is_list(A) ->
  332. {FmtStr, Args} = format_args(A, [], []),
  333. io_lib:format("~w:~w("++FmtStr++")", [M, F | Args]);
  334. format_mfa({M, F, A}) when is_integer(A) ->
  335. io_lib:format("~w:~w/~w", [M, F, A]);
  336. format_mfa({M, F, A, Props}) when is_list(Props) ->
  337. case proplists:get_value(line, Props) of
  338. undefined ->
  339. format_mfa({M, F, A});
  340. Line ->
  341. [format_mfa({M, F, A}), io_lib:format(" line ~w", [Line])]
  342. end;
  343. format_mfa([{M, F, A}, _]) ->
  344. %% this kind of weird stacktrace can be generated by a uncaught throw in a gen_server
  345. format_mfa({M, F, A});
  346. format_mfa([{M, F, A, Props}, _]) when is_list(Props) ->
  347. %% this kind of weird stacktrace can be generated by a uncaught throw in a gen_server
  348. format_mfa({M, F, A, Props});
  349. format_mfa(Other) ->
  350. io_lib:format("~w", [Other]).
  351. format_args([], FmtAcc, ArgsAcc) ->
  352. {string:join(lists:reverse(FmtAcc), ", "), lists:reverse(ArgsAcc)};
  353. format_args([H|T], FmtAcc, ArgsAcc) ->
  354. {Str, _} = lager_trunc_io:print(H, 100),
  355. format_args(T, ["~s"|FmtAcc], [Str|ArgsAcc]).
  356. print_silly_list(L) when is_list(L) ->
  357. case lager_stdlib:string_p(L) of
  358. true ->
  359. lager_trunc_io:format("~s", [L], ?DEFAULT_TRUNCATION);
  360. _ ->
  361. print_silly_list(L, [], [])
  362. end;
  363. print_silly_list(L) ->
  364. {Str, _} = lager_trunc_io:print(L, ?DEFAULT_TRUNCATION),
  365. Str.
  366. print_silly_list([], Fmt, Acc) ->
  367. lager_trunc_io:format(string:join(lists:reverse(Fmt), ", "),
  368. lists:reverse(Acc), ?DEFAULT_TRUNCATION);
  369. print_silly_list([{K,V}|T], Fmt, Acc) ->
  370. print_silly_list(T, ["~p: ~p" | Fmt], [V, K | Acc]);
  371. print_silly_list([H|T], Fmt, Acc) ->
  372. print_silly_list(T, ["~p" | Fmt], [H | Acc]).
  373. print_val(Val) ->
  374. {Str, _} = lager_trunc_io:print(Val, 500),
  375. Str.
  376. supervisor_name({local, Name}) -> Name;
  377. supervisor_name(Name) -> Name.
  378. -ifdef(TEST).
  379. %% Not intended to be a fully paranoid EUnit test....
  380. t0() ->
  381. application:stop(lager),
  382. application:stop(sasl),
  383. lager:start(),
  384. set_high_water(5),
  385. [error_logger:warning_msg("Foo ~p!", [X]) || X <- lists:seq(1,10)],
  386. timer:sleep(1000),
  387. [error_logger:warning_msg("Bar ~p!", [X]) || X <- lists:seq(1,10)],
  388. timer:sleep(1000),
  389. error_logger:warning_msg("Baz!"),
  390. ok.
  391. -endif. % TEST