瀏覽代碼

Implement configurable error_logger msg drop threshold

Implement a new config option error_logger_hwm, which is a number
representing how many messages per second we should log from the
error_logger. If that threshold is exceeded, messages will be discarded
for the remainder of that second.

This is only effective if lager itself can process the messages fast
enough to satisfy the threshold. If your threshold is 1000 and lager
itself is only writing 100 messages a second (because error messages are
causing fsyncs or whatever) you'll never exceed the threshold and drops
will never happen. Thus care needs to be taken when selecting this
feature.

Setting it low is not as bad as it might seem, because when using lager,
hopefully error_logger messages are unusual. In my testing, 50/second
with the default config seemed reasonable (which has 2 file backends
installed, both of which fsync on messages at error or above).
pull/116/head
Andrew Thompson 12 年之前
父節點
當前提交
ef691d8947
共有 2 個檔案被更改,包括 84 行新增14 行删除
  1. +73
    -13
      src/error_logger_lager_h.erl
  2. +11
    -1
      src/lager_app.erl

+ 73
- 13
src/error_logger_lager_h.erl 查看文件

@ -32,6 +32,17 @@
-export([format_reason/1]).
-record(state, {
%% how many messages per second we try to deliver
hwm = undefined :: 'undefined' | pos_integer(),
%% how many messages we've received this second
mps = 0 :: non_neg_integer(),
%% the current second
lasttime = os:timestamp() :: os:timestamp(),
%% count of dropped messages this second
dropped = 0 :: non_neg_integer()
}).
-define(LOGMSG(Level, Pid, Msg),
case ?SHOULD_LOG(Level) of
true ->
@ -58,13 +69,73 @@
-endif.
-spec init(any()) -> {ok, {}}.
init(_) ->
{ok, {}}.
init([HighWaterMark]) ->
{ok, #state{hwm=HighWaterMark}}.
handle_call(_Request, State) ->
{ok, ok, State}.
handle_event(Event, State) ->
case check_hwm(State) of
{true, NewState} ->
log_event(Event, NewState);
{false, NewState} ->
{ok, NewState}
end.
handle_info(_Info, State) ->
{ok, State}.
terminate(_Reason, _State) ->
ok.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%% internal functions
check_hwm(State = #state{hwm = undefined}) ->
{true, State};
check_hwm(State = #state{mps = Mps, hwm = Hwm}) when Mps < Hwm ->
%% haven't hit high water mark yet, just log it
{true, State#state{mps=Mps+1}};
check_hwm(State = #state{hwm = Hwm, lasttime = Last, dropped = Drop}) ->
%% are we still in the same second?
{M, S, _} = Now = os:timestamp(),
case Last of
{M, S, _} ->
%% still in same second, but have exceeded the high water mark
NewDrops = discard_messages(Now, 0),
%io:format("dropped ~p messages~n", [NewDrops]),
{false, State#state{dropped=Drop+NewDrops}};
_ ->
%% different second, reset all counters and allow it
%% TODO - do we care if the clock goes backwards?
case Drop > 0 of
true ->
?LOGFMT(warning, self(), "lager_error_logger_h dropped ~p messages in the last second that exceeded the limit of ~p messages/sec",
[Drop, Hwm]);
false ->
ok
end,
{true, State#state{dropped = 0, mps=1, lasttime = Now}}
end.
discard_messages(Second, Count) ->
{M, S, _} = os:timestamp(),
case Second of
{M, S, _} ->
receive
_Msg ->
discard_messages(Second, Count+1)
after 0 ->
Count
end;
_ ->
Count
end.
log_event(Event, State) ->
case Event of
{error, _GL, {Pid, Fmt, Args}} ->
case Fmt of
@ -143,17 +214,6 @@ handle_event(Event, State) ->
end,
{ok, State}.
handle_info(_Info, State) ->
{ok, State}.
terminate(_Reason, _State) ->
ok.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%% internal functions
format_crash_report(Report, Neighbours) ->
Name = case proplists:get_value(registered_name, Report, []) of
[] ->

+ 11
- 1
src/lager_app.erl 查看文件

@ -52,6 +52,16 @@ start(_StartType, _StartArgs) ->
{_, Traces} = lager_config:get(loglevel),
lager_config:set(loglevel, {MinLog, Traces}),
HighWaterMark = case application:get_env(lager, error_logger_hwm) of
{ok, HwmVal} when is_integer(HwmVal), HwmVal > 0 ->
HwmVal;
{ok, BadVal} ->
_ = lager:log(warning, self(), "Invalid error_logger high water mark: ~p, disabling", [BadVal]),
undefined;
undefined ->
undefined
end,
SavedHandlers =
case application:get_env(lager, error_logger_redirect) of
{ok, false} ->
@ -64,7 +74,7 @@ start(_StartType, _StartArgs) ->
WhiteList
end,
case supervisor:start_child(lager_handler_watcher_sup, [error_logger, error_logger_lager_h, []]) of
case supervisor:start_child(lager_handler_watcher_sup, [error_logger, error_logger_lager_h, [HighWaterMark]]) of
{ok, _} ->
[begin error_logger:delete_report_handler(X), X end ||
X <- gen_event:which_handlers(error_logger) -- [error_logger_lager_h | WhiteList]];

Loading…
取消
儲存