소스 검색

Implement configurable error_logger msg drop threshold

Implement a new config option error_logger_hwm, which is a number
representing how many messages per second we should log from the
error_logger. If that threshold is exceeded, messages will be discarded
for the remainder of that second.

This is only effective if lager itself can process the messages fast
enough to satisfy the threshold. If your threshold is 1000 and lager
itself is only writing 100 messages a second (because error messages are
causing fsyncs or whatever) you'll never exceed the threshold and drops
will never happen. Thus care needs to be taken when selecting this
feature.

Setting it low is not as bad as it might seem, because when using lager,
hopefully error_logger messages are unusual. In my testing, 50/second
with the default config seemed reasonable (which has 2 file backends
installed, both of which fsync on messages at error or above).
pull/116/head
Andrew Thompson 12 년 전
부모
커밋
ef691d8947
2개의 변경된 파일84개의 추가작업 그리고 14개의 파일을 삭제
  1. +73
    -13
      src/error_logger_lager_h.erl
  2. +11
    -1
      src/lager_app.erl

+ 73
- 13
src/error_logger_lager_h.erl 파일 보기

@ -32,6 +32,17 @@
-export([format_reason/1]).
-record(state, {
%% how many messages per second we try to deliver
hwm = undefined :: 'undefined' | pos_integer(),
%% how many messages we've received this second
mps = 0 :: non_neg_integer(),
%% the current second
lasttime = os:timestamp() :: os:timestamp(),
%% count of dropped messages this second
dropped = 0 :: non_neg_integer()
}).
-define(LOGMSG(Level, Pid, Msg),
case ?SHOULD_LOG(Level) of
true ->
@ -58,13 +69,73 @@
-endif.
-spec init(any()) -> {ok, {}}.
init(_) ->
{ok, {}}.
init([HighWaterMark]) ->
{ok, #state{hwm=HighWaterMark}}.
handle_call(_Request, State) ->
{ok, ok, State}.
handle_event(Event, State) ->
case check_hwm(State) of
{true, NewState} ->
log_event(Event, NewState);
{false, NewState} ->
{ok, NewState}
end.
handle_info(_Info, State) ->
{ok, State}.
terminate(_Reason, _State) ->
ok.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%% internal functions
check_hwm(State = #state{hwm = undefined}) ->
{true, State};
check_hwm(State = #state{mps = Mps, hwm = Hwm}) when Mps < Hwm ->
%% haven't hit high water mark yet, just log it
{true, State#state{mps=Mps+1}};
check_hwm(State = #state{hwm = Hwm, lasttime = Last, dropped = Drop}) ->
%% are we still in the same second?
{M, S, _} = Now = os:timestamp(),
case Last of
{M, S, _} ->
%% still in same second, but have exceeded the high water mark
NewDrops = discard_messages(Now, 0),
%io:format("dropped ~p messages~n", [NewDrops]),
{false, State#state{dropped=Drop+NewDrops}};
_ ->
%% different second, reset all counters and allow it
%% TODO - do we care if the clock goes backwards?
case Drop > 0 of
true ->
?LOGFMT(warning, self(), "lager_error_logger_h dropped ~p messages in the last second that exceeded the limit of ~p messages/sec",
[Drop, Hwm]);
false ->
ok
end,
{true, State#state{dropped = 0, mps=1, lasttime = Now}}
end.
discard_messages(Second, Count) ->
{M, S, _} = os:timestamp(),
case Second of
{M, S, _} ->
receive
_Msg ->
discard_messages(Second, Count+1)
after 0 ->
Count
end;
_ ->
Count
end.
log_event(Event, State) ->
case Event of
{error, _GL, {Pid, Fmt, Args}} ->
case Fmt of
@ -143,17 +214,6 @@ handle_event(Event, State) ->
end,
{ok, State}.
handle_info(_Info, State) ->
{ok, State}.
terminate(_Reason, _State) ->
ok.
code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%% internal functions
format_crash_report(Report, Neighbours) ->
Name = case proplists:get_value(registered_name, Report, []) of
[] ->

+ 11
- 1
src/lager_app.erl 파일 보기

@ -52,6 +52,16 @@ start(_StartType, _StartArgs) ->
{_, Traces} = lager_config:get(loglevel),
lager_config:set(loglevel, {MinLog, Traces}),
HighWaterMark = case application:get_env(lager, error_logger_hwm) of
{ok, HwmVal} when is_integer(HwmVal), HwmVal > 0 ->
HwmVal;
{ok, BadVal} ->
_ = lager:log(warning, self(), "Invalid error_logger high water mark: ~p, disabling", [BadVal]),
undefined;
undefined ->
undefined
end,
SavedHandlers =
case application:get_env(lager, error_logger_redirect) of
{ok, false} ->
@ -64,7 +74,7 @@ start(_StartType, _StartArgs) ->
WhiteList
end,
case supervisor:start_child(lager_handler_watcher_sup, [error_logger, error_logger_lager_h, []]) of
case supervisor:start_child(lager_handler_watcher_sup, [error_logger, error_logger_lager_h, [HighWaterMark]]) of
{ok, _} ->
[begin error_logger:delete_report_handler(X), X end ||
X <- gen_event:which_handlers(error_logger) -- [error_logger_lager_h | WhiteList]];

불러오는 중...
취소
저장