From 4680b7f8eb253602809846dadb0a1a846d9658c9 Mon Sep 17 00:00:00 2001 From: Lajos Gerecs Date: Thu, 11 Apr 2019 17:13:58 +0200 Subject: [PATCH 1/2] Fix for incorrectly counting dropped messages Lager changes the error level of supervisor progress messages from info to debug. This is done after the hwm checking changes the received message count. This PR implements a fix by removing one from the count if no message was sent. Closes: #496 --- src/error_logger_lager_h.erl | 125 ++++++++++++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 9 deletions(-) diff --git a/src/error_logger_lager_h.erl b/src/error_logger_lager_h.erl index 72ff9d9..733e0d7 100644 --- a/src/error_logger_lager_h.erl +++ b/src/error_logger_lager_h.erl @@ -45,16 +45,16 @@ case ?SHOULD_LOG(Sink, Level) of true -> _ =lager:log(Sink, Level, Pid, Msg, []), - ok; - _ -> ok + logged; + _ -> no_log end). -define(LOGFMT(Sink, Level, Pid, Fmt, Args), case ?SHOULD_LOG(Sink, Level) of true -> _ = lager:log(Sink, Level, Pid, Fmt, Args), - ok; - _ -> ok + logged; + _ -> no_log end). -ifdef(TEST). @@ -179,7 +179,7 @@ eval_gl(Event, State) -> log_event(Event, State). log_event(Event, #state{sink=Sink} = State) -> - case Event of + DidLog = case Event of {error, _GL, {Pid, Fmt, Args}} -> FormatRaw = State#state.raw, case {FormatRaw, Fmt} of @@ -316,7 +316,7 @@ log_event(Event, #state{sink=Sink} = State) -> [{application, App}, {exited, Reason}, {type, _Type}] -> case application:get_env(lager, suppress_application_start_stop) of {ok, true} when Reason == stopped -> - ok; + no_log; _ -> {Md, Formatted} = format_reason_md(Reason), ?LOGFMT(Sink, info, [{pid, Pid} | Md], "Application ~w exited with reason: ~s", @@ -333,7 +333,7 @@ log_event(Event, #state{sink=Sink} = State) -> [{application, App}, {started_at, Node}] -> case application:get_env(lager, suppress_application_start_stop) of {ok, true} -> - ok; + no_log; _ -> ?LOGFMT(Sink, info, P, "Application ~w started on node ~w", [App, Node]) @@ -341,7 +341,7 @@ log_event(Event, #state{sink=Sink} = State) -> [{started, Started}, {supervisor, Name}] -> case application:get_env(lager, suppress_supervisor_start_stop, false) of true -> - ok; + no_log; _ -> MFA = format_mfa(get_value(mfargs, Started)), Pid = get_value(pid, Started), @@ -354,7 +354,20 @@ log_event(Event, #state{sink=Sink} = State) -> _ -> ?LOGFMT(Sink, warning, self(), "Unexpected error_logger event ~w", [Event]) end, - {ok, State}. + case DidLog of + logged -> + {ok, State}; + no_log -> + Shaper = State#state.shaper, + {ok, State#state{ + shaper = Shaper#lager_shaper{ + mps = Shaper#lager_shaper.mps - 1 + } + }}; + Invalid -> + ?LOGFMT(Sink, error, self(), "Unexpeted log result: ~p", [Invalid]), + {ok, State} + end. format_crash_report(Report, Neighbours) -> Name = case get_value(registered_name, Report, []) of @@ -646,4 +659,98 @@ no_silent_hwm_drops_test_() -> ] }. +shaper_does_not_forward_sup_progress_messages_to_info_level_backend_test_() -> + {timeout, 10000, + [fun() -> + error_logger:tty(false), + application:load(lager), + application:set_env(lager, handlers, [{lager_test_backend, info}]), + application:set_env(lager, error_logger_redirect, true), + application:set_env(lager, error_logger_hwm, 5), + application:set_env(lager, suppress_supervisor_start_stop, false), + application:set_env(lager, suppress_application_start_stop, false), + application:unset_env(lager, crash_log), + lager:start(), + try + PidPlaceholder = self(), + SupervisorMsg = + [{supervisor, {PidPlaceholder,rabbit_connection_sup}}, + {started, + [{pid, PidPlaceholder}, + {name,helper_sup}, + {mfargs, + {rabbit_connection_helper_sup,start_link,[]}}, + {restart_type,intrinsic}, + {shutdown,infinity}, + {child_type,supervisor}]}], + ApplicationExit = + [{application, error_logger_lager_h_test}, + {exited, stopped}, + {type, permanent}], + + error_logger:info_report("This is not a progress message"), + error_logger:info_report(ApplicationExit), + [error_logger:info_report(progress, SupervisorMsg) || _K <- lists:seq(0, 100)], + error_logger:info_report("This is not a progress message 2"), + + timer:sleep(1000), + + 3 = lager_test_backend:count(), + 0 = lager_test_backend:count_ignored() % it's not forwarded at all + after + application:stop(lager), + application:stop(goldrush), + error_logger:tty(true) + end + end + ] + }. + +supressed_messages_are_not_counted_for_hwm_test_() -> + {timeout, 10000, + [fun() -> + error_logger:tty(false), + application:load(lager), + application:set_env(lager, handlers, [{lager_test_backend, debug}]), + application:set_env(lager, error_logger_redirect, true), + application:set_env(lager, error_logger_hwm, 5), + application:set_env(lager, suppress_supervisor_start_stop, true), + application:set_env(lager, suppress_application_start_stop, true), + application:unset_env(lager, crash_log), + lager:start(), + try + PidPlaceholder = self(), + SupervisorMsg = + [{supervisor, {PidPlaceholder,rabbit_connection_sup}}, + {started, + [{pid, PidPlaceholder}, + {name,helper_sup}, + {mfargs, + {rabbit_connection_helper_sup,start_link,[]}}, + {restart_type,intrinsic}, + {shutdown,infinity}, + {child_type,supervisor}]}], + ApplicationExit = + [{application, error_logger_lager_h_test}, + {exited, stopped}, + {type, permanent}], + + lager_test_backend:flush(), + error_logger:info_report("This is not a progress message"), + [error_logger:info_report(ApplicationExit) || _K <- lists:seq(0, 100)], + [error_logger:info_report(progress, SupervisorMsg) || _K <- lists:seq(0, 100)], + error_logger:info_report("This is not a progress message 2"), + + timer:sleep(1000), + + 2 = lager_test_backend:count(), + 0 = lager_test_backend:count_ignored() + after + application:stop(lager), + application:stop(goldrush), + error_logger:tty(true) + end + end + ] + }. -endif. From 7549c85416fd72bb63e3f96eac9130f580707351 Mon Sep 17 00:00:00 2001 From: Luke Bakken Date: Tue, 30 Apr 2019 09:22:04 -0700 Subject: [PATCH 2/2] Fix test Retry strategy for flaky tests --- src/error_logger_lager_h.erl | 74 +++++++++++++++++++++++++---------- src/lager_handler_watcher.erl | 1 + test/lager_test_backend.erl | 27 ++++++++++++- 3 files changed, 80 insertions(+), 22 deletions(-) diff --git a/src/error_logger_lager_h.erl b/src/error_logger_lager_h.erl index 733e0d7..1dcc89e 100644 --- a/src/error_logger_lager_h.erl +++ b/src/error_logger_lager_h.erl @@ -314,8 +314,8 @@ log_event(Event, #state{sink=Sink} = State) -> Details = lists:sort(D), case Details of [{application, App}, {exited, Reason}, {type, _Type}] -> - case application:get_env(lager, suppress_application_start_stop) of - {ok, true} when Reason == stopped -> + case application:get_env(lager, suppress_application_start_stop, false) of + true when Reason == stopped -> no_log; _ -> {Md, Formatted} = format_reason_md(Reason), @@ -331,8 +331,8 @@ log_event(Event, #state{sink=Sink} = State) -> Details = lists:sort(D), case Details of [{application, App}, {started_at, Node}] -> - case application:get_env(lager, suppress_application_start_stop) of - {ok, true} -> + case application:get_env(lager, suppress_application_start_stop, false) of + true -> no_log; _ -> ?LOGFMT(Sink, info, P, "Application ~w started on node ~w", @@ -640,16 +640,12 @@ no_silent_hwm_drops_test_() -> try {_, _, MS} = os:timestamp(), timer:sleep((1000000 - MS) div 1000 + 1), - %start close to the beginning of a new second + % start close to the beginning of a new second [error_logger:error_msg("Foo ~p~n", [K]) || K <- lists:seq(1, 15)], - timer:sleep(1000), - lager_handler_watcher:pop_until("lager_error_logger_h dropped 10 messages in the last second that exceeded the limit of 5 messages/sec", - fun lists:flatten/1), - %and once again + wait_for_message("lager_error_logger_h dropped 10 messages in the last second that exceeded the limit of 5 messages/sec", 100, 50), + % and once again [error_logger:error_msg("Foo1 ~p~n", [K]) || K <- lists:seq(1, 20)], - timer:sleep(1000), - lager_handler_watcher:pop_until("lager_error_logger_h dropped 15 messages in the last second that exceeded the limit of 5 messages/sec", - fun lists:flatten/1) + wait_for_message("lager_error_logger_h dropped 15 messages in the last second that exceeded the limit of 5 messages/sec", 100, 50) after application:stop(lager), application:stop(goldrush), @@ -693,10 +689,12 @@ shaper_does_not_forward_sup_progress_messages_to_info_level_backend_test_() -> [error_logger:info_report(progress, SupervisorMsg) || _K <- lists:seq(0, 100)], error_logger:info_report("This is not a progress message 2"), - timer:sleep(1000), - - 3 = lager_test_backend:count(), - 0 = lager_test_backend:count_ignored() % it's not forwarded at all + % Note: this gets logged in slow environments: + % Application lager started on node nonode@nohost + wait_for_count(fun lager_test_backend:count/0, [3, 4], 100, 50), + % Note: this debug msg gets ignored in slow environments: + % Lager installed handler lager_test_backend into lager_event + wait_for_count(fun lager_test_backend:count_ignored/0, [0, 1], 100, 50) after application:stop(lager), application:stop(goldrush), @@ -741,10 +739,8 @@ supressed_messages_are_not_counted_for_hwm_test_() -> [error_logger:info_report(progress, SupervisorMsg) || _K <- lists:seq(0, 100)], error_logger:info_report("This is not a progress message 2"), - timer:sleep(1000), - - 2 = lager_test_backend:count(), - 0 = lager_test_backend:count_ignored() + wait_for_count(fun lager_test_backend:count/0, 2, 100, 50), + wait_for_count(fun lager_test_backend:count_ignored/0, 0, 100, 50) after application:stop(lager), application:stop(goldrush), @@ -753,4 +749,42 @@ supressed_messages_are_not_counted_for_hwm_test_() -> end ] }. + +wait_for_message(Expected, Tries, Sleep) -> + maybe_find_expected_message(lager_test_backend:get_buffer(), Expected, Tries, Sleep). + +maybe_find_expected_message(_Buffer, Expected, 0, _Sleep) -> + throw({not_found, Expected}); +maybe_find_expected_message([], Expected, Tries, Sleep) -> + timer:sleep(Sleep), + maybe_find_expected_message(lager_test_backend:get_buffer(), Expected, Tries - 1, Sleep); +maybe_find_expected_message([{_Severity, _Date, Msg, _Metadata}|T], Expected, Tries, Sleep) -> + case lists:flatten(Msg) of + Expected -> + ok; + _ -> + maybe_find_expected_message(T, Expected, Tries, Sleep) + end. + +wait_for_count(Fun, _Expected, 0, _Sleep) -> + Actual = Fun(), + Msg = io_lib:format("wait_for_count: fun ~p final value: ~p~n", [Fun, Actual]), + throw({failed, Msg}); +wait_for_count(Fun, Expected, Tries, Sleep) when is_list(Expected) -> + Actual = Fun(), + case lists:member(Actual, Expected) of + true -> + ok; + false -> + timer:sleep(Sleep), + wait_for_count(Fun, Expected, Tries - 1, Sleep) + end; +wait_for_count(Fun, Expected, Tries, Sleep) -> + case Fun() of + Expected -> + ok; + _ -> + timer:sleep(Sleep), + wait_for_count(Fun, Expected, Tries - 1, Sleep) + end. -endif. diff --git a/src/lager_handler_watcher.erl b/src/lager_handler_watcher.erl index 1f4cc06..73d5a41 100644 --- a/src/lager_handler_watcher.erl +++ b/src/lager_handler_watcher.erl @@ -27,6 +27,7 @@ -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-export([pop_until/2]). -endif. %% callbacks diff --git a/test/lager_test_backend.erl b/test/lager_test_backend.erl index cb4a520..69489b9 100644 --- a/test/lager_test_backend.erl +++ b/test/lager_test_backend.erl @@ -50,7 +50,9 @@ flush/0, message_stuffer/3, pop/0, - print_state/0 + pop_ignored/0, + print_state/0, + get_buffer/0 ]). -endif. @@ -70,6 +72,15 @@ handle_call(pop, #state{buffer=Buffer} = State) -> [H|T] -> {ok, H, State#state{buffer=T}} end; +handle_call(pop_ignored, #state{ignored=Ignored} = State) -> + case Ignored of + [] -> + {ok, undefined, State}; + [H|T] -> + {ok, H, State#state{ignored=T}} + end; +handle_call(get_buffer, #state{buffer=Buffer} = State) -> + {ok, Buffer, State}; handle_call(get_loglevel, #state{level=Level} = State) -> {ok, Level, State}; handle_call({set_loglevel, Level}, State) -> @@ -94,7 +105,7 @@ handle_event({log, Msg}, lager_msg:datetime(Msg), lager_msg:message(Msg), lager_msg:metadata(Msg)}]}}; _ -> - {ok, State#state{ignored=Ignored ++ [ignored]}} + {ok, State#state{ignored=Ignored ++ [Msg]}} end; handle_event(_Event, State) -> {ok, State}. @@ -113,6 +124,12 @@ code_change(_OldVsn, State, _Extra) -> pop() -> pop(lager_event). +pop_ignored() -> + pop_ignored(lager_event). + +get_buffer() -> + get_buffer(lager_event). + count() -> count(lager_event). @@ -131,6 +148,12 @@ print_bad_state() -> pop(Sink) -> gen_event:call(Sink, ?MODULE, pop). +pop_ignored(Sink) -> + gen_event:call(Sink, ?MODULE, pop_ignored). + +get_buffer(Sink) -> + gen_event:call(Sink, ?MODULE, get_buffer). + count(Sink) -> gen_event:call(Sink, ?MODULE, count).