From 48005453523b03bb52f05c5c635192c6e05dbef6 Mon Sep 17 00:00:00 2001 From: Ted Burghart Date: Fri, 20 Jan 2017 18:06:11 -0500 Subject: [PATCH 1/2] Fix `async_threshold_test_` flappiness. On final analysis, it appears the effective fix is clearing out leftover lager state in the application controller. Along the way, the test body was modified to account for the runtime system, and as this doesn't seem like a bad thing it's left in place. This commit also moves `otp_version/0` to `lager_util`'s public API, as it may be helpful in future tests that check for version-specific messages (see issue #383). --- src/lager_util.erl | 27 +++-- test/lager_test_backend.erl | 205 +++++++++++++++++++++--------------- 2 files changed, 141 insertions(+), 91 deletions(-) diff --git a/src/lager_util.erl b/src/lager_util.erl index 468baa0..090d45f 100644 --- a/src/lager_util.erl +++ b/src/lager_util.erl @@ -22,12 +22,15 @@ -include_lib("kernel/include/file.hrl"). --export([levels/0, level_to_num/1, level_to_chr/1, - num_to_level/1, config_to_mask/1, config_to_levels/1, mask_to_levels/1, - open_logfile/2, ensure_logfile/4, rotate_logfile/2, format_time/0, format_time/1, - localtime_ms/0, localtime_ms/1, maybe_utc/1, parse_rotation_date_spec/1, - calculate_next_rotation/1, validate_trace/1, check_traces/4, is_loggable/3, - trace_filter/1, trace_filter/2, expand_path/1, check_hwm/1, make_internal_sink_name/1]). +-export([ + levels/0, level_to_num/1, level_to_chr/1, + num_to_level/1, config_to_mask/1, config_to_levels/1, mask_to_levels/1, + open_logfile/2, ensure_logfile/4, rotate_logfile/2, format_time/0, format_time/1, + localtime_ms/0, localtime_ms/1, maybe_utc/1, parse_rotation_date_spec/1, + calculate_next_rotation/1, validate_trace/1, check_traces/4, is_loggable/3, + trace_filter/1, trace_filter/2, expand_path/1, check_hwm/1, + make_internal_sink_name/1, otp_version/0 +]). -ifdef(TEST). -export([create_test_dir/0, delete_test_dir/1]). @@ -544,6 +547,18 @@ make_internal_sink_name(lager) -> make_internal_sink_name(Sink) -> list_to_atom(atom_to_list(Sink) ++ "_lager_event"). +-spec otp_version() -> pos_integer(). +%% @doc Return the major version of the current Erlang/OTP runtime as an integer. +otp_version() -> + {Vsn, _} = string:to_integer( + case erlang:system_info(otp_release) of + [$R | Rel] -> + Rel; + Rel -> + Rel + end), + Vsn. + -ifdef(TEST). parse_test() -> diff --git a/test/lager_test_backend.erl b/test/lager_test_backend.erl index 36409a3..c2ba183 100644 --- a/test/lager_test_backend.erl +++ b/test/lager_test_backend.erl @@ -1,6 +1,6 @@ %% ------------------------------------------------------------------- %% -%% Copyright (c) 2011-2015 Basho Technologies, Inc. +%% Copyright (c) 2011-2017 Basho Technologies, Inc. %% %% This file is provided to you under the Apache License, %% Version 2.0 (the "License"); you may not use this file @@ -20,23 +20,38 @@ -module(lager_test_backend). --include("lager.hrl"). - -behaviour(gen_event). -export([init/1, handle_call/2, handle_event/2, handle_info/2, terminate/2, code_change/3]). +-include("lager.hrl"). + -define(TEST_SINK_NAME, '__lager_test_sink'). %% <-- used by parse transform -define(TEST_SINK_EVENT, '__lager_test_sink_lager_event'). %% <-- used by lager API calls and internals for gen_event --record(state, {level :: list(), buffer :: list(), ignored :: term()}). +-record(state, { + level :: list(), + buffer :: list(), + ignored :: term() +}). -compile({parse_transform, lager_transform}). -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). --record(test, {attrs :: list(), format :: list(), args :: list()}). --export([pop/0, count/0, count_ignored/0, flush/0, print_state/0]). +-record(test, { + attrs :: list(), + format :: list(), + args :: list() +}). +-export([ + count/0, + count_ignored/0, + flush/0, + message_stuffer/3, + pop/0, + print_state/0 +]). -endif. init(Level) -> @@ -137,23 +152,13 @@ has_line_numbers() -> % this gets called a LOT - cache the answer case erlang:get({?MODULE, has_line_numbers}) of undefined -> - R = otp_version() >= 15, + R = lager_util:otp_version() >= 15, erlang:put({?MODULE, has_line_numbers}, R), R; Bool -> Bool end. -otp_version() -> - otp_version(erlang:system_info(otp_release)). - -otp_version([$R | Rel]) -> - {Ver, _} = string:to_integer(Rel), - Ver; -otp_version(Rel) -> - {Ver, _} = string:to_integer(Rel), - Ver. - not_running_test() -> ?assertEqual({error, lager_not_running}, lager:log(info, self(), "not running")). @@ -900,7 +905,7 @@ error_logger_redirect_crash_cleanup(_Sink) -> error_logger:tty(true). error_logger_redirect_crash_test_() -> - TestBody=fun(Name,CrashReason,Expected) -> + TestBody=fun(Name,CrashReason,Expected) -> fun(Sink) -> {Name, fun() -> @@ -915,7 +920,7 @@ error_logger_redirect_crash_test_() -> end end, Tests = [ - fun(Sink) -> + fun(Sink) -> {"again, there is nothing up my sleeve", fun() -> ?assertEqual(undefined, pop(Sink)), @@ -943,7 +948,7 @@ error_logger_redirect_crash_test_() -> TestBody("badfun",badfun,"gen_server crash terminated with reason: bad function booger in crash:handle_call/3") ], {"Error logger redirect crash", [ - {"Redirect to default sink", + {"Redirect to default sink", {foreach, fun error_logger_redirect_crash_setup/0, fun error_logger_redirect_crash_cleanup/1, @@ -1520,7 +1525,7 @@ error_logger_redirect_test_() -> end, Tests), {"Error logger redirect", [ - {"Redirect to default sink", + {"Redirect to default sink", {foreach, fun error_logger_redirect_setup/0, fun error_logger_redirect_cleanup/1, @@ -1543,77 +1548,107 @@ unsafe_format_test() -> ok. async_threshold_test_() -> - {foreach, + Cleanup = fun(Reset) -> + _ = error_logger:tty(false), + _ = application:stop(lager), + _ = application:stop(goldrush), + _ = application:unset_env(lager, async_threshold), + if + Reset -> + true = ets:delete(async_threshold_test), + error_logger:tty(true); + true -> + _ = (catch ets:delete(async_threshold_test)), + ok + end + end, + Setup = fun() -> + % Evidence suggests that previous tests somewhere are leaving some of this stuff + % loaded, and cleaning it out forcefully to allows the test to succeed. + _ = Cleanup(false), + _ = ets:new(async_threshold_test, [set, named_table, public]), + ?assertEqual(true, ets:insert_new(async_threshold_test, {sync_toggled, 0})), + ?assertEqual(true, ets:insert_new(async_threshold_test, {async_toggled, 0})), + _ = application:load(lager), + ok = application:set_env(lager, error_logger_redirect, false), + ok = application:set_env(lager, async_threshold, 2), + ok = application:set_env(lager, async_threshold_window, 1), + ok = application:set_env(lager, handlers, [{?MODULE, info}]), + ok = lager:start(), + true + end, + {foreach, Setup, Cleanup, [ + {"async threshold works", fun() -> - error_logger:tty(false), - ets:new(async_threshold_test, [set, named_table, public]), - ets:insert_new(async_threshold_test, {sync_toggled, 0}), - ets:insert_new(async_threshold_test, {async_toggled, 0}), - application:load(lager), - application:set_env(lager, error_logger_redirect, false), - application:set_env(lager, async_threshold, 2), - application:set_env(lager, async_threshold_window, 1), - application:set_env(lager, handlers, [{?MODULE, info}]), - lager:start() - end, - fun(_) -> - application:unset_env(lager, async_threshold), - application:stop(lager), - application:stop(goldrush), - ets:delete(async_threshold_test), - error_logger:tty(true) - end, - [ - {"async threshold works", - fun() -> - %% we start out async - ?assertEqual(true, lager_config:get(async)), - - %% put a ton of things in the queue - Workers = [spawn_monitor(fun() -> [lager:info("hello world") || _ <- lists:seq(1, 100)] end) - || _ <- lists:seq(1, 10)], - - %% serialize on mailbox - _ = gen_event:which_handlers(lager_event), - timer:sleep(500), - - %% By now the flood of messages will have - %% forced the backend throttle to turn off - %% async mode, but it's possible all - %% outstanding requests have been processed, - %% so checking the current status (sync or - %% async) is an exercise in race control. - - %% Instead, we'll see whether the backend - %% throttle has toggled into sync mode at any - %% point in the past - ?assertMatch([{sync_toggled, N}] when N > 0, - ets:lookup(async_threshold_test, sync_toggled)), - %% wait for all the workers to return, meaning that all - %% the messages have been logged (since we're - %% definitely in sync mode at the end of the run) - collect_workers(Workers), - %% serialize on the mailbox again - _ = gen_event:which_handlers(lager_event), - %% just in case... - timer:sleep(1000), - lager:info("hello world"), - _ = gen_event:which_handlers(lager_event), + %% we start out async + ?assertEqual(true, lager_config:get(async)), + ?assertEqual([{sync_toggled, 0}], + ets:lookup(async_threshold_test, sync_toggled)), + + %% put a ton of things in the queue + WorkCnt = erlang:max(10, (erlang:system_info(schedulers) * 2)), + OtpVsn = lager_util:otp_version(), + % newer OTPs *may* handle the messages faster, so we'll send more + MsgCnt = ((OtpVsn * OtpVsn) div 2), + Workers = spawn_stuffers(WorkCnt, [MsgCnt, info, "hello world"], []), + + %% serialize on mailbox + _ = gen_event:which_handlers(lager_event), + timer:sleep(500), + + %% By now the flood of messages should have forced the backend throttle + %% to turn off async mode, but it's possible all outstanding requests + %% have been processed, so checking the current status (sync or async) + %% is an exercise in race control. + %% Instead, we'll see whether the backend throttle has toggled into sync + %% mode at any point in the past. + ?assertMatch([{sync_toggled, N}] when N > 0, + ets:lookup(async_threshold_test, sync_toggled)), + + %% Wait for all the workers to return, meaning that all the messages have + %% been logged (since we're definitely in sync mode at the end of the run). + collect_workers(Workers), + + %% serialize on the mailbox again + _ = gen_event:which_handlers(lager_event), + + %% just in case... + timer:sleep(1000), + lager:info("hello world"), + _ = gen_event:which_handlers(lager_event), + + %% async is true again now that the mailbox has drained + ?assertEqual(true, lager_config:get(async)), + ok + end} + ]}. - %% async is true again now that the mailbox has drained - ?assertEqual(true, lager_config:get(async)), - ok - end - } - ] - }. +% Fire off the stuffers with minimal resource overhead - speed is of the essence. +spawn_stuffers(0, _, Refs) -> + % Attempt to return them in about the order that they'll finish. + lists:reverse(Refs); +spawn_stuffers(N, Args, Refs) -> + {_Pid, Ref} = erlang:spawn_monitor(?MODULE, message_stuffer, Args), + spawn_stuffers((N - 1), Args, [Ref | Refs]). + +% Spawned process to stuff N copies of Message into lager's message queue as fast as possible. +% Skip using a list function for speed and low memory footprint - don't want to take the +% resources to create a sequence (or pass one in). +message_stuffer(N, Level, Message) -> + message_stuffer_(N, Level, [{pid, erlang:self()}], Message). + +message_stuffer_(0, _, _, _) -> + ok; +message_stuffer_(N, Level, Meta, Message) -> + lager:log(Level, Meta, Message), + message_stuffer_((N - 1), Level, Meta, Message). collect_workers([]) -> ok; -collect_workers(Workers) -> +collect_workers([Ref | Refs]) -> receive {'DOWN', Ref, _, _, _} -> - collect_workers(lists:keydelete(Ref, 2, Workers)) + collect_workers(Refs) end. produce_n_error_logger_msgs(N) -> From d6625df6842fd4583df5bcb543db26e92d90198f Mon Sep 17 00:00:00 2001 From: Ted Burghart Date: Sat, 21 Jan 2017 07:10:02 -0500 Subject: [PATCH 2/2] Fixes another flappy test, this time lager_file_backend:trace_files_test_. --- src/lager_file_backend.erl | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/lager_file_backend.erl b/src/lager_file_backend.erl index 21fddcf..2ec4cb0 100644 --- a/src/lager_file_backend.erl +++ b/src/lager_file_backend.erl @@ -971,13 +971,27 @@ trace_files_test_() -> re:split(BinInfo2, "\n", [{return, list}, {parts, 2}])), ?assert(filelib:is_regular(Debug)), %% XXX Aughhhh, wish I could force this to flush somehow... - timer:sleep(1000), - {ok, BinInfo3} = file:read_file(Debug), - ?assertEqual(2, length(re:split(BinInfo3, "\n", [{return, list}, trim]))) + % should take about 1 second, try for 3 ... + ?assertEqual(2, count_lines_until(2, add_secs(os:timestamp(), 3), Debug, 0)) end} end ]}. +count_lines_until(Lines, Timeout, File, Last) -> + case timer:now_diff(Timeout, os:timestamp()) > 0 of + true -> + timer:sleep(333), + {ok, Bin} = file:read_file(File), + case erlang:length(re:split(Bin, "\n", [{return, list}, trim])) of + Count when Count < Lines -> + count_lines_until(Lines, Timeout, File, Count); + Count -> + Count + end; + _ -> + Last + end. + formatting_test_() -> {foreach, fun() ->