From a2f10e31cfc8340dd69339783e95311f7f455231 Mon Sep 17 00:00:00 2001 From: Fred Hebert Date: Tue, 12 May 2020 20:38:36 +0000 Subject: [PATCH] Fix DAG pruning perf for extra_src_dirs We run the analysis for an extra_src_dir by making a fake app for it, but send in the app alone for analysis. In there, the DAG pruning routine looks at the DAG and files submitted and goes "this right here is a project with 99% of its apps deleted". It then tries to prune the whole DAG except for some test files. The code "works" simply because there's a false-positive check that makes sure the file is on disk before removing it for the DAG. This ends up making extra runs where ~80% of the time is spent double-checking the false positives for file deletions. This commit fixes this by merging in all extra_src fake apps and making them run in a single analysis phase, meaning we only pay the cost of the DAG pruning once for the whole project, making it faster than any sparse repo. There's also a small patch needed for the root-level extra src dirs; turns out that since the context-handling in the `rebar_compiler` uses a map to store content, running single-pass analysis clobbered entries for a given app if they had more than one extra_src_dir in there. I also took the time to clean up the ordering of that file. --- src/rebar_compiler.erl | 147 +++++++++++++++++++++---------------- src/rebar_compiler_dag.erl | 43 +++++++---- src/rebar_prv_compile.erl | 16 +++- 3 files changed, 125 insertions(+), 81 deletions(-) diff --git a/src/rebar_compiler.erl b/src/rebar_compiler.erl index 3d7c16ad..f4b4c472 100644 --- a/src/rebar_compiler.erl +++ b/src/rebar_compiler.erl @@ -1,6 +1,7 @@ -module(rebar_compiler). -export([analyze_all/2, + analyze_all_extras/2, compile_analyzed/3, compile_all/2, clean/2, @@ -61,7 +62,7 @@ analyze_all({Compiler, G}, Apps) -> OutExt = maps:get(artifact_exts, Contexts), rebar_compiler_dag:prune( - G, SrcExt, OutExt, lists:append(AbsSources), AppOutPaths + G, SrcExt, OutExt, lists:append(AbsSources), lists:append(AppOutPaths) ), rebar_compiler_dag:populate_deps(G, SrcExt, OutExt), rebar_compiler_dag:propagate_stamps(G), @@ -72,6 +73,78 @@ analyze_all({Compiler, G}, Apps) -> AppNames = rebar_compiler_dag:compile_order(G, AppPaths), {Contexts, sort_apps(AppNames, Apps)}. +%% @doc same as analyze_all/2, but over extra_src_apps, +%% which are a big cheat. +-spec analyze_all_extras(DAG, [App, ...]) -> {map(), [App]} when + DAG :: {module(), digraph:graph()}, + App :: rebar_app_info:t(). +analyze_all_extras(DAG, Apps) -> + case lists:append([annotate_extras(App) || App <- Apps]) of + [] -> {#{}, []}; + ExtraApps -> analyze_all(DAG, ExtraApps) + end. + +-spec compile_analyzed({module(), digraph:graph()}, rebar_app_info:t(), map()) -> ok. +compile_analyzed({Compiler, G}, AppInfo, Contexts) -> % > 3.13.2 + run(G, Compiler, AppInfo, Contexts), + ok. + +-spec compile_all([module(), ...], rebar_app_info:t()) -> ok. +compile_all(Compilers, AppInfo) -> % =< 3.13.0 interface; plugins use this! + %% Support the old-style API by re-declaring a local DAG for the + %% compile steps needed. + lists:foreach(fun(Compiler) -> + OutDir = rebar_app_info:out_dir(AppInfo), + G = rebar_compiler_dag:init(OutDir, Compiler, undefined, []), + {Ctx, _} = analyze_all({Compiler, G}, [AppInfo]), + compile_analyzed({Compiler, G}, AppInfo, Ctx), + rebar_compiler_dag:maybe_store(G, OutDir, Compiler, undefined, []), + rebar_compiler_dag:terminate(G) + end, Compilers). + +%% @doc remove compiled artifacts from an AppDir. +-spec clean([module()], rebar_app_info:t()) -> 'ok'. +clean(Compilers, AppInfo) -> + lists:foreach(fun(CompilerMod) -> + clean_(CompilerMod, AppInfo, undefined), + Extras = annotate_extras(AppInfo), + [clean_(CompilerMod, ExtraApp, "extra") || ExtraApp <- Extras] + end, Compilers). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% COMPILER UTIL EXPORTS %%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% These functions are here for the ultimate goal of getting rid of +%% rebar_base_compiler. This can't be done because of existing plugins. + +-spec needs_compile(filename:all(), extension(), [{extension(), file:dirname()}]) -> boolean(). +needs_compile(Source, OutExt, Mappings) -> + Ext = filename:extension(Source), + BaseName = filename:basename(Source, Ext), + {_, OutDir} = lists:keyfind(OutExt, 1, Mappings), + Target = filename:join(OutDir, BaseName++OutExt), + filelib:last_modified(Source) > filelib:last_modified(Target). + +ok_tuple(Source, Ws) -> + rebar_base_compiler:ok_tuple(Source, Ws). + +error_tuple(Source, Es, Ws, Opts) -> + rebar_base_compiler:error_tuple(Source, Es, Ws, Opts). + +maybe_report(Reportable) -> + rebar_base_compiler:maybe_report(Reportable). + +format_error_source(Path, Opts) -> + rebar_base_compiler:format_error_source(Path, Opts). + +report(Messages) -> + rebar_base_compiler:report(Messages). + +%%%%%%%%%%%%%%% +%%% PRIVATE %%% +%%%%%%%%%%%%%%% + gather_contexts(Compiler, Apps) -> Default = default_ctx(), Contexts = [{rebar_app_info:name(AppInfo), @@ -121,37 +194,14 @@ analyze_app({Compiler, G}, Contexts, AppInfo) -> rebar_compiler_dag:populate_sources( G, Compiler, InDirs, AbsSources, DepOpts ), - {{BaseDir, ArtifactDir}, AbsSources}. + {[{filename:join([BaseDir, SrcDir]), ArtifactDir} || SrcDir <- SrcDirs], + AbsSources}. sort_apps(Names, Apps) -> NamedApps = [{rebar_app_info:name(App), App} || App <- Apps], [App || Name <- Names, {_, App} <- [lists:keyfind(Name, 1, NamedApps)]]. --spec compile_analyzed({module(), digraph:graph()}, rebar_app_info:t(), map()) -> ok. -compile_analyzed({Compiler, G}, AppInfo, Contexts) -> % > 3.13.2 - run(G, Compiler, AppInfo, Contexts), - %% Extras are tricky and get their own mini-analysis - ExtraApps = annotate_extras(AppInfo), - [begin - {ExtraCtx, [SortedExtra]} = analyze_all({Compiler, G}, [ExtraAppInfo]), - run(G, Compiler, SortedExtra, ExtraCtx) - end || ExtraAppInfo <- ExtraApps], - ok. - --spec compile_all([module(), ...], rebar_app_info:t()) -> ok. -compile_all(Compilers, AppInfo) -> % =< 3.13.0 interface; plugins use this! - %% Support the old-style API by re-declaring a local DAG for the - %% compile steps needed. - lists:foreach(fun(Compiler) -> - OutDir = rebar_app_info:out_dir(AppInfo), - G = rebar_compiler_dag:init(OutDir, Compiler, undefined, []), - {Ctx, _} = analyze_all({Compiler, G}, [AppInfo]), - compile_analyzed({Compiler, G}, AppInfo, Ctx), - rebar_compiler_dag:maybe_store(G, OutDir, Compiler, undefined, []), - rebar_compiler_dag:terminate(G) - end, Compilers). - prepare_compiler_env(Compiler, Apps) -> lists:foreach( fun(AppInfo) -> @@ -293,16 +343,6 @@ compile_handler({Error, Source}, [Config | _Rest]) -> maybe_report(Error), ?FAIL. - -%% @doc remove compiled artifacts from an AppDir. --spec clean([module()], rebar_app_info:t()) -> 'ok'. -clean(Compilers, AppInfo) -> - lists:foreach(fun(CompilerMod) -> - clean_(CompilerMod, AppInfo, undefined), - Extras = annotate_extras(AppInfo), - [clean_(CompilerMod, ExtraApp, "extra") || ExtraApp <- Extras] - end, Compilers). - clean_(CompilerMod, AppInfo, _Label) -> #{src_dirs := SrcDirs, src_ext := SrcExt} = CompilerMod:context(AppInfo), @@ -313,21 +353,18 @@ clean_(CompilerMod, AppInfo, _Label) -> CompilerMod:clean(FoundFiles, AppInfo), ok. --spec needs_compile(filename:all(), extension(), [{extension(), file:dirname()}]) -> boolean(). -needs_compile(Source, OutExt, Mappings) -> - Ext = filename:extension(Source), - BaseName = filename:basename(Source, Ext), - {_, OutDir} = lists:keyfind(OutExt, 1, Mappings), - Target = filename:join(OutDir, BaseName++OutExt), - filelib:last_modified(Source) > filelib:last_modified(Target). - annotate_extras(AppInfo) -> ExtraDirs = rebar_dir:extra_src_dirs(rebar_app_info:opts(AppInfo), []), OldSrcDirs = rebar_app_info:get(AppInfo, src_dirs, ["src"]), AppDir = rebar_app_info:dir(AppInfo), lists:map(fun(Dir) -> EbinDir = filename:join(rebar_app_info:out_dir(AppInfo), Dir), - AppInfo1 = rebar_app_info:ebin_dir(AppInfo, EbinDir), + %% need a unique name to prevent lookup issues that clobber entries + AppName = unicode:characters_to_binary( + [rebar_app_info:name(AppInfo), "_", Dir] + ), + AppInfo0 = rebar_app_info:name(AppInfo, AppName), + AppInfo1 = rebar_app_info:ebin_dir(AppInfo0, EbinDir), AppInfo2 = rebar_app_info:set(AppInfo1, src_dirs, [Dir]), AppInfo3 = rebar_app_info:set(AppInfo2, extra_src_dirs, OldSrcDirs), add_to_includes( % give access to .hrl in app's src/ @@ -339,26 +376,6 @@ annotate_extras(AppInfo) -> filelib:is_dir(filename:join(AppDir, ExtraDir))] ). -%% These functions are here for the ultimate goal of getting rid of -%% rebar_base_compiler. This can't be done because of existing plugins. - -ok_tuple(Source, Ws) -> - rebar_base_compiler:ok_tuple(Source, Ws). - -error_tuple(Source, Es, Ws, Opts) -> - rebar_base_compiler:error_tuple(Source, Es, Ws, Opts). - -maybe_report(Reportable) -> - rebar_base_compiler:maybe_report(Reportable). - -format_error_source(Path, Opts) -> - rebar_base_compiler:format_error_source(Path, Opts). - -report(Messages) -> - rebar_base_compiler:report(Messages). - -%%% private functions - find_source_files(BaseDir, SrcExt, SrcDirs, Opts) -> SourceExtRe = "^(?!\\._).*\\" ++ SrcExt ++ [$$], lists:flatmap(fun(SrcDir) -> diff --git a/src/rebar_compiler_dag.erl b/src/rebar_compiler_dag.erl index 04b4e8c7..db903b16 100644 --- a/src/rebar_compiler_dag.erl +++ b/src/rebar_compiler_dag.erl @@ -41,7 +41,7 @@ init(Dir, Compiler, Label, CritMeta) -> %% The file must be in one of the directories that may contain source files %% for an OTP application; source files found in the DAG `G' that lie outside %% of these directories may be used in other circumstances (i.e. options affecting -%% visibility). +%% visibility, extra_src_dirs). %% Prune out files that have no corresponding sources prune(G, SrcExt, ArtifactExt, Sources, AppPaths) -> %% Collect source files that may have been removed. These files: @@ -50,14 +50,28 @@ prune(G, SrcExt, ArtifactExt, Sources, AppPaths) -> %% In the process, prune header files - those don't have ArtifactExt %% extension - using side effect in is_deleted_source/5. case [Del || Del <- (digraph:vertices(G) -- Sources), - is_deleted_source(G, Del, filename:extension(Del), SrcExt, ArtifactExt)] of + is_deleted_source(G, Del, filename:extension(Del), SrcExt, ArtifactExt)] of [] -> ok; %% short circuit without sorting AppPaths Deleted -> - prune_source_files(G, SrcExt, ArtifactExt, - lists:sort(AppPaths), lists:sort(Deleted)) + SafeAppPaths = safe_dirs(AppPaths), + OutFiles = filter_prefix(G, lists:sort(SafeAppPaths), lists:sort(Deleted)), + [maybe_rm_artifact_and_edge(G, Out, SrcExt, ArtifactExt, File) + || {File, Out} <- OutFiles], + ok end. +%% Some app paths may be prefixes of one another; for example, +%% `/some/app/directory' may be seen as a prefix +%% of `/some/app/directory_trick' and cause pruning outside +%% of the proper scopes. +safe_dirs(AppPaths) -> + [{safe_dir(AppDir), Path} || {AppDir, Path} <- AppPaths]. + +safe_dir([]) -> "/"; +safe_dir("/") -> "/"; +safe_dir([H|T]) -> [H|safe_dir(T)]. + is_deleted_source(_G, _F, Extension, Extension, _ArtifactExt) -> %% source file true; @@ -74,22 +88,21 @@ is_deleted_source(G, F, _Extension, _SrcExt, _ArtifactExt) -> %% AppDirs & Fs are sorted, and to check if File is outside of %% App, lists:prefix is checked. When the App with File in it %% exists, verify file is still there on disk. -prune_source_files(_G, _SrcExt, _ArtifactExt, [], _) -> - ok; -prune_source_files(_G, _SrcExt, _ArtifactExt, _, []) -> - ok; -prune_source_files(G, SrcExt, ArtifactExt, [AppDir | AppTail], Fs) when is_atom(AppDir) -> +filter_prefix(_G, [], _) -> + []; +filter_prefix(_G, _, []) -> + []; +filter_prefix(G, Apps, [F|Fs]) when is_atom(F) -> %% dirty bit shenanigans - prune_source_files(G, SrcExt, ArtifactExt, AppTail, Fs); -prune_source_files(G, SrcExt, ArtifactExt, [{App, Out} | AppTail] = AppPaths, [File | FTail]) -> + filter_prefix(G, Apps, Fs); +filter_prefix(G, [{App, Out} | AppTail] = AppPaths, [File | FTail]) -> case lists:prefix(App, File) of true -> - maybe_rm_artifact_and_edge(G, Out, SrcExt, ArtifactExt, File), - prune_source_files(G, SrcExt, ArtifactExt, AppPaths, FTail); + [{File, Out} | filter_prefix(G, AppPaths, FTail)]; false when App < File -> - prune_source_files(G, SrcExt, ArtifactExt, AppTail, [File|FTail]); + filter_prefix(G, AppTail, [File|FTail]); false -> - prune_source_files(G, SrcExt, ArtifactExt, AppPaths, FTail) + filter_prefix(G, AppPaths, FTail) end. %% @doc this function scans all the source files found and looks into diff --git a/src/rebar_prv_compile.erl b/src/rebar_prv_compile.erl index f8974466..74b2545b 100644 --- a/src/rebar_prv_compile.erl +++ b/src/rebar_prv_compile.erl @@ -251,7 +251,13 @@ extra_virtual_apps(State, VApp0, [Dir|Dirs]) -> VApp2 = rebar_app_info:ebin_dir(VApp1, OutDir), Opts = rebar_state:opts(State), VApp3 = rebar_app_info:opts(VApp2, Opts), - [rebar_app_info:set(VApp3, src_dirs, [OutDir]) + %% ensure we don't end up running a similar extra fake app while + %% compiling root-level extras by marking it explicitly null + VApp4 = rebar_app_info:set(VApp3, extra_src_dirs, []), + %% need a unique name to prevent lookup issues that clobber entries + AppName = unicode:characters_to_binary(["extra_", Dir]), + VApp5 = rebar_app_info:name(VApp4, AppName), + [rebar_app_info:set(VApp5, src_dirs, [OutDir]) | extra_virtual_apps(State, VApp0, Dirs)] end. @@ -303,6 +309,7 @@ build_rebar3_apps(DAGs, Apps, State) -> LastDAG = lists:last(DAGs), %% we actually need to compile each DAG one after the other to prevent %% issues where a .yrl file that generates a .erl file gets to be seen. + ?INFO("Analyzing applications", []), [begin {Ctx, ReorderedApps} = rebar_compiler:analyze_all(DAG, Apps), lists:foreach( @@ -312,6 +319,13 @@ build_rebar3_apps(DAGs, Apps, State) -> rebar_compiler:compile_analyzed(DAG, AppInfo, Ctx) end, ReorderedApps + ), + {ExtraCtx, ReorderedExtraApps} = rebar_compiler:analyze_all_extras(DAG, Apps), + lists:foreach( + fun(AppInfo) -> + rebar_compiler:compile_analyzed(DAG, AppInfo, ExtraCtx) + end, + ReorderedExtraApps ) end || DAG <- DAGs], ok.