You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

221 line
9.0 KiB

Split up the compiler DAG

This is another tricky commit towards replacing the current module analysis with EPP. The compiler DAG is now shared across all the applications being compiled, rather than being kept on a per-application basis, which promises to allow better ordering, parallelism, and more thorough invalidation of runtime dependencies when they are modified.

This however required changes:

- The compiler DAG is no longer private to `rebar_compiler`, and has been extracted to the `rebar_compiler_dag` module.
- The compiler DAG is now started by the `rebar_prv_compile` module, which oversees the calls to `rebar_compiler` for each OTP application.
- The compiler DAG has been refactored to use a "dirty flag" to know if it was modified, rather than just tracking modifications in a functional manner, since the scope change (going multi-app) makes it impossible to cleanly use the functional approach without much larger changes.
- The DAG used to be cached within each OTP application. This is no longer possible since it is shared. Instead the DAG is stored in the state's deps_dir, which makes it possible to cleanly split caches between regular apps for the user's project and plugins.
- The DAG supported a "label" mode that was used to store distinct DAGs for extra_src_dir runs and regular modules; this label is now used (and extended to `rebar_prv_compile` internals) to distinguish between "compile runs", such as "project_apps", or just "apps" (deps). The label is optional (i.e. not used by plugins, which have no such need).
- The extra_src_dirs for each app are now compiled using the main app's DAG, but the run takes place later in the compilation process. This may need changing to detect and prevent dependencies from src_dirs into extra_src_dirs, but this should not technically be a problem for runtime anyway.
- Reworked the support for extra_src_dirs that are at the root of an umbrella project (and therefore do not belong to any single app) to use the new structure, also as part of the project_apps DAG.

All tests keep passing, and this puts us in a better place to use EPP with cross-app support in the near future.
5 年之前
  1. %%% Module handling the directed graph required for the analysis
  2. %%% of all top-level applications by the various compiler plugins.
  3. -module(rebar_compiler_dag).
  4. -export([init/4, prune/4, update/4, maybe_store/5, terminate/1]).
  5. -include("rebar.hrl").
  %% Version tag written into every stored DAG. `restore_dag/3' only accepts
  %% a file whose recorded version matches this value.
  -define(DAG_VSN, 3).
  %% Base name and extension of the DAG file, as assembled by `dag_file/3'.
  -define(DAG_ROOT, "source").
  -define(DAG_EXT, ".dag").

  %% A stored vertex, in the shape returned by `digraph:vertex/2'.
  -type dag_v() :: {digraph:vertex(), term()} | 'false'.
  %% A stored edge, in the shape returned by `digraph:edge/2'. This is the
  %% 4-tuple that `store_dag/3' writes and `restore_dag/3' matches on.
  -type dag_e() :: {digraph:edge(), digraph:vertex(), digraph:vertex(), digraph:label()}
                 | 'false'.
  -type critical_meta() :: term(). % if this changes, the DAG is invalid
  %% Payload of a stored DAG: vertices, edges, and the critical metadata that
  %% was in effect when the DAG was saved.
  -type dag_rec() :: {list(dag_v()), list(dag_e()), critical_meta()}.
  -type dag() :: digraph:graph().

  %% Term serialized to disk by `store_dag/3' and read back by `restore_dag/3'.
  -record(dag, {vsn = ?DAG_VSN :: pos_integer(),
                info = {[], [], []} :: dag_rec()}).
  %% Build the in-memory graph for one compiler module; callers are expected
  %% to create one DAG per compiler module.
  %% `CritMeta' is contextual information that must invalidate the DAG loaded
  %% from disk whenever it is found to have changed.
  %% If restoring the stored DAG raises for any reason, a warning is logged
  %% and the file is deleted. The graph is returned in every case.
  -spec init(file:filename_all(), atom(), string() | undefined, critical_meta()) -> dag().
  init(Dir, Compiler, Label, CritMeta) ->
      Graph = digraph:new([acyclic]),
      DagFile = dag_file(Dir, Compiler, Label),
      try restore_dag(Graph, DagFile, CritMeta) of
          _Restored ->
              Graph
      catch
          _:_ ->
              %% The graph is deliberately not flagged dirty here, so that no
              %% DAG file gets created for a compiler that ends up unused.
              ?WARN("Failed to restore ~ts file. Discarding it.~n", [DagFile]),
              file:delete(DagFile),
              Graph
      end.
  %% Forget source files that were renamed or deleted.
  %%
  %% Every `.erl' vertex of `G' that is not listed in `Erls' and that lives
  %% under one of `SrcDirs' is handed to `maybe_rm_beam_and_edge/3'. That
  %% function checks whether the file is really gone and, if so, drops it from
  %% the graph and deletes its beam file from `EbinDir'.
  %%
  %% `SrcDirs' is a list of directories: each one is split into path
  %% components and compared as a prefix of the candidate file's components.
  -spec prune(dag(), [file:filename_all()], file:filename_all(), [file:filename_all()]) -> ok.
  prune(G, SrcDirs, EbinDir, Erls) ->
      SrcParts = [filename:split(SrcDir) || SrcDir <- SrcDirs],
      InSrcDirs = fun(File) ->
          FileParts = filename:split(File),
          lists:any(fun(Src) -> lists:prefix(Src, FileParts) end, SrcParts)
      end,
      %% The vertex list is snapshotted before anything is deleted, so that
      %% removing vertices below cannot affect the candidates. The extension
      %% test runs before the directory test, exactly as in the filter chain
      %% this replaces.
      Candidates = [File || File <- lists:sort(digraph:vertices(G)) -- lists:sort(Erls),
                            filename:extension(File) =:= ".erl",
                            InSrcDirs(File)],
      lists:foreach(fun(File) -> maybe_rm_beam_and_edge(G, EbinDir, File) end,
                    Candidates),
      ok.
  %% @doc Walk every source file given and record in the graph the files it
  %% depends on (other .erl or .hrl files found through `InDirs'), as reported
  %% by the compiler module's `dependencies' callback.
  %%
  %% Change detection (based on last-modified stamps) happens during the same
  %% traversal that discovers dependencies. The result is a mutually recursive,
  %% depth-first walk that relies on side effects on the graph and on a precise
  %% visiting order to propagate file changes: the dependencies of a file are
  %% always processed before the file itself.
  %%
  %% To be replaced by a more declarative EPP-based flow.
  -spec update(dag(), module(), [file:filename_all()], [file:filename_all()]) -> ok.
  update(G, Compiler, InDirs, Sources) ->
      lists:foreach(fun(Source) -> update_source(G, Compiler, InDirs, Source) end,
                    Sources).

  %% @private Process a single source file: unknown files are added to the
  %% graph, known ones are checked against the file system.
  update_source(G, Compiler, InDirs, Source) ->
      case digraph:vertex(G, Source) of
          false ->
              add_to_dag(G, Compiler, InDirs, Source,
                         filelib:last_modified(Source), filename:dirname(Source));
          {_, LastUpdated} ->
              update_known(G, Compiler, InDirs, Source, LastUpdated,
                           filelib:last_modified(Source))
      end.

  %% @private Handle a file that already has a vertex, given the stamp stored
  %% in the graph (`LastUpdated') and the stamp read from disk.
  update_known(G, _Compiler, _InDirs, Source, _LastUpdated, 0) ->
      %% The file no longer exists: drop its vertex. Its edges are removed
      %% along with it.
      digraph:del_vertex(G, Source),
      mark_dirty(G);
  update_known(G, Compiler, InDirs, Source, LastUpdated, LastModified)
    when LastUpdated < LastModified ->
      %% The file changed on disk since it was recorded: analyze it again.
      add_to_dag(G, Compiler, InDirs, Source, LastModified, filename:dirname(Source));
  update_known(G, Compiler, InDirs, Source, LastUpdated, _LastModified) ->
      %% The file itself is unchanged, but its dependencies must be explored
      %% before it so that their changes can be propagated to it.
      Deps = digraph:out_neighbours(G, Source),
      update(G, Compiler, InDirs, Deps),
      AlreadyDirty = is_dirty(G),
      MaxModified = update_max_modified_deps(G, Source),
      case AlreadyDirty orelse MaxModified > LastUpdated of
          true -> mark_dirty(G);
          false -> ok
      end.
  %% Write the graph to its DAG file, but only if it is flagged as dirty.
  %%
  %% `Dir', `Compiler' and `Label' select the file through `dag_file/3';
  %% `CritMeta' is stored alongside the graph. The dirty flag is cleared before
  %% serializing because it is carried by a vertex of the graph and would
  %% otherwise be saved with it.
  %%
  %% Returns `ok' when there is nothing to store, otherwise the result of the
  %% file write.
  -spec maybe_store(digraph:graph(), file:filename_all(), atom(),
                    string() | undefined, term()) -> ok | {error, term()}.
  maybe_store(G, Dir, Compiler, Label, CritMeta) ->
      case is_dirty(G) of
          true ->
              clear_dirty(G),
              File = dag_file(Dir, Compiler, Label),
              store_dag(G, File, CritMeta);
          false ->
              ok
      end.
  %% Release the in-memory graph. The match asserts that `digraph:delete/1'
  %% reported success; the graph must not be used afterwards.
  -spec terminate(digraph:graph()) -> true.
  terminate(G) ->
      true = digraph:delete(G).
  100. %%%%%%%%%%%%%%%
  101. %%% PRIVATE %%%
  102. %%%%%%%%%%%%%%%
  %% @private Compute the path of the DAG file. The compiler module and an
  %% optional custom label both take part in the name, so that distinct
  %% compiler runs do not clobber each other's files. The `undefined' label
  %% denotes the default run and yields the unsuffixed file name, in line with
  %% previous versions of the file.
  dag_file(Dir, CompilerMod, Label) ->
      CacheDir = rebar_dir:local_cache_dir(Dir),
      FileName = case Label of
          undefined -> ?DAG_ROOT ++ ?DAG_EXT;
          _ -> ?DAG_ROOT ++ "_" ++ Label ++ ?DAG_EXT
      end,
      filename:join([CacheDir, CompilerMod, FileName]).
  %% @private Load the vertices and edges stored in `File' into `G'.
  %% A file that cannot be read is silently ignored and leaves `G' untouched.
  %% A file that can be read must decode to a `#dag{}' record carrying the
  %% current version and the same `CritMeta'; anything else fails the match
  %% and raises, which aborts the restore.
  restore_dag(G, File, CritMeta) ->
      case file:read_file(File) of
          {error, _Reason} ->
              ok;
          {ok, Data} ->
              %% `CritMeta' is already bound, so this match doubles as the
              %% check that the stored critical metadata is unchanged.
              #dag{vsn=?DAG_VSN, info={Vs, Es, CritMeta}} = binary_to_term(Data),
              %% Entries that do not have the expected shape are skipped.
              lists:foreach(fun({V, LastUpdated}) -> digraph:add_vertex(G, V, LastUpdated);
                               (_) -> ok
                            end, Vs),
              lists:foreach(fun({_, V1, V2, _}) -> digraph:add_edge(G, V1, V2);
                               (_) -> ok
                            end, Es),
              ok
      end.
  %% @private Serialize every vertex and edge of `G', together with
  %% `CritMeta', into `File'. The parent directory is created first if needed.
  %% Returns the result of `file:write_file/2'.
  store_dag(G, File, CritMeta) ->
      ok = filelib:ensure_dir(File),
      Vertices = [digraph:vertex(G, V) || V <- digraph:vertices(G)],
      Edges = [digraph:edge(G, E) || E <- digraph:edges(G)],
      Snapshot = #dag{info={Vertices, Edges, CritMeta}},
      file:write_file(File, term_to_binary(Snapshot, [{compressed, 2}])).
  %% Remove `Source' from the digraph when the file no longer exists, and in
  %% that case also delete the beam file built from it in `OutDir'.
  %% Returns `true' when the source was dropped, `false' when it still exists.
  maybe_rm_beam_and_edge(G, OutDir, Source) ->
      %% Not a redundant check: this is the only place verifying that the
      %% source file is actually gone.
      case filelib:is_regular(Source) of
          false ->
              Beam = target_base(OutDir, Source) ++ ".beam",
              ?DEBUG("Source ~ts is gone, deleting previous beam file if it exists ~ts", [Source, Beam]),
              file:delete(Beam),
              digraph:del_vertex(G, Source),
              mark_dirty(G),
              true;
          true ->
              %% The file is still there; keep everything.
              false
      end.
  %% @private Compute the extension-less path an erl file maps to once
  %% relocated into the target directory: the directory part of `Source' and a
  %% trailing ".erl" are dropped, and the remainder is joined to `OutDir'.
  %% For example:
  %% target_base("ebin/", "src/my_module.erl") -> "ebin/my_module"
  target_base(OutDir, Source) ->
      ModuleName = filename:basename(Source, ".erl"),
      filename:join(OutDir, ModuleName).
  %% @private `Source' either changed or was not part of the DAG yet: record
  %% it with its `LastModified' stamp, replace its outgoing edges with its
  %% current dependencies, and flag the graph as dirty.
  %% Returns the dependencies reported by the compiler module.
  add_to_dag(G, Compiler, InDirs, Source, LastModified, SourceDir) ->
      Deps = Compiler:dependencies(Source, SourceDir, InDirs),
      digraph:add_vertex(G, Source, LastModified),
      OldEdges = digraph:out_edges(G, Source),
      digraph:del_edges(G, OldEdges),
      %% Each dependency is explored before the edge from the module to it
      %% is added.
      lists:foreach(fun(Dep) ->
                        update(G, Compiler, InDirs, [Dep]),
                        digraph:add_edge(G, Source, Dep)
                    end, Deps),
      mark_dirty(G),
      Deps.
  %% @private Propagate change stamps from dependencies to `Source': the stamp
  %% stored for `Source' becomes the largest stamp found among `Source' itself
  %% and its direct dependencies (or 0 when none is larger). Since the same is
  %% done for every file, stamps of nested header files make their way up to
  %% the final module.
  %% This matters because the module is at some point compared to the stamp of
  %% its .beam file to decide whether it needs rebuilding. The responsibility
  %% for that comparison is spread across various modules.
  %% Returns the stamp that was stored.
  update_max_modified_deps(G, Source) ->
      Files = [Source | digraph:out_neighbours(G, Source)],
      %% Files without a vertex yield `false' and are filtered out here.
      Stamps = [Stamp || {_, Stamp} <- [digraph:vertex(G, File) || File <- Files]],
      Newest = lists:max([0 | Stamps]),
      digraph:add_vertex(G, Source, Newest),
      Newest.
  %% Flag the digraph as modified, so that its updated form gets saved to disk
  %% after the compiling run.
  %% The flag is carried by a magic vertex. This is less than ideal: code
  %% listing the vertices may expect only file names and will come across
  %% this one trick atom as well.
  mark_dirty(G) ->
      DirtyBit = '$r3_dirty_bit',
      _ = digraph:add_vertex(G, DirtyBit, true),
      ok.
  %% Tell whether the digraph has been flagged as modified. A graph without
  %% the magic vertex is clean; otherwise the vertex label is the answer.
  is_dirty(G) ->
      case digraph:vertex(G, '$r3_dirty_bit') of
          false -> false;
          {_, Flag} -> Flag
      end.
  %% Drop the dirty flag. Saving a digraph to disk saves all of its vertices,
  %% so the magic vertex must be removed before serializing the graph.
  clear_dirty(G) ->
      DirtyBit = '$r3_dirty_bit',
      digraph:del_vertex(G, DirtyBit).