選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

529 行
18 KiB

  1. %% Vendored from hex_core v0.6.8, do not edit manually
  2. -module(r3_hex_tarball).
  3. -export([create/2, create_docs/1, unpack/2, unpack_docs/2, format_checksum/1, format_error/1]).
  4. -ifdef(TEST).
  5. -export([do_decode_metadata/1, gzip/1, normalize_requirements/1]).
  6. -endif.
  7. -define(VERSION, <<"3">>).
  8. -define(TARBALL_MAX_SIZE, 8 * 1024 * 1024).
  9. -define(TARBALL_MAX_UNCOMPRESSED_SIZE, 64 * 1024 * 1024).
  10. -define(BUILD_TOOL_FILES, [
  11. {<<"mix.exs">>, <<"mix">>},
  12. {<<"rebar.config">>, <<"rebar3">>},
  13. {<<"rebar">>, <<"rebar3">>},
  14. {<<"Makefile">>, <<"make">>},
  15. {<<"Makefile.win">>, <<"make">>}
  16. ]).
  17. -include_lib("kernel/include/file.hrl").
  18. -type checksum() :: binary().
  19. -type contents() :: #{filename() => binary()}.
  20. -type filename() :: string().
  21. -type files() :: [{filename(), filename() | binary()}].
  22. -type metadata() :: map().
  23. -type tarball() :: binary().
  24. %%====================================================================
  25. %% API functions
  26. %%====================================================================
  %% @doc
  %% Creates a package tarball.
  %%
  %% Returns the binary of the tarball together with its "inner checksum" and
  %% "outer checksum". The inner checksum is deprecated in favor of the outer
  %% checksum.
  %%
  %% Returns `{error, {tarball, too_big}}' when the outer tarball is larger than
  %% TARBALL_MAX_SIZE bytes or the uncompressed contents tarball is larger than
  %% TARBALL_MAX_UNCOMPRESSED_SIZE bytes.
  %%
  %% Examples:
  %%
  %% ```
  %% > Metadata = #{<<"name">> => <<"foo">>, <<"version">> => <<"1.0.0">>},
  %% > Files = [{"src/foo.erl", <<"-module(foo).">>}],
  %% > r3_hex_tarball:create(Metadata, Files).
  %% {ok, #{tarball => <<86,69,...>>,
  %%        outer_checksum => <<40,32,...>>,
  %%        inner_checksum => <<178,12,...>>}}
  %% '''
  %% @end
  -spec create(metadata(), files()) ->
      {ok, #{tarball => tarball(), outer_checksum => checksum(), inner_checksum => checksum()}}
      | {error, term()}.
  create(Metadata, Files) ->
      MetadataBinary = encode_metadata(Metadata),
      ContentsTarball = create_memory_tarball(Files),
      ContentsTarballCompressed = gzip(ContentsTarball),
      %% The inner checksum covers VERSION, metadata and the compressed contents.
      InnerChecksum = inner_checksum(?VERSION, MetadataBinary, ContentsTarballCompressed),
      InnerChecksumBase16 = encode_base16(InnerChecksum),
      OuterFiles = [
          {"VERSION", ?VERSION},
          {"CHECKSUM", InnerChecksumBase16},
          {"metadata.config", MetadataBinary},
          {"contents.tar.gz", ContentsTarballCompressed}
      ],
      Tarball = create_memory_tarball(OuterFiles),
      %% The outer checksum is the hash of the complete outer tarball.
      OuterChecksum = checksum(Tarball),
      UncompressedSize = byte_size(ContentsTarball),
      TooBig =
          (byte_size(Tarball) > ?TARBALL_MAX_SIZE) orelse
              (UncompressedSize > ?TARBALL_MAX_UNCOMPRESSED_SIZE),
      case TooBig of
          true ->
              {error, {tarball, too_big}};
          false ->
              {ok, #{
                  tarball => Tarball,
                  outer_checksum => OuterChecksum,
                  inner_checksum => InnerChecksum
              }}
      end.
  %% @doc
  %% Creates a docs tarball.
  %%
  %% The files are packed into a tar archive which is then gzip-compressed.
  %% Returns `{error, {tarball, too_big}}' when the compressed tarball is larger
  %% than TARBALL_MAX_SIZE bytes or the uncompressed tarball is larger than
  %% TARBALL_MAX_UNCOMPRESSED_SIZE bytes.
  %%
  %% Examples:
  %%
  %% ```
  %% > Files = [{"doc/index.html", <<"Docs">>}],
  %% > r3_hex_tarball:create_docs(Files).
  %% {ok, <<86,69,...>>}
  %% '''
  %% @end
  -spec create_docs(files()) -> {ok, tarball()} | {error, term()}.
  create_docs(Files) ->
      UncompressedTarball = create_memory_tarball(Files),
      UncompressedSize = byte_size(UncompressedTarball),
      Tarball = gzip(UncompressedTarball),
      Size = byte_size(Tarball),
      TooBig =
          (Size > ?TARBALL_MAX_SIZE) orelse
              (UncompressedSize > ?TARBALL_MAX_UNCOMPRESSED_SIZE),
      case TooBig of
          true ->
              {error, {tarball, too_big}};
          false ->
              {ok, Tarball}
      end.
  %% @doc
  %% Unpacks a package tarball.
  %%
  %% With `memory' as the second argument the contents are returned in the
  %% result map; with a directory path they are extracted to that directory.
  %%
  %% Remember to verify the outer tarball checksum against the registry checksum
  %% returned from `r3_hex_repo:get_package(Config, Package)'.
  %%
  %% Returns `{error, {tarball, too_big}}' when the tarball is larger than
  %% TARBALL_MAX_SIZE bytes and `{error, {tarball, empty}}' when it has no entries.
  %%
  %% Examples:
  %%
  %% ```
  %% > r3_hex_tarball:unpack(Tarball, memory).
  %% {ok,#{outer_checksum => <<...>>,
  %%       contents => [{"src/foo.erl",<<"-module(foo).">>}],
  %%       metadata => #{<<"name">> => <<"foo">>, ...}}}
  %%
  %% > r3_hex_tarball:unpack(Tarball, "path/to/unpack").
  %% {ok,#{outer_checksum => <<...>>,
  %%       metadata => #{<<"name">> => <<"foo">>, ...}}}
  %% '''
  %% @end
  -spec unpack(tarball(), memory) ->
      {ok, #{
          inner_checksum => checksum(),
          outer_checksum => checksum(),
          metadata => metadata(),
          contents => contents()
      }}
      | {error, term()};
      (tarball(), filename()) ->
      {ok, #{
          inner_checksum => checksum(),
          outer_checksum => checksum(),
          metadata => metadata()
      }}
      | {error, term()}.
  unpack(Tarball, _) when byte_size(Tarball) > ?TARBALL_MAX_SIZE ->
      {error, {tarball, too_big}};
  unpack(Tarball, Output) ->
      %% The outer tarball is always read into memory; only the inner contents
      %% tarball is extracted according to Output.
      case r3_hex_erl_tar:extract({binary, Tarball}, [memory]) of
          {ok, []} ->
              {error, {tarball, empty}};
          {ok, FileList} ->
              OuterChecksum = checksum(Tarball),
              do_unpack(maps:from_list(FileList), OuterChecksum, Output);
          {error, Reason} ->
              {error, {tarball, Reason}}
      end.
  %% @doc
  %% Unpacks a documentation tarball.
  %%
  %% Tarballs larger than TARBALL_MAX_SIZE bytes are rejected with
  %% `{error, {tarball, too_big}}'; anything else is handed to the gzip-aware
  %% tar extraction, either into memory or into the given directory.
  %%
  %% Examples:
  %%
  %% ```
  %% > r3_hex_tarball:unpack_docs(Tarball, memory).
  %% {ok, [{"index.html", <<"<!doctype>">>}, ...]}
  %%
  %% > r3_hex_tarball:unpack_docs(Tarball, "path/to/unpack").
  %% ok
  %% '''
  -spec unpack_docs(tarball(), memory) -> {ok, contents()} | {error, term()};
      (tarball(), filename()) -> ok | {error, term()}.
  unpack_docs(Tarball, Output) ->
      if
          byte_size(Tarball) > ?TARBALL_MAX_SIZE ->
              {error, {tarball, too_big}};
          true ->
              unpack_tarball(Tarball, Output)
      end.
  %% @doc
  %% Renders a checksum as its base16 (hexadecimal) text form.
  -spec format_checksum(checksum()) -> binary().
  format_checksum(Digest) ->
      encode_base16(Digest).
  %% @doc
  %% Converts an error reason term to a human-readable error message string.
  %%
  %% Handles the reasons produced by this module: `{tarball, _}',
  %% `{inner_tarball, _}', `{metadata, _}' and checksum mismatches. Tar and
  %% metadata-scanner reasons that are not recognised here are delegated to
  %% `r3_hex_erl_tar:format_error/1' and `r3_safe_erl_term:format_error/1'.
  -spec format_error(term()) -> string().
  format_error({tarball, empty}) ->
      "empty tarball";
  format_error({tarball, too_big}) ->
      "tarball is too big";
  format_error({tarball, {missing_files, Files}}) ->
      io_lib:format("missing files: ~p", [Files]);
  format_error({tarball, {bad_version, Vsn}}) ->
      io_lib:format("unsupported version: ~p", [Vsn]);
  format_error({tarball, invalid_checksum}) ->
      "invalid tarball checksum";
  %% Reasons returned by check_inner_checksum/1; they must come before the
  %% generic {tarball, Reason} clause below.
  format_error({tarball, invalid_inner_checksum}) ->
      "invalid inner tarball checksum";
  format_error({tarball, {inner_checksum_mismatch, ExpectedChecksum, ActualChecksum}}) ->
      io_lib:format(
          "inner tarball checksum mismatch~n~n" ++
              "Expected (base16-encoded): ~s~n" ++
              "Actual (base16-encoded): ~s",
          [encode_base16(ExpectedChecksum), encode_base16(ActualChecksum)]
      );
  format_error({tarball, Reason}) ->
      "tarball error, " ++ r3_hex_erl_tar:format_error(Reason);
  format_error({inner_tarball, Reason}) ->
      "inner tarball error, " ++ r3_hex_erl_tar:format_error(Reason);
  format_error({metadata, invalid_terms}) ->
      "error reading package metadata: invalid terms";
  format_error({metadata, not_key_value}) ->
      "error reading package metadata: not in key-value format";
  format_error({metadata, Reason}) ->
      "error reading package metadata: " ++ r3_safe_erl_term:format_error(Reason);
  format_error({checksum_mismatch, ExpectedChecksum, ActualChecksum}) ->
      io_lib:format(
          "tarball checksum mismatch~n~n" ++
              "Expected (base16-encoded): ~s~n" ++
              "Actual (base16-encoded): ~s",
          [encode_base16(ExpectedChecksum), encode_base16(ActualChecksum)]
      ).
  167. %%====================================================================
  168. %% Internal functions
  169. %%====================================================================
  %% SHA-256 over the concatenation of version, metadata and contents binaries.
  inner_checksum(Version, MetadataBinary, ContentsBinary) ->
      crypto:hash(
          sha256,
          <<Version/binary, MetadataBinary/binary, ContentsBinary/binary>>
      ).
  %% SHA-256 digest of a binary; any non-binary argument fails the clause match.
  checksum(<<_/binary>> = Payload) ->
      crypto:hash(sha256, Payload).
  %% Serialises a metadata map as consecutive Erlang terms, one `{Key, Value}.'
  %% term per map entry, each followed by a newline.
  encode_metadata(Meta) ->
      iolist_to_binary([encode_metadata_pair(Pair) || Pair <- maps:to_list(Meta)]).

  %% Pretty-prints a single binarified pair and terminates it with ".\n".
  encode_metadata_pair(Pair) ->
      Printed = io_lib_pretty:print(binarify(Pair), [{encoding, utf8}]),
      unicode:characters_to_binary([Printed, ".\n"]).
  %% Runs the validation pipeline over the outer tarball's files and finishes
  %% the unpack. Each step receives the state map or a previous `{error, _}';
  %% the steps after check_files/1 pass errors through unchanged.
  do_unpack(Files, OuterChecksum, Output) ->
      Initial = #{
          inner_checksum => undefined,
          outer_checksum => OuterChecksum,
          contents => undefined,
          files => Files,
          metadata => undefined,
          output => Output
      },
      Steps = [
          fun check_files/1,
          fun check_version/1,
          fun check_inner_checksum/1,
          fun decode_metadata/1
      ],
      Checked = lists:foldl(fun(Step, Acc) -> Step(Acc) end, Initial, Steps),
      finish_unpack(Checked).
  %% Final unpack step: extracts the inner contents tarball and builds the result.
  %%
  %% An `{error, _}' from an earlier step is returned unchanged. For `memory'
  %% output the contents are returned in the result map. For a directory output
  %% the contents are extracted there and the raw metadata is written next to
  %% them as "hex_metadata.config".
  finish_unpack({error, _} = Error) ->
      Error;
  finish_unpack(#{
      metadata := Metadata,
      files := Files,
      inner_checksum := InnerChecksum,
      outer_checksum := OuterChecksum,
      output := Output
  }) ->
      ContentsBinary = maps:get("contents.tar.gz", Files),
      %% Only create the target directory for real paths. filename:join/2 accepts
      %% atoms, so doing this for `memory' would create a stray "memory"
      %% directory in the current working directory.
      _ =
          case Output of
              memory -> ok;
              _ -> filelib:ensure_dir(filename:join(Output, "*"))
          end,
      case unpack_tarball(ContentsBinary, Output) of
          ok ->
              copy_metadata_config(Output, maps:get("metadata.config", Files)),
              {ok, #{
                  inner_checksum => InnerChecksum,
                  outer_checksum => OuterChecksum,
                  metadata => Metadata
              }};
          {ok, Contents} ->
              {ok, #{
                  inner_checksum => InnerChecksum,
                  outer_checksum => OuterChecksum,
                  metadata => Metadata,
                  contents => Contents
              }};
          {error, Reason} ->
              {error, {inner_tarball, Reason}}
      end.
  %% Writes the raw metadata binary to "hex_metadata.config" inside Output.
  %% Crashes with badmatch if the file cannot be written.
  copy_metadata_config(Output, MetadataBinary) ->
      Target = filename:join(Output, "hex_metadata.config"),
      ok = file:write_file(Target, MetadataBinary).
  %% Verifies that the outer tarball contains every file the format requires.
  %% Returns the state unchanged, or `{error, {tarball, {missing_files, Names}}}'.
  check_files(#{files := Files} = State) ->
      Required = ["VERSION", "CHECKSUM", "metadata.config", "contents.tar.gz"],
      Outcome = diff_keys(Files, Required, []),
      case Outcome of
          {error, {missing_keys, Missing}} ->
              {error, {tarball, {missing_files, Missing}}};
          ok ->
              State
      end.
  %% Accepts only tarballs whose VERSION file is exactly <<"3">>.
  %% A previous `{error, _}' is passed through unchanged.
  check_version({error, _} = Error) ->
      Error;
  check_version(#{files := Files} = State) ->
      check_version(maps:get("VERSION", Files), State).

  %% Dispatches on the VERSION file contents.
  check_version(<<"3">>, State) ->
      State;
  check_version(Unsupported, _State) ->
      {error, {tarball, {bad_version, Unsupported}}}.
  % Note: This checksum is deprecated
  %
  % Validates the CHECKSUM file of the outer tarball against the SHA-256 of
  % VERSION ++ metadata.config ++ contents.tar.gz.
  %
  % Returns the state with `inner_checksum' set on success, otherwise
  % `{error, {tarball, invalid_inner_checksum}}' when the CHECKSUM file is not a
  % base16 encoding of 32 bytes, or
  % `{error, {tarball, {inner_checksum_mismatch, Expected, Actual}}}'.
  % A previous `{error, _}' is passed through unchanged.
  check_inner_checksum({error, _} = Error) ->
      Error;
  check_inner_checksum(#{files := Files} = State) ->
      ChecksumBase16 = maps:get("CHECKSUM", Files),
      %% CHECKSUM comes from the (untrusted) tarball: a non-hex character makes
      %% unhex/1 fail with function_clause, which must be reported as an invalid
      %% checksum rather than crash the caller.
      try decode_base16(ChecksumBase16) of
          ExpectedChecksum when byte_size(ExpectedChecksum) =:= 32 ->
              compare_inner_checksum(ExpectedChecksum, State);
          _WrongSize ->
              {error, {tarball, invalid_inner_checksum}}
      catch
          error:function_clause ->
              {error, {tarball, invalid_inner_checksum}}
      end.

  % Recomputes the inner checksum from the tarball files and compares it with
  % the decoded expected value.
  compare_inner_checksum(ExpectedChecksum, #{files := Files} = State) ->
      Version = maps:get("VERSION", Files),
      MetadataBinary = maps:get("metadata.config", Files),
      ContentsBinary = maps:get("contents.tar.gz", Files),
      case inner_checksum(Version, MetadataBinary, ContentsBinary) of
          ExpectedChecksum ->
              maps:put(inner_checksum, ExpectedChecksum, State);
          ActualChecksum ->
              {error, {tarball, {inner_checksum_mismatch, ExpectedChecksum, ActualChecksum}}}
      end.
  %% Parses "metadata.config" and stores the normalised result under `metadata'.
  %% A previous `{error, _}' is passed through; so is any non-map result of the
  %% parser (its `{error, _}' tuples).
  decode_metadata({error, _} = Error) ->
      Error;
  decode_metadata(#{files := #{"metadata.config" := Binary}} = State) when is_binary(Binary) ->
      case do_decode_metadata(Binary) of
          Parsed when is_map(Parsed) ->
              State#{metadata => normalize_metadata(Parsed)};
          NotAMap ->
              NotAMap
      end.
  %% Scans and parses the metadata binary with r3_safe_erl_term and turns the
  %% resulting terms into a map. Returns the map or `{error, {metadata, Reason}}'.
  do_decode_metadata(Binary) when is_binary(Binary) ->
      {ok, Chars} = characters_to_list(Binary),
      case r3_safe_erl_term:string(Chars) of
          {error, {_ErrorLine, r3_safe_erl_term, Reason}, _EndLine} ->
              {error, {metadata, Reason}};
          {ok, Tokens, _EndLine} ->
              tokens_to_metadata(Tokens)
      end.

  %% Converts scanned tokens to a map, translating the two expected failures
  %% into tagged errors.
  tokens_to_metadata(Tokens) ->
      try
          maps:from_list(r3_safe_erl_term:terms(Tokens))
      catch
          error:function_clause ->
              {error, {metadata, invalid_terms}};
          error:badarg ->
              {error, {metadata, not_key_value}}
      end.
  %% Converts a binary to a list of characters.
  %%
  %% The binary is first decoded as UTF-8. If it is not valid UTF-8, including
  %% the case where it ends in the middle of a multi-byte sequence
  %% (`{incomplete, _, _}'), it is decoded as latin1 instead.
  %% Returns `{ok, List}'; a failing latin1 conversion is returned as-is.
  characters_to_list(Binary) ->
      case unicode:characters_to_list(Binary) of
          List when is_list(List) ->
              {ok, List};
          {Tag, _, _} when Tag =:= error; Tag =:= incomplete ->
              case unicode:characters_to_list(Binary, latin1) of
                  Latin1 when is_list(Latin1) -> {ok, Latin1};
                  Other -> Other
              end
      end.
  %% Normalises decoded metadata: converts "requirements", "links" and "extra"
  %% (when present) with their converters, then fills in "build_tools".
  normalize_metadata(Metadata) ->
      Converters = [
          {<<"requirements">>, fun normalize_requirements/1},
          {<<"links">>, fun try_into_map/1},
          {<<"extra">>, fun try_into_map/1}
      ],
      Converted = lists:foldl(
          fun({Key, Convert}, Acc) -> maybe_update_with(Key, Convert, Acc) end,
          Metadata,
          Converters
      ),
      guess_build_tools(Converted).
  %% Normalises the "requirements" metadata value.
  %% A non-empty list whose first element is itself a list is treated as the
  %% legacy format (one proplist per requirement); anything else goes through
  %% try_into_map/2.
  normalize_requirements([First | _] = Requirements) when is_list(First) ->
      Pairs = [normalize_legacy_requirement(Req) || Req <- Requirements],
      maps:from_list(Pairs);
  normalize_requirements(Requirements) ->
      try_into_map(fun normalize_normal_requirement/1, Requirements).
  %% Converts the value of a `{Name, Requirement}' pair via try_into_map/1.
  normalize_normal_requirement({Name, Requirement}) ->
      Converted = try_into_map(Requirement),
      {Name, Converted}.
  %% Turns a legacy requirement proplist into `{Name, Fields}', where Name is the
  %% <<"name">> entry and Fields is a map of the remaining entries.
  normalize_legacy_requirement(Requirement) ->
      Fields = maps:from_list(Requirement),
      {maps:get(<<"name">>, Fields), maps:remove(<<"name">>, Fields)}.
  %% Ensures the metadata has a "build_tools" entry.
  %% An existing list is kept. Otherwise, when "files" is present, the tools are
  %% derived from well-known build files found at the top level of the package
  %% (see BUILD_TOOL_FILES). Metadata with neither key is returned unchanged.
  guess_build_tools(#{<<"build_tools">> := Existing} = Metadata) when is_list(Existing) ->
      Metadata;
  guess_build_tools(#{<<"files">> := Filenames} = Metadata) ->
      %% Keep only entries with no directory component.
      TopLevel = [
          Name
       || Name <- Filenames, filename:dirname(binary_to_list(Name)) == "."
      ],
      Detected = [
          Tool
       || {Marker, Tool} <- ?BUILD_TOOL_FILES, lists:member(Marker, TopLevel)
      ],
      Metadata#{<<"build_tools">> => lists:usort(Detected)};
  guess_build_tools(Metadata) ->
      Metadata.
  307. %%====================================================================
  308. %% Tar Helpers
  309. %%====================================================================
  %% Extracts a gzip-compressed tarball, either into memory or into the
  %% directory Output. After extracting to disk, every extracted path gets its
  %% mtime reset to the current time on a best-effort basis.
  unpack_tarball(ContentsBinary, memory) ->
      r3_hex_erl_tar:extract({binary, ContentsBinary}, [memory, compressed]);
  unpack_tarball(ContentsBinary, Output) ->
      filelib:ensure_dir(filename:join(Output, "*")),
      Result = r3_hex_erl_tar:extract({binary, ContentsBinary}, [{cwd, Output}, compressed]),
      case Result of
          ok ->
              Extracted = filelib:wildcard("**", Output),
              lists:foreach(
                  fun(Path) -> try_updating_mtime(filename:join(Output, Path)) end,
                  Extracted
              ),
              ok;
          NotOk ->
              NotOk
      end.
  %% Sets the mtime of Path to the current universal time.
  %% The result is deliberately ignored so that bad symlinks fail silently.
  try_updating_mtime(Path) ->
      Info = #file_info{mtime = calendar:universal_time()},
      _ = file:write_file_info(Path, Info, [{time, universal}]),
      ok.
  %% Builds an uncompressed tarball from Files and returns it as a binary.
  %%
  %% The archive is written to a temporary file (see tmp_path/0), read back and
  %% then removed. The temporary file is removed even when adding a file or
  %% reading the archive back fails, so a failed call leaves nothing behind.
  create_memory_tarball(Files) ->
      Path = tmp_path(),
      {ok, Tar} = r3_hex_erl_tar:open(Path, [write]),
      try
          try
              add_files(Tar, Files)
          after
              ok = r3_hex_erl_tar:close(Tar)
          end,
          {ok, Tarball} = file:read_file(Path),
          Tarball
      after
          %% Best-effort cleanup: a failing delete must not mask the original
          %% error raised inside the try body.
          _ = file:delete(Path)
      end.
  %% Returns a relative file name "tmp_" followed by 64 hex characters derived
  %% from 32 cryptographically strong random bytes.
  tmp_path() ->
      Suffix = encode_base16(crypto:strong_rand_bytes(32)),
      "tmp_" ++ binary_to_list(Suffix).
  %% Adds every entry of Files to the open tar handle, in order.
  %% Returns the list of the individual add_file/2 results.
  add_files(Tar, Files) when is_list(Files) ->
      [add_file(Tar, Entry) || Entry <- Files].
  %% Adds one entry to the open tar handle. Accepted forms:
  %%   {Filename, Contents}    - in-memory binary stored under Filename
  %%   Filename                - file on disk stored under the same name
  %%   {Filename, AbsFilename} - file on disk stored under Filename
  add_file(Tar, {Filename, Contents}) when is_list(Filename), is_binary(Contents) ->
      ok = r3_hex_erl_tar:add(Tar, Contents, Filename, tar_opts());
  add_file(Tar, Filename) when is_list(Filename) ->
      add_file(Tar, {Filename, Filename});
  add_file(Tar, {Filename, AbsFilename}) when is_list(Filename), is_list(AbsFilename) ->
      {ok, Info} = file:read_link_info(AbsFilename, []),
      add_file_by_type(Info#file_info.type, Info, Tar, Filename, AbsFilename).

  %% Symlinks are added as links. Empty directories are added as entries, while
  %% non-empty directories are skipped. Everything else is read and added with
  %% its original mode.
  add_file_by_type(symlink, _Info, Tar, Filename, AbsFilename) ->
      ok = r3_hex_erl_tar:add(Tar, {Filename, AbsFilename}, tar_opts());
  add_file_by_type(directory, _Info, Tar, Filename, AbsFilename) ->
      case file:list_dir(AbsFilename) of
          {ok, []} ->
              r3_hex_erl_tar:add(Tar, {Filename, AbsFilename}, tar_opts());
          {ok, _} ->
              ok
      end;
  add_file_by_type(_Other, Info, Tar, Filename, AbsFilename) ->
      Mode = Info#file_info.mode,
      {ok, Contents} = file:read_file(AbsFilename),
      ok = r3_hex_erl_tar:add(Tar, Contents, Filename, Mode, tar_opts()).
  %% Fixed tar entry options: all timestamps are set to 2000-01-01 00:00:00
  %% expressed as seconds since the Unix epoch, and uid/gid are 0.
  tar_opts() ->
      ToSeconds = fun calendar:datetime_to_gregorian_seconds/1,
      Epoch = ToSeconds({{2000, 1, 1}, {0, 0, 0}}) - ToSeconds({{1970, 1, 1}, {0, 0, 0}}),
      [{Key, Epoch} || Key <- [atime, mtime, ctime]] ++ [{uid, 0}, {gid, 0}].
  %% Reproducible gzip: the header carries no mtime and no OS identifier.
  %%
  %% Member layout, from https://tools.ietf.org/html/rfc1952
  %%
  %% +---+---+---+---+---+---+---+---+---+---+
  %% |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
  %% +---+---+---+---+---+---+---+---+---+---+
  %%
  %% +=======================+
  %% |...compressed blocks...| (more-->)
  %% +=======================+
  %%
  %% +---+---+---+---+---+---+---+---+
  %% |     CRC32     |     ISIZE     |
  %% +---+---+---+---+---+---+---+---+
  gzip(Uncompressed) ->
      Body = gzip_no_header(Uncompressed),
      Crc = erlang:crc32(Uncompressed),
      Size = byte_size(Uncompressed),
      %% ID1=31, ID2=139, CM=8 (deflate); FLG, MTIME, XFL and OS are all zero.
      <<31, 139, 8, 0, 0, 0, 0, 0, 0, 0, Body/binary, Crc:32/little, Size:32/little>>.

  %% Raw deflate stream (window bits -15, so no zlib header or trailer).
  %% The zlib handle is always closed.
  gzip_no_header(Uncompressed) ->
      Z = zlib:open(),
      try
          zlib:deflateInit(Z, default, deflated, -15, 8, default),
          Chunks = zlib:deflate(Z, Uncompressed, finish),
          zlib:deflateEnd(Z),
          iolist_to_binary(Chunks)
      after
          zlib:close(Z)
      end.
  399. %%====================================================================
  400. %% Helpers
  401. %%====================================================================
  %% Recursively converts a term for metadata encoding: atoms other than
  %% `undefined', `true' and `false' become UTF-8 binaries, maps become lists of
  %% pairs, and lists and 2-tuples are converted element-wise. Binaries and
  %% numbers are returned as they are.
  binarify(Term) when is_binary(Term); is_number(Term) ->
      Term;
  binarify(undefined) ->
      undefined;
  binarify(Bool) when is_boolean(Bool) ->
      Bool;
  binarify(Atom) when is_atom(Atom) ->
      atom_to_binary(Atom, utf8);
  binarify(List) when is_list(List) ->
      [binarify(Element) || Element <- List];
  binarify({Key, Value}) ->
      {binarify(Key), binarify(Value)};
  binarify(Map) when is_map(Map) ->
      [binarify(Pair) || Pair <- maps:to_list(Map)].
  %% Checks that Map contains every key in RequiredKeys.
  %%
  %% Returns `ok' when nothing is missing, otherwise
  %% `{error, {missing_keys, MissingKeys}}'.
  %%
  %% Keys that are neither required nor optional are deliberately ignored: the
  %% server should validate unknown keys but clients should not. The optional
  %% key list is therefore unused and kept only for the existing call signature.
  diff_keys(Map, RequiredKeys, _OptionalKeys) ->
      case RequiredKeys -- maps:keys(Map) of
          [] ->
              ok;
          MissingKeys ->
              {error, {missing_keys, MissingKeys}}
      end.
  %% Replaces the value stored under Key with Fun(Value) when Key is present;
  %% returns Map unchanged when it is not.
  maybe_update_with(Key, Fun, Map) ->
      case maps:is_key(Key, Map) of
          true ->
              Map#{Key := Fun(maps:get(Key, Map))};
          false ->
              Map
      end.
  %% Converts a list of 2-tuples into a map; any other input is returned as is.
  try_into_map(List) ->
      try_into_map(fun(Pair) -> Pair end, List).

  %% Same as try_into_map/1, but applies Fun to every pair before the map is
  %% built. Fun is not called when Input is not a list made only of 2-tuples.
  try_into_map(Fun, Input) when is_list(Input) ->
      IsPair = fun
          ({_, _}) -> true;
          (_) -> false
      end,
      case lists:all(IsPair, Input) of
          true -> maps:from_list([Fun(Pair) || Pair <- Input]);
          false -> Input
      end;
  try_into_map(_Fun, Input) ->
      Input.
  %% Encodes a binary as upper-case base16 (hexadecimal) text, two characters
  %% per input byte. Works for binaries of any length; a 32-byte checksum yields
  %% the same 64-character result as before.
  encode_base16(Binary) when is_binary(Binary) ->
      << <<(to_hex_digit(Nibble))>> || <<Nibble:4>> <= Binary >>.

  %% Maps a 4-bit value (0..15) to its upper-case hexadecimal character.
  to_hex_digit(Nibble) when Nibble < 10 -> $0 + Nibble;
  to_hex_digit(Nibble) -> $A + Nibble - 10.
  %% Based on https://github.com/goj/base16/blob/master/src/base16.erl
  %% (C) 2012, Erlang Solutions Ltd.
  %%
  %% Decodes base16 text (upper or lower case) into the bytes it represents.
  %% Input is consumed two characters at a time.
  decode_base16(Base16) ->
      << <<(unhex(Hi) * 16 + unhex(Lo))>> || <<Hi, Lo>> <= Base16 >>.

  %% Value (0..15) of a single hexadecimal character; any other character fails
  %% with function_clause.
  unhex(Char) when Char >= $0, Char =< $9 ->
      Char - $0;
  unhex(Char) when Char >= $a, Char =< $f ->
      Char - $a + 10;
  unhex(Char) when Char >= $A, Char =< $F ->
      Char - $A + 10.