You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

507 regels
17 KiB

  1. %% Vendored from hex_core v0.5.0, do not edit manually
  2. -module(r3_hex_tarball).
  3. -export([create/2, create_docs/1, unpack/2, format_checksum/1, format_error/1]).
  4. -ifdef(TEST).
  5. -export([do_decode_metadata/1, gzip/1, normalize_requirements/1]).
  6. -endif.
  7. -define(VERSION, <<"3">>).
  8. -define(TARBALL_MAX_SIZE, 8 * 1024 * 1024).
  9. -define(TARBALL_MAX_UNCOMPRESSED_SIZE, 64 * 1024 * 1024).
  10. -define(BUILD_TOOL_FILES, [
  11. {<<"mix.exs">>, <<"mix">>},
  12. {<<"rebar.config">>, <<"rebar3">>},
  13. {<<"rebar">>, <<"rebar3">>},
  14. {<<"Makefile">>, <<"make">>},
  15. {<<"Makefile.win">>, <<"make">>}
  16. ]).
  17. -include_lib("kernel/include/file.hrl").
  18. -type checksum() :: binary().
  19. -type contents() :: #{filename() => binary()}.
  20. -type filename() :: string().
  21. -type files() :: [filename() | {filename(), filename()}] | contents().
  22. -type metadata() :: map().
  23. -type tarball() :: binary().
  24. %%====================================================================
  25. %% API functions
  26. %%====================================================================
  27. %% @doc
  28. %% Creates a package tarball.
  29. %%
  30. %% Examples:
  31. %%
  32. %% ```
  33. %% > Metadata = #{<<"name">> => <<"foo">>, <<"version">> => <<"1.0.0">>},
  34. %% > Files = [{"src/foo.erl", <<"-module(foo).">>}],
  35. %% > {ok, {Tarball, Checksum}} = r3_hex_tarball:create(Metadata, Files).
  36. %% > Tarball.
  37. %% <<86,69,...>>
  38. %% > Checksum.
  39. %% <<40,32,...>>
  40. %% '''
  41. %% @end
  42. -spec create(metadata(), files()) -> {ok, {tarball(), checksum()}}.
  43. create(Metadata, Files) ->
  44. MetadataBinary = encode_metadata(Metadata),
  45. ContentsTarball = create_memory_tarball(Files),
  46. ContentsTarballCompressed = gzip(ContentsTarball),
  47. Checksum = checksum(?VERSION, MetadataBinary, ContentsTarballCompressed),
  48. ChecksumBase16 = encode_base16(Checksum),
  49. OuterFiles = [
  50. {"VERSION", ?VERSION},
  51. {"CHECKSUM", ChecksumBase16},
  52. {"metadata.config", MetadataBinary},
  53. {"contents.tar.gz", ContentsTarballCompressed}
  54. ],
  55. Tarball = create_memory_tarball(OuterFiles),
  56. UncompressedSize = byte_size(ContentsTarball),
  57. case(byte_size(Tarball) > ?TARBALL_MAX_SIZE) or (UncompressedSize > ?TARBALL_MAX_UNCOMPRESSED_SIZE) of
  58. true ->
  59. {error, {tarball, too_big}};
  60. false ->
  61. {ok, {Tarball, Checksum}}
  62. end.
  63. %% @doc
  64. %% Creates a docs tarball.
  65. %%
  66. %% Examples:
  67. %%
  68. %% ```
  69. %% > Files = [{"doc/index.html", <<"Docs">>}],
  70. %% > {ok, {Tarball, Checksum}} = r3_hex_tarball:create_docs(Files).
  71. %% > Tarball.
  72. %% %%=> <<86,69,...>>
  73. %% > Checksum.
  74. %% %%=> <<40,32,...>>
  75. %% '''
  76. %% @end
  77. -spec create_docs(files()) -> {ok, {tarball(), checksum()}}.
  78. create_docs(Files) ->
  79. UncompressedTarball = create_memory_tarball(Files),
  80. UncompressedSize = byte_size(UncompressedTarball),
  81. Tarball = gzip(UncompressedTarball),
  82. Checksum = checksum(Tarball),
  83. Size = byte_size(Tarball),
  84. case(Size > ?TARBALL_MAX_SIZE) or (UncompressedSize > ?TARBALL_MAX_UNCOMPRESSED_SIZE) of
  85. true ->
  86. {error, {tarball, too_big}};
  87. false ->
  88. {ok, {Tarball, Checksum}}
  89. end.
  90. %% @doc
  91. %% Unpacks a package tarball.
  92. %%
  93. %% Examples:
  94. %%
  95. %% ```
  96. %% > r3_hex_tarball:unpack(Tarball, memory).
  97. %% {ok,#{checksum => <<...>>,
  98. %% contents => [{"src/foo.erl",<<"-module(foo).">>}],
  99. %% metadata => #{<<"name">> => <<"foo">>, ...}}}
  100. %%
  101. %% > r3_hex_tarball:unpack(Tarball, "path/to/unpack").
  102. %% {ok,#{checksum => <<...>>,
  103. %% metadata => #{<<"name">> => <<"foo">>, ...}}}
  104. %% '''
  105. -spec unpack(tarball(), memory) ->
  106. {ok, #{checksum => checksum(), metadata => metadata(), contents => contents()}} |
  107. {error, term()};
  108. (tarball(), filename()) ->
  109. {ok, #{checksum => checksum(), metadata => metadata()}} |
  110. {error, term()}.
  111. unpack(Tarball, _) when byte_size(Tarball) > ?TARBALL_MAX_SIZE ->
  112. {error, {tarball, too_big}};
  113. unpack(Tarball, Output) ->
  114. case r3_hex_erl_tar:extract({binary, Tarball}, [memory]) of
  115. {ok, []} ->
  116. {error, {tarball, empty}};
  117. {ok, FileList} ->
  118. do_unpack(maps:from_list(FileList), Output);
  119. {error, Reason} ->
  120. {error, {tarball, Reason}}
  121. end.
  122. %% @doc
  123. %% Returns base16-encoded representation of checksum.
  124. -spec format_checksum(checksum()) -> binary().
  125. format_checksum(Checksum) ->
  126. encode_base16(Checksum).
  127. %% @doc
  128. %% Converts an error reason term to a human-readable error message string.
  129. -spec format_error(term()) -> string().
  130. format_error({tarball, empty}) -> "empty tarball";
  131. format_error({tarball, too_big}) -> "tarball is too big";
  132. format_error({tarball, {missing_files, Files}}) -> io_lib:format("missing files: ~p", [Files]);
  133. format_error({tarball, {invalid_files, Files}}) -> io_lib:format("invalid files: ~p", [Files]);
  134. format_error({tarball, {bad_version, Vsn}}) -> io_lib:format("unsupported version: ~p", [Vsn]);
  135. format_error({tarball, invalid_checksum}) -> "invalid tarball checksum";
  136. format_error({tarball, Reason}) -> "tarball error, " ++ r3_hex_erl_tar:format_error(Reason);
  137. format_error({inner_tarball, Reason}) -> "inner tarball error, " ++ r3_hex_erl_tar:format_error(Reason);
  138. format_error({metadata, invalid_terms}) -> "error reading package metadata: invalid terms";
  139. format_error({metadata, not_key_value}) -> "error reading package metadata: not in key-value format";
  140. format_error({metadata, Reason}) -> "error reading package metadata" ++ r3_safe_erl_term:format_error(Reason);
  141. format_error({checksum_mismatch, ExpectedChecksum, ActualChecksum}) ->
  142. io_lib:format(
  143. "tarball checksum mismatch~n~n" ++
  144. "Expected (base16-encoded): ~s~n" ++
  145. "Actual (base16-encoded): ~s",
  146. [encode_base16(ExpectedChecksum), encode_base16(ActualChecksum)]).
  147. %%====================================================================
  148. %% Internal functions
  149. %%====================================================================
  150. checksum(Version, MetadataBinary, ContentsBinary) ->
  151. Blob = <<Version/binary, MetadataBinary/binary, ContentsBinary/binary>>,
  152. crypto:hash(sha256, Blob).
  153. checksum(ContentsBinary) ->
  154. Blob = <<ContentsBinary/binary>>,
  155. crypto:hash(sha256, Blob).
  156. encode_metadata(Meta) ->
  157. Data = lists:map(
  158. fun(MetaPair) ->
  159. String = io_lib_pretty:print(binarify(MetaPair), [{encoding, utf8}]),
  160. unicode:characters_to_binary([String, ".\n"])
  161. end, maps:to_list(Meta)),
  162. iolist_to_binary(Data).
  163. do_unpack(Files, Output) ->
  164. State = #{
  165. checksum => undefined,
  166. contents => undefined,
  167. files => Files,
  168. metadata => undefined,
  169. output => Output
  170. },
  171. State1 = check_files(State),
  172. State2 = check_version(State1),
  173. State3 = check_checksum(State2),
  174. State4 = decode_metadata(State3),
  175. finish_unpack(State4).
  176. finish_unpack({error, _} = Error) ->
  177. Error;
  178. finish_unpack(#{metadata := Metadata, files := Files, output := Output}) ->
  179. _Version = maps:get("VERSION", Files),
  180. Checksum = decode_base16(maps:get("CHECKSUM", Files)),
  181. ContentsBinary = maps:get("contents.tar.gz", Files),
  182. case unpack_tarball(ContentsBinary, Output) of
  183. ok ->
  184. copy_metadata_config(Output, maps:get("metadata.config", Files)),
  185. {ok, #{checksum => Checksum, metadata => Metadata}};
  186. {ok, Contents} ->
  187. {ok, #{checksum => Checksum, metadata => Metadata, contents => Contents}};
  188. {error, Reason} ->
  189. {error, {inner_tarball, Reason}}
  190. end.
  191. copy_metadata_config(Output, MetadataBinary) ->
  192. ok = file:write_file(filename:join(Output, "hex_metadata.config"), MetadataBinary).
  193. check_files(#{files := Files} = State) ->
  194. RequiredFiles = ["VERSION", "CHECKSUM", "metadata.config", "contents.tar.gz"],
  195. case diff_keys(Files, RequiredFiles, []) of
  196. ok ->
  197. State;
  198. {error, {missing_keys, Keys}} ->
  199. {error, {tarball, {missing_files, Keys}}};
  200. {error, {unknown_keys, Keys}} ->
  201. {error, {tarball, {invalid_files, Keys}}}
  202. end.
  203. check_version({error, _} = Error) ->
  204. Error;
  205. check_version(#{files := Files} = State) ->
  206. case maps:get("VERSION", Files) of
  207. <<"3">> ->
  208. State;
  209. Version ->
  210. {error, {tarball, {bad_version, Version}}}
  211. end.
  212. check_checksum({error, _} = Error) ->
  213. Error;
  214. check_checksum(#{files := Files} = State) ->
  215. ChecksumBase16 = maps:get("CHECKSUM", Files),
  216. ExpectedChecksum = decode_base16(ChecksumBase16),
  217. Version = maps:get("VERSION", Files),
  218. MetadataBinary = maps:get("metadata.config", Files),
  219. ContentsBinary = maps:get("contents.tar.gz", Files),
  220. ActualChecksum = checksum(Version, MetadataBinary, ContentsBinary),
  221. if
  222. byte_size(ExpectedChecksum) /= 32 ->
  223. {error, {tarball, invalid_checksum}};
  224. ExpectedChecksum == ActualChecksum ->
  225. maps:put(checksum, ExpectedChecksum, State);
  226. true ->
  227. {error, {tarball, {checksum_mismatch, ExpectedChecksum, ActualChecksum}}}
  228. end.
  229. decode_metadata({error, _} = Error) ->
  230. Error;
  231. decode_metadata(#{files := #{"metadata.config" := Binary}} = State) when is_binary(Binary) ->
  232. case do_decode_metadata(Binary) of
  233. #{} = Metadata -> maps:put(metadata, normalize_metadata(Metadata), State);
  234. Other -> Other
  235. end.
  236. do_decode_metadata(Binary) when is_binary(Binary) ->
  237. {ok, String} = characters_to_list(Binary),
  238. case r3_safe_erl_term:string(String) of
  239. {ok, Tokens, _Line} ->
  240. try
  241. Terms = r3_safe_erl_term:terms(Tokens),
  242. maps:from_list(Terms)
  243. catch
  244. error:function_clause ->
  245. {error, {metadata, invalid_terms}};
  246. error:badarg ->
  247. {error, {metadata, not_key_value}}
  248. end;
  249. {error, {_Line, r3_safe_erl_term, Reason}, _Line2} ->
  250. {error, {metadata, Reason}}
  251. end.
  252. characters_to_list(Binary) ->
  253. case unicode:characters_to_list(Binary) of
  254. List when is_list(List) ->
  255. {ok, List};
  256. {error, _, _} ->
  257. case unicode:characters_to_list(Binary, latin1) of
  258. List when is_list(List) -> {ok, List};
  259. Other -> Other
  260. end
  261. end.
  262. normalize_metadata(Metadata1) ->
  263. Metadata2 = maybe_update_with(<<"requirements">>, fun normalize_requirements/1, Metadata1),
  264. Metadata3 = maybe_update_with(<<"links">>, fun try_into_map/1, Metadata2),
  265. Metadata4 = maybe_update_with(<<"extra">>, fun try_into_map/1, Metadata3),
  266. guess_build_tools(Metadata4).
  267. normalize_requirements(Requirements) ->
  268. case is_list(Requirements) andalso (Requirements /= []) andalso is_list(hd(Requirements)) of
  269. true ->
  270. maps:from_list(lists:map(fun normalize_legacy_requirement/1, Requirements));
  271. false ->
  272. try_into_map(fun normalize_normal_requirement/1, Requirements)
  273. end.
  274. normalize_normal_requirement({Name, Requirement}) ->
  275. {Name, try_into_map(Requirement)}.
  276. normalize_legacy_requirement(Requirement) ->
  277. Map = maps:from_list(Requirement),
  278. Name = maps:get(<<"name">>, Map),
  279. {Name, maps:without([<<"name">>], Map)}.
  280. guess_build_tools(#{<<"build_tools">> := BuildTools} = Metadata) when is_list(BuildTools) ->
  281. Metadata;
  282. guess_build_tools(#{<<"files">> := Filenames} = Metadata) ->
  283. BaseFiles = [Filename || Filename <- Filenames, filename:dirname(binary_to_list(Filename)) == "."],
  284. BuildTools = lists:usort([Tool || {Filename, Tool} <- ?BUILD_TOOL_FILES, lists:member(Filename, BaseFiles)]),
  285. Metadata#{<<"build_tools">> => BuildTools};
  286. guess_build_tools(Metadata) ->
  287. Metadata.
  288. %%====================================================================
  289. %% Tar Helpers
  290. %%====================================================================
  291. unpack_tarball(ContentsBinary, memory) ->
  292. r3_hex_erl_tar:extract({binary, ContentsBinary}, [memory, compressed]);
  293. unpack_tarball(ContentsBinary, Output) ->
  294. case r3_hex_erl_tar:extract({binary, ContentsBinary}, [{cwd, Output}, compressed]) of
  295. ok ->
  296. [try_updating_mtime(filename:join(Output, Path)) || Path <- filelib:wildcard("**", Output)],
  297. ok;
  298. Other ->
  299. Other
  300. end.
  301. %% let it silently fail for bad symlinks
  302. try_updating_mtime(Path) ->
  303. Time = calendar:universal_time(),
  304. _ = file:write_file_info(Path, #file_info{mtime=Time}, [{time, universal}]),
  305. ok.
  306. create_memory_tarball(Files) ->
  307. Path = tmp_path(),
  308. {ok, Tar} = r3_hex_erl_tar:open(Path, [write]),
  309. try
  310. add_files(Tar, Files)
  311. after
  312. ok = r3_hex_erl_tar:close(Tar)
  313. end,
  314. {ok, Tarball} = file:read_file(Path),
  315. ok = file:delete(Path),
  316. Tarball.
  317. tmp_path() ->
  318. "tmp_" ++ binary_to_list(encode_base16(crypto:strong_rand_bytes(32))).
  319. add_files(Tar, Files) when is_list(Files) ->
  320. lists:map(fun(File) -> add_file(Tar, File) end, Files).
  321. add_file(Tar, {Filename, Contents}) when is_list(Filename) and is_binary(Contents) ->
  322. ok = r3_hex_erl_tar:add(Tar, Contents, Filename, tar_opts());
  323. add_file(Tar, Filename) when is_list(Filename) ->
  324. add_file(Tar, {Filename, Filename});
  325. add_file(Tar, {Filename, AbsFilename}) when is_list(Filename), is_list(AbsFilename) ->
  326. {ok, FileInfo} = file:read_link_info(AbsFilename, []),
  327. case FileInfo#file_info.type of
  328. symlink ->
  329. ok = r3_hex_erl_tar:add(Tar, {Filename, AbsFilename}, tar_opts());
  330. directory ->
  331. case file:list_dir(AbsFilename) of
  332. {ok, []} ->
  333. r3_hex_erl_tar:add(Tar, {Filename, AbsFilename}, tar_opts());
  334. {ok, _} ->
  335. ok
  336. end;
  337. _ ->
  338. Mode = FileInfo#file_info.mode,
  339. {ok, Contents} = file:read_file(AbsFilename),
  340. ok = r3_hex_erl_tar:add(Tar, Contents, Filename, Mode, tar_opts())
  341. end.
  342. tar_opts() ->
  343. NixEpoch = calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}),
  344. Y2kEpoch = calendar:datetime_to_gregorian_seconds({{2000, 1, 1}, {0, 0, 0}}),
  345. Epoch = Y2kEpoch - NixEpoch,
  346. [{atime, Epoch}, {mtime, Epoch}, {ctime, Epoch}, {uid, 0}, {gid, 0}].
  347. %% Reproducible gzip by not setting mtime and OS
  348. %%
  349. %% From https://tools.ietf.org/html/rfc1952
  350. %%
  351. %% +---+---+---+---+---+---+---+---+---+---+
  352. %% |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->)
  353. %% +---+---+---+---+---+---+---+---+---+---+
  354. %%
  355. %% +=======================+
  356. %% |...compressed blocks...| (more-->)
  357. %% +=======================+
  358. %%
  359. %% +---+---+---+---+---+---+---+---+
  360. %% | CRC32 | ISIZE |
  361. %% +---+---+---+---+---+---+---+---+
  362. gzip(Uncompressed) ->
  363. Compressed = gzip_no_header(Uncompressed),
  364. Header = <<31, 139, 8, 0, 0, 0, 0, 0, 0, 0>>,
  365. Crc = erlang:crc32(Uncompressed),
  366. Size = byte_size(Uncompressed),
  367. Trailer = <<Crc:32/little, Size:32/little>>,
  368. iolist_to_binary([Header, Compressed, Trailer]).
  369. gzip_no_header(Uncompressed) ->
  370. Zstream = zlib:open(),
  371. try
  372. zlib:deflateInit(Zstream, default, deflated, -15, 8, default),
  373. Compressed = zlib:deflate(Zstream, Uncompressed, finish),
  374. zlib:deflateEnd(Zstream),
  375. iolist_to_binary(Compressed)
  376. after
  377. zlib:close(Zstream)
  378. end.
  379. %%====================================================================
  380. %% Helpers
  381. %%====================================================================
  382. binarify(Binary) when is_binary(Binary) -> Binary;
  383. binarify(Number) when is_number(Number) -> Number;
  384. binarify(Atom) when Atom == undefined orelse is_boolean(Atom) -> Atom;
  385. binarify(Atom) when is_atom(Atom) -> atom_to_binary(Atom, utf8);
  386. binarify(List) when is_list(List) ->
  387. [binarify(E) || E <- List];
  388. binarify({Key, Value}) ->
  389. {binarify(Key), binarify(Value)};
  390. binarify(Map) when is_map(Map) ->
  391. List = maps:to_list(Map),
  392. lists:map(fun({K, V}) -> binarify({K, V}) end, List).
  393. diff_keys(Map, RequiredKeys, OptionalKeys) ->
  394. Keys = maps:keys(Map),
  395. MissingKeys = RequiredKeys -- Keys,
  396. UnknownKeys = Keys -- (RequiredKeys ++ OptionalKeys),
  397. case {MissingKeys, UnknownKeys} of
  398. {[], []} ->
  399. ok;
  400. {_, [_ | _]} ->
  401. {error, {unknown_keys, UnknownKeys}};
  402. _ ->
  403. {error, {missing_keys, MissingKeys}}
  404. end.
  405. maybe_update_with(Key, Fun, Map) ->
  406. case maps:find(Key, Map) of
  407. {ok, Value} -> maps:put(Key, Fun(Value), Map);
  408. error -> Map
  409. end.
  410. try_into_map(List) ->
  411. try_into_map(fun(X) -> X end, List).
  412. try_into_map(Fun, Input) ->
  413. case is_list(Input) andalso lists:all(fun(E) -> is_tuple(E) andalso (tuple_size(E) == 2) end, Input) of
  414. true -> maps:from_list(lists:map(Fun, Input));
  415. false -> Input
  416. end.
  417. encode_base16(Binary) ->
  418. <<X:256/big-unsigned-integer>> = Binary,
  419. String = string:to_upper(lists:flatten(io_lib:format("~64.16.0b", [X]))),
  420. list_to_binary(String).
  421. %% Based on https://github.com/goj/base16/blob/master/src/base16.erl
  422. %% (C) 2012, Erlang Solutions Ltd.
  423. decode_base16(Base16) ->
  424. << <<(unhex(H) bsl 4 + unhex(L))>> || <<H,L>> <= Base16 >>.
  425. unhex(D) when $0 =< D andalso D =< $9 ->
  426. D - $0;
  427. unhex(D) when $a =< D andalso D =< $f ->
  428. 10 + D - $a;
  429. unhex(D) when $A =< D andalso D =< $F ->
  430. 10 + D - $A.