25'ten fazla konu seçemezsiniz. Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

1966 satır
74 KiB

  1. %% Vendored from hex_core v0.5.0, do not edit manually
  2. %% @private
  3. %% Copied from https://github.com/erlang/otp/blob/OTP-20.0.1/lib/stdlib/src/erl_tar.erl
  4. %% with modifications:
  5. %% - Change module name to `r3_hex_erl_tar`
  6. %% - Set tar mtimes to 0 and remove dependency on :os.system_time/1
  7. %% - Preserve modes when building tarball
  8. %% - Do not crash if failing to write tar
  9. %% - Allow setting file_info opts on :r3_hex_erl_tar.add
  10. %%
  11. %% %CopyrightBegin%
  12. %%
  13. %% Copyright Ericsson AB 1997-2017. All Rights Reserved.
  14. %%
  15. %% Licensed under the Apache License, Version 2.0 (the "License");
  16. %% you may not use this file except in compliance with the License.
  17. %% You may obtain a copy of the License at
  18. %%
  19. %% http://www.apache.org/licenses/LICENSE-2.0
  20. %%
  21. %% Unless required by applicable law or agreed to in writing, software
  22. %% distributed under the License is distributed on an "AS IS" BASIS,
  23. %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  24. %% See the License for the specific language governing permissions and
  25. %% limitations under the License.
  26. %%
  27. %% %CopyrightEnd%
  28. %%
  29. %% This module implements extraction/creation of tar archives.
  30. %% It supports reading most common tar formats, namely V7, STAR,
  31. %% USTAR, GNU, BSD/libarchive, and PAX. It produces archives in USTAR
  32. %% format, unless it must use PAX headers, in which case it produces PAX
  33. %% format.
  34. %%
  35. %% The following references were used:
  36. %% http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
  37. %% http://www.gnu.org/software/tar/manual/html_node/Standard.html
  38. %% http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
  39. -module(r3_hex_erl_tar).
  40. -export([init/3,
  41. create/2, create/3,
  42. extract/1, extract/2,
  43. table/1, table/2, t/1, tt/1,
  44. open/2, close/1,
  45. add/3, add/4, add/5,
  46. format_error/1]).
  47. -include_lib("kernel/include/file.hrl").
  48. -include_lib("r3_hex_erl_tar.hrl").
  %% Translates an error reason produced by this module into a
  %% human-readable string. `{Name, Reason}` pairs are rendered as
  %% "Name: <text for Reason>"; unknown atoms are delegated to
  %% file:format_error/1 and any other term is pretty-printed.
  -spec format_error(term()) -> string().
  format_error(invalid_tar_checksum) -> "Checksum failed";
  format_error(bad_header) -> "Unrecognized tar header format";
  format_error({bad_header, Reason}) ->
      flat_format("Unrecognized tar header format: ~p", [Reason]);
  format_error({invalid_header, negative_size}) -> "Invalid header: negative size";
  format_error(invalid_sparse_header_size) -> "Invalid sparse header: negative size";
  format_error(invalid_sparse_map_entry) -> "Invalid sparse map entry";
  format_error({invalid_sparse_map_entry, Reason}) ->
      flat_format("Invalid sparse map entry: ~p", [Reason]);
  format_error(invalid_end_of_archive) -> "Invalid end of archive";
  format_error(eof) -> "Unexpected end of file";
  format_error(integer_overflow) -> "Failed to parse numeric: integer overflow";
  format_error({misaligned_read, Pos}) ->
      flat_format("Read a block which was misaligned: block_size=~p pos=~p",
                  [?BLOCK_SIZE, Pos]);
  format_error(invalid_gnu_1_0_sparsemap) -> "Invalid GNU sparse map (version 1.0)";
  format_error({invalid_gnu_0_1_sparsemap, Format}) ->
      flat_format("Invalid GNU sparse map (version ~s)", [Format]);
  format_error(unsafe_path) -> "The path points above the current working directory";
  format_error({Name, Reason}) ->
      flat_format("~ts: ~ts", [Name, format_error(Reason)]);
  format_error(Atom) when is_atom(Atom) -> file:format_error(Atom);
  format_error(Term) -> flat_format("~tp", [Term]).

  %% io_lib:format/2 followed by flattening, so callers get a flat string.
  flat_format(Format, Args) ->
      lists:flatten(io_lib:format(Format, Args)).
  %% Builds a reader around a caller-supplied handle and I/O callback.
  %% The callback must be a fun of arity 2; anything else yields
  %% {error, badarg}. The reader's position is initialised from the
  %% handle's current position.
  -spec init(handle(), write | read, file_op()) -> {ok, reader()} | {error, badarg}.
  init(Handle, AccessMode, Fun) when is_function(Fun, 2) ->
      Fresh = #reader{handle=Handle, access=AccessMode, func=Fun},
      {ok, Pos, Positioned} = do_position(Fresh, {cur, 0}),
      {ok, Positioned#reader{pos=Pos}};
  init(_Handle, _AccessMode, _Fun) ->
      {error, badarg}.
  94. %%%================================================================
  %% Extracts all files from the tar file Name.
  -spec extract(open_handle()) -> ok | {error, term()}.
  extract(Name) ->
      extract(Name, []).

  %% Extracts (all) files from the tar file Name.
  %% Options accepted:
  %%  - cooked: Opens the tar file without mode `raw`
  %%  - compressed: Uncompresses the tar file when reading
  %%  - memory: Returns the tar contents as a list of tuples {Name, Bin}
  %%  - keep_old_files: Extracted files will not overwrite the destination
  %%  - {files, ListOfFilesToExtract}: Only extract ListOfFilesToExtract
  %%  - verbose: Prints verbose information about the extraction,
  %%  - {cwd, AbsoluteDir}: Sets the current working directory for the extraction
  -spec extract(open_handle(), [extract_opt()]) ->
            ok
          | {ok, [{string(), binary()}]}
          | {error, term()}.
  extract({binary, Bin}, Opts) when is_list(Opts) ->
      do_extract({binary, Bin}, Opts);
  extract({file, Fd}, Opts) when is_list(Opts) ->
      do_extract({file, Fd}, Opts);
  extract(#reader{}=Reader, Opts) when is_list(Opts) ->
      do_extract(Reader, Opts);
  %% The name test is parenthesised on purpose: written as
  %% `is_list(Name); is_binary(Name), is_list(Opts)` the `;` binds looser
  %% than `,`, so list names bypassed the Opts check entirely.
  extract(Name, Opts) when (is_list(Name) orelse is_binary(Name)), is_list(Opts) ->
      do_extract(Name, Opts).
  %% Parses the extraction options and folds extract1/4 over the archive.
  %% The accumulator starts as a list when extracting to memory and as
  %% the atom `ok` otherwise.
  do_extract(Handle, Opts) when is_list(Opts) ->
      ReadOpts = extract_opts(Opts),
      InitialAcc = case ReadOpts#read_opts.output of
                       memory -> [];
                       _ -> ok
                   end,
      foldl_read(Handle, fun extract1/4, InitialAcc, ReadOpts).
  %% Fold callback used by do_extract/2.
  %% At eof it turns the accumulator into the final result; for a header
  %% it either reads and extracts the entry or skips over its data.
  extract1(eof, Reader, _Opts, Acc) when is_list(Acc) ->
      {ok, {ok, lists:reverse(Acc)}, Reader};
  extract1(eof, Reader, _Opts, leading_slash) ->
      error_logger:info_msg("erl_tar: removed leading '/' from member names\n"),
      {ok, ok, Reader};
  extract1(eof, Reader, _Opts, Acc) ->
      {ok, Acc, Reader};
  extract1(#tar_header{name=Name, size=Size}=Header, Reader0, Opts, Acc0) ->
      case check_extract(Name, Opts) of
          false ->
              {ok, Acc0, skip_file(Reader0)};
          true ->
              case do_read(Reader0, Size) of
                  {error, _} = Err ->
                      throw(Err);
                  {ok, Bin, Reader1} ->
                      {ok, extract2(Header, Bin, Opts, Acc0), Reader1}
              end
      end.
  %% Writes one extracted entry and returns the updated accumulator.
  %% A plain `ok` from the writer keeps the accumulator, except that an
  %% entry whose name starts with "/" switches it to `leading_slash`.
  %% An {ok, NameBin} result is prepended to a list accumulator.
  %% Write errors are thrown.
  extract2(Header, Bin, Opts, Acc) ->
      case write_extracted_element(Header, Bin, Opts) of
          {error, _} = Err ->
              throw(Err);
          {ok, NameBin} when is_list(Acc) ->
              [NameBin | Acc];
          ok ->
              case Header of
                  #tar_header{name=[$/ | _]} -> leading_slash;
                  #tar_header{} -> Acc
              end
      end.
  %% Decides whether the entry Name should be extracted: always when
  %% the `files` option is `all`, otherwise only if Name is a member of
  %% the requested ordset.
  check_extract(Name, #read_opts{files=Files}) ->
      Files =:= all orelse ordsets:is_element(Name, Files).
  163. %%%================================================================
  164. %% The following table functions produce a list of information about
  165. %% the files contained in the archive.
  166. -type filename() :: string().
  167. -type typeflag() :: regular | link | symlink |
  168. char | block | directory |
  169. fifo | reserved | unknown.
  170. -type mode() :: non_neg_integer().
  171. -type uid() :: non_neg_integer().
  172. -type gid() :: non_neg_integer().
  173. -type tar_entry() :: {filename(),
  174. typeflag(),
  175. non_neg_integer(),
  176. tar_time(),
  177. mode(),
  178. uid(),
  179. gid()}.
  %% Returns a list of names of the files in the tar file Name.
  -spec table(open_handle()) -> {ok, [string()]} | {error, term()}.
  table(Name) ->
      table(Name, []).

  %% Lists the contents of the tar file Name.
  %% Options accepted: compressed, verbose, cooked.
  %% Without `verbose` each element is the entry name; with `verbose`
  %% each element is a tar_entry() tuple. The spec covers both shapes.
  -spec table(open_handle(), [compressed | verbose | cooked]) ->
            {ok, [string() | tar_entry()]} | {error, term()}.
  table(Name, Opts) when is_list(Opts) ->
      foldl_read(Name, fun table1/4, [], table_opts(Opts)).
  %% Fold callback used by table/2: collects one listing element per
  %% header (skipping the entry's data) and reverses the collection at eof.
  table1(eof, Reader, _Opts, Entries) ->
      {ok, {ok, lists:reverse(Entries)}, Reader};
  table1(#tar_header{}=Header, Reader, #read_opts{verbose=Verbose}, Entries) ->
      Entry = table1_attrs(Header, Verbose),
      {ok, [Entry | Entries], skip_file(Reader)}.
  %% Builds the listing element for one header: the full
  %% {Name, Type, Size, Mtime, Mode, Uid, Gid} tuple when Verbose is
  %% `true`, otherwise just the entry name.
  table1_attrs(#tar_header{name=Name, typeflag=Typeflag, size=Size, mtime=Mtime,
                           mode=Mode, uid=Uid, gid=Gid},
               true) ->
      {Name, typeflag(Typeflag), Size, Mtime, Mode, Uid, Gid};
  table1_attrs(#tar_header{name=Name}, _Verbose) ->
      Name.
  %% Maps a raw header type flag to the symbolic type reported by
  %% table/2. Several flags are all reported as `regular`; anything
  %% unrecognised becomes `unknown`.
  typeflag(Flag) when Flag =:= ?TYPE_REGULAR;
                      Flag =:= ?TYPE_REGULAR_A;
                      Flag =:= ?TYPE_GNU_SPARSE;
                      Flag =:= ?TYPE_CONT ->
      regular;
  typeflag(?TYPE_LINK) ->
      link;
  typeflag(?TYPE_SYMLINK) ->
      symlink;
  typeflag(?TYPE_CHAR) ->
      char;
  typeflag(?TYPE_BLOCK) ->
      block;
  typeflag(?TYPE_DIR) ->
      directory;
  typeflag(?TYPE_FIFO) ->
      fifo;
  typeflag(_Other) ->
      unknown.
  218. %%%================================================================
  219. %% Commands for printing the contents of a tape archive,
  220. %% meant to be invoked from the shell.
  %% Prints the name of every entry in the archive, one per line.
  %% Returns whatever table/1 returned if the listing fails.
  -spec t(file:filename()) -> ok | {error, term()}.
  t(Name) when is_list(Name); is_binary(Name) ->
      case table(Name) of
          {ok, Names} ->
              PrintName = fun(EntryName) -> ok = io:format("~ts\n", [EntryName]) end,
              lists:foreach(PrintName, Names);
          Failure ->
              Failure
      end.
  %% Prints one verbose line (type, mode, owner, size, time, name) for
  %% every entry in the archive. Returns whatever table/2 returned if
  %% the listing fails.
  -spec tt(open_handle()) -> ok | {error, term()}.
  tt(Name) ->
      case table(Name, [verbose]) of
          {ok, Entries} ->
              lists:foreach(fun(Entry) -> print_header(Entry) end, Entries);
          Failure ->
              Failure
      end.
  %% Used by tt/1 to print a tar_entry tuple as a single line:
  %% type and permission string, uid/gid, size, modification time, name.
  %% The name is printed with `~ts` (as t/1 does) so that names holding
  %% code points above 255 do not make io:format/2 fail with badarg.
  -spec print_header(tar_entry()) -> ok.
  print_header({Name, Type, Size, Mtime, Mode, Uid, Gid}) ->
      io:format("~s~s ~4w/~-4w ~7w ~s ~ts\n",
                [type_to_string(Type), mode_to_string(Mode),
                 Uid, Gid, Size, time_to_string(Mtime), Name]).
  %% One-character marker printed by tt/1 for each symbolic entry type.
  type_to_string(regular)   -> [$-];
  type_to_string(directory) -> [$d];
  type_to_string(link)      -> [$l];
  type_to_string(symlink)   -> [$s];
  type_to_string(char)      -> [$c];
  type_to_string(block)     -> [$b];
  type_to_string(fifo)      -> [$f];
  type_to_string(unknown)   -> [$?].
  %% Renders the nine low permission bits of Mode as an "rwxrwxrwx"
  %% style string, using "-" for every bit that is not set.
  mode_to_string(Mode) ->
      mode_to_string(Mode, "xwrxwrxwr", []).

  %% Walks the bits from least to most significant. The template is
  %% therefore written backwards, and prepending to the accumulator
  %% restores the usual left-to-right order.
  mode_to_string(_Mode, [], Acc) ->
      Acc;
  mode_to_string(Mode, [Flag | Flags], Acc) ->
      Char = case Mode band 1 of
                 1 -> Flag;
                 _ -> $-
             end,
      mode_to_string(Mode bsr 1, Flags, [Char | Acc]).
  %% Formats a tar_time() (seconds relative to ?EPOCH) as
  %% "Mon DD HH:MM YYYY" in local time. Returns io_lib chardata.
  time_to_string(Secs0) ->
      EpochSecs = calendar:datetime_to_gregorian_seconds(?EPOCH),
      Utc = calendar:gregorian_seconds_to_datetime(EpochSecs + Secs0),
      {{Year, Month, Day}, {Hour, Minute, _Second}} =
          calendar:universal_time_to_local_time(Utc),
      io_lib:format("~s ~2w ~s:~s ~w",
                    [month(Month), Day, two_d(Hour), two_d(Minute), Year]).
  %% Zero-pads a number in 0..99 to two digits by adding 100 and
  %% dropping the leading character.
  two_d(N) ->
      [_Lead | Digits] = integer_to_list(100 + N),
      Digits.
  %% Three-letter English abbreviation for month number 1..12.
  month(N) when is_integer(N), N >= 1, N =< 12 ->
      element(N, {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}).
  284. %%%================================================================
  285. %% The open function with friends is to keep the file and binary api of this module
  286. -type open_handle() :: file:filename()
  287. | {binary, binary()}
  288. | {file, term()}.
  %% Opens a tar archive given as a file name, an in-memory binary
  %% ({binary, Bin}) or an already opened file ({file, Fd}).
  -spec open(open_handle(), [write | compressed | cooked]) ->
            {ok, reader()} | {error, term()}.
  open({binary, Bin}=Source, Mode) when is_binary(Bin) ->
      do_open(Source, Mode);
  open({file, _Fd}=Source, Mode) ->
      do_open(Source, Mode);
  open(Name, Mode) when is_list(Name); is_binary(Name) ->
      do_open(Name, Mode).
  %% Validates the mode list and hands over to open1/4; an invalid mode
  %% is reported as {error, {Name, Reason}}.
  do_open(Name, Mode) when is_list(Mode) ->
      case open_mode(Mode) of
          {error, Reason} ->
              {error, {Name, Reason}};
          {ok, Access, Raw, Opts} ->
              open1(Name, Access, Raw, Opts)
      end.
  %% Performs the actual open for each kind of source.
  %%  - {binary, Bin}, read: opens Bin as a ram file; it is uncompressed
  %%    only when the option list is exactly [compressed].
  %%  - {file, Fd}, read: wraps Fd and records its current position.
  %%  - file name: opens the file with the requested access and options;
  %%    failures are reported as {error, {Name, Reason}}.
  open1({binary, Bin}, read, _Raw, Opts) when is_binary(Bin) ->
      case file:open(Bin, [ram, binary, read]) of
          {ok, File} ->
              case Opts of
                  [compressed] -> _ = ram_file:uncompress(File), ok;
                  _ -> ok
              end,
              {ok, #reader{handle=File, access=read, func=fun file_op/2}};
          Error ->
              Error
      end;
  open1({file, Fd}, read, _Raw, _Opts) ->
      Wrapped = #reader{handle=Fd, access=read, func=fun file_op/2},
      case do_position(Wrapped, {cur, 0}) of
          {error, _} = Err ->
              Err;
          {ok, Pos, Positioned} ->
              {ok, Positioned#reader{pos=Pos}}
      end;
  open1(Name, Access, Raw, Opts) when is_list(Name); is_binary(Name) ->
      FileModes = Raw ++ [binary, Access | Opts],
      case file:open(Name, FileModes) of
          {error, Reason} ->
              {error, {Name, Reason}};
          {ok, File} ->
              {ok, #reader{handle=File, access=Access, func=fun file_op/2}}
      end.
  %% Parses the mode argument of open/2 into {ok, Access, Raw, Opts}.
  %% Access starts as `false` and may be set once to read or write;
  %% Raw starts as [raw] and becomes [] when `cooked` is given;
  %% `compressed` is collected into Opts. Anything else is einval.
  open_mode(Mode) ->
      open_mode(Mode, false, [raw], []).

  %% A bare `read`/`write` atom is accepted in place of a list.
  open_mode(Access, _, Raw, _) when Access =:= read; Access =:= write ->
      {ok, Access, Raw, []};
  %% An access atom inside the list is only accepted while none is set yet.
  open_mode([Access | Rest], false, Raw, Opts) when Access =:= read; Access =:= write ->
      open_mode(Rest, Access, Raw, Opts);
  open_mode([compressed | Rest], Access, Raw, Opts) ->
      open_mode(Rest, Access, Raw, [compressed | Opts]);
  open_mode([cooked | Rest], Access, _Raw, Opts) ->
      open_mode(Rest, Access, [], Opts);
  open_mode([], Access, Raw, Opts) ->
      {ok, Access, Raw, Opts};
  open_mode(_, _, _, _) ->
      {error, einval}.
  %% Default I/O callback for readers backed by the `file` module.
  %% Each operation tag maps directly onto the matching file:* call.
  file_op(write, {Fd, Data})   -> file:write(Fd, Data);
  file_op(position, {Fd, Pos}) -> file:position(Fd, Pos);
  file_op(read2, {Fd, Size})   -> file:read(Fd, Size);
  file_op(close, Fd)           -> file:close(Fd).
  %% Closes a tar archive.
  %% For an archive opened for writing, the end-of-archive padding is
  %% written first. If that write fails the handle is still closed and
  %% the write error is returned, instead of crashing with a badmatch
  %% and leaking the handle. Anything that is not a reader opened for
  %% read or write yields {error, einval}.
  -spec close(reader()) -> ok | {error, term()}.
  close(#reader{access=read}=Reader) ->
      ok = do_close(Reader);
  close(#reader{access=write}=Reader) ->
      case pad_file(Reader) of
          {ok, Reader2} ->
              ok = do_close(Reader2),
              ok;
          {error, _} = Err ->
              %% Best effort: release the handle, then report why the
              %% archive could not be finalised.
              _ = do_close(Reader),
              Err
      end;
  close(_) ->
      {error, einval}.
  %% Writes the end-of-archive marker: padding derived from the current
  %% position, followed by two zero blocks (there must be at least two
  %% zero blocks at the end).
  pad_file(#reader{pos=Pos}=Reader) ->
      PadBytes = skip_padding(Pos + ?BLOCK_SIZE),
      do_write(Reader, [<<0:PadBytes/unit:8>>, ?ZERO_BLOCK, ?ZERO_BLOCK]).
  368. %%%================================================================
  369. %% Creation/modification of tar archives
  %% Creates a tar file Name containing the given files.
  -spec create(file:filename(), filelist()) -> ok | {error, {string(), term()}}.
  create(Name, FileList) when is_list(Name); is_binary(Name) ->
      create(Name, FileList, []).

  %% Creates a tar archive Name containing the given files.
  %% Accepted options: verbose, compressed, cooked.
  %% Only `compressed` and `cooked` influence how the file is opened;
  %% the complete option list is passed on when adding each entry.
  -spec create(file:filename(), filelist(), [create_opt()]) ->
            ok | {error, term()} | {error, {string(), term()}}.
  create(Name, FileList, Options) when is_list(Name); is_binary(Name) ->
      OpenModes = [Opt || Opt <- Options,
                          Opt =:= compressed orelse Opt =:= cooked],
      case open(Name, [write | OpenModes]) of
          {error, _} = Err ->
              Err;
          {ok, TarFile} ->
              do_create(TarFile, FileList, Options)
      end.
  %% Adds every element of the file list to the archive and closes it.
  %% An element is either {NameInArchive, NameOrBin} or a plain name
  %% that is used both as source and as archive name. On the first
  %% failure the archive is closed and the error is returned.
  do_create(TarFile, [], _Opts) ->
      close(TarFile);
  do_create(TarFile, [Entry | Rest], Opts) ->
      {NameInArchive, Source} =
          case Entry of
              {ArchiveName, NameOrBin} -> {ArchiveName, NameOrBin};
              Name -> {Name, Name}
          end,
      case add(TarFile, Source, NameInArchive, Opts) of
          ok ->
              do_create(TarFile, Rest, Opts);
          {error, _} = Err ->
              _ = close(TarFile),
              Err
      end.
  %% Adds a file to a tape archive.
  %% The second argument is a file name, a {NameInArchive, FileName}
  %% pair, a {NameInArchive, Binary} pair, or a
  %% {NameInArchive, Binary, Mode} triple carrying an explicit mode.
  -type add_type() :: string()
                    | {string(), string()}
                    | {string(), binary()}.
  -spec add(reader(), add_type(), [add_opt()]) -> ok | {error, term()}.
  add(Reader, {NameInArchive, Source}, Opts)
    when is_list(NameInArchive), (is_list(Source) orelse is_binary(Source)) ->
      do_add(Reader, Source, NameInArchive, undefined, Opts);
  add(Reader, {NameInArchive, Bin, Mode}, Opts)
    when is_list(NameInArchive), is_binary(Bin), is_integer(Mode) ->
      do_add(Reader, Bin, NameInArchive, Mode, Opts);
  add(Reader, Name, Opts) when is_list(Name) ->
      do_add(Reader, Name, Name, undefined, Opts).
  %% Adds NameOrBin to the archive under NameInArchive.
  %% NOTE: in the guard `;` binds looser than the conjunction, so a list
  %% NameOrBin is accepted without the remaining argument checks; those
  %% calls are then validated (or rejected) by do_add/5.
  -spec add(reader(), string() | binary(), string(), [add_opt()]) ->
            ok | {error, term()}.
  add(Reader, NameOrBin, NameInArchive, Options)
    when is_list(NameOrBin);
         is_binary(NameOrBin) andalso is_list(NameInArchive)
         andalso is_list(Options) ->
      do_add(Reader, NameOrBin, NameInArchive, undefined, Options).

  %% Same as add/4, with an explicit mode for the new entry.
  %% The guard has the same shape (and the same precedence) as in add/4.
  -spec add(reader(), string() | binary(), string(), integer(), [add_opt()]) ->
            ok | {error, term()}.
  add(Reader, NameOrBin, NameInArchive, Mode, Options)
    when is_list(NameOrBin);
         is_binary(NameOrBin) andalso is_list(NameInArchive)
         andalso is_integer(Mode) andalso is_list(Options) ->
      do_add(Reader, NameOrBin, NameInArchive, Mode, Options).
  %% Common entry point behind add/3,4,5.
  %% Only readers opened for writing may be added to: a read-mode reader
  %% yields {error, eacces}, anything else {error, {badarg, Reader}}.
  %% By default file information is read with file:read_link_info/2
  %% using posix times, then overridden by the file-info options.
  do_add(#reader{access=write}=Reader, Name, NameInArchive, Mode, Options)
    when is_list(NameInArchive), is_list(Options) ->
      ReadInfo = fun(File) ->
                         Info = file:read_link_info(File, [{time, posix}]),
                         apply_file_info_opts(Options, Info)
                 end,
      AddOpts = add_opts(Options, Options, #add_opts{read_info=ReadInfo}),
      add1(Reader, Name, NameInArchive, Mode, AddOpts);
  do_add(#reader{access=read}, _, _, _, _) ->
      {error, eacces};
  do_add(Reader, _, _, _, _) ->
      {error, {badarg, Reader}}.
  %% Folds the user supplied option list into an #add_opts{} record.
  %% AllOptions is the complete option list; `dereference` needs it to
  %% build a read_info fun that still applies the file-info overrides.
  %% Unrecognised options are ignored.
  add_opts([], _AllOptions, Opts) ->
      Opts;
  add_opts([Option | Rest], AllOptions, Opts) ->
      add_opts(Rest, AllOptions, add_opt(Option, AllOptions, Opts)).

  %% Applies a single option to the record.
  add_opt(dereference, AllOptions, Opts) ->
      %% Follow symlinks: read_file_info/2 instead of read_link_info/2.
      ReadInfo = fun(File) ->
                         Info = file:read_file_info(File, [{time, posix}]),
                         apply_file_info_opts(AllOptions, Info)
                 end,
      Opts#add_opts{read_info=ReadInfo};
  add_opt(verbose, _AllOptions, Opts) ->
      Opts#add_opts{verbose=true};
  add_opt({chunks, N}, _AllOptions, Opts) ->
      Opts#add_opts{chunk_size=N};
  add_opt({atime, Value}, _AllOptions, Opts) ->
      Opts#add_opts{atime=Value};
  add_opt({mtime, Value}, _AllOptions, Opts) ->
      Opts#add_opts{mtime=Value};
  add_opt({ctime, Value}, _AllOptions, Opts) ->
      Opts#add_opts{ctime=Value};
  add_opt({uid, Value}, _AllOptions, Opts) ->
      Opts#add_opts{uid=Value};
  add_opt({gid, Value}, _AllOptions, Opts) ->
      Opts#add_opts{gid=Value};
  add_opt(_Unknown, _AllOptions, Opts) ->
      Opts.
  %% Overrides fields of a successfully read #file_info{} with the
  %% atime/mtime/ctime/uid/gid options; any non-{ok, _} result is
  %% passed through unchanged.
  apply_file_info_opts(Opts, {ok, FileInfo}) ->
      {ok, do_apply_file_info_opts(Opts, FileInfo)};
  apply_file_info_opts(_Opts, Other) ->
      Other.

  %% Applies the options left to right, so a later occurrence of the
  %% same key wins. Options that do not concern file info are ignored.
  do_apply_file_info_opts(Opts, FileInfo) ->
      Override = fun({atime, Value}, Info) -> Info#file_info{atime=Value};
                    ({mtime, Value}, Info) -> Info#file_info{mtime=Value};
                    ({ctime, Value}, Info) -> Info#file_info{ctime=Value};
                    ({uid, Value}, Info)   -> Info#file_info{uid=Value};
                    ({gid, Value}, Info)   -> Info#file_info{gid=Value};
                    (_Other, Info)         -> Info
                 end,
      lists:foldl(Override, FileInfo, Opts).
  %% Adds one entry to the archive.
  %%
  %% First clause: Name is a path on disk. Its file info decides how it
  %% is stored (symlink, regular file, directory, or header only).
  %% Second clause: the entry content is given directly as a binary and
  %% stored as a regular file, with Mode defaulting to 8#100644.
  %%
  %% Returns ok or {error, Reason}. A failed header write or a failed
  %% file:read_link/1 is reported as an error tuple rather than crashing
  %% with a badmatch ("do not crash if failing to write tar").
  add1(#reader{}=Reader, Name, NameInArchive, undefined, #add_opts{read_info=ReadInfo}=Opts)
    when is_list(Name) ->
      Res = case ReadInfo(Name) of
                {error, Reason0} ->
                    {error, {Name, Reason0}};
                {ok, #file_info{type=symlink}=Fi} ->
                    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                    case file:read_link(Name) of
                        {ok, Linkname} ->
                            Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
                            add_header(Reader, Header, Opts);
                        {error, LinkReason} ->
                            {error, {Name, LinkReason}}
                    end;
                {ok, #file_info{type=regular}=Fi} ->
                    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                    Header = fileinfo_to_header(NameInArchive, Fi, false),
                    case add_header(Reader, Header, Opts) of
                        {ok, Reader2} ->
                            FileSize = Header#tar_header.size,
                            %% The copied size must equal the size
                            %% announced in the header.
                            {ok, FileSize, Reader3} = do_copy(Reader2, Name, Opts),
                            Padding = skip_padding(FileSize),
                            do_write(Reader3, <<0:Padding/unit:8>>);
                        {error, _} = HeaderErr ->
                            HeaderErr
                    end;
                {ok, #file_info{type=directory}=Fi} ->
                    add_directory(Reader, Name, NameInArchive, Fi, Opts);
                {ok, #file_info{}=Fi} ->
                    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                    Header = fileinfo_to_header(NameInArchive, Fi, false),
                    add_header(Reader, Header, Opts)
            end,
      case Res of
          ok -> ok;
          {ok, _Reader} -> ok;
          {error, _Reason} = Err -> Err
      end;
  add1(Reader, Bin, NameInArchive, Mode, Opts) when is_binary(Bin) ->
      add_verbose(Opts, "a ~ts~n", [NameInArchive]),
      %% Times default to 0 (not the current time) unless overridden.
      Now = 0,
      Header = #tar_header{
                  name = NameInArchive,
                  size = byte_size(Bin),
                  typeflag = ?TYPE_REGULAR,
                  atime = add_opts_time(Opts#add_opts.atime, Now),
                  mtime = add_opts_time(Opts#add_opts.mtime, Now),
                  ctime = add_opts_time(Opts#add_opts.ctime, Now),
                  uid = Opts#add_opts.uid,
                  gid = Opts#add_opts.gid,
                  mode = default_mode(Mode, 8#100644)},
      Padding = skip_padding(byte_size(Bin)),
      Data = [Bin, <<0:Padding/unit:8>>],
      case add_header(Reader, Header, Opts) of
          {ok, Reader2} ->
              case do_write(Reader2, Data) of
                  {ok, _Reader3} -> ok;
                  {error, DataReason} -> {error, {NameInArchive, DataReason}}
              end;
          {error, HeaderReason} ->
              {error, {NameInArchive, HeaderReason}}
      end.
  %% Returns the configured time, or 0 when none was configured.
  %% The second argument is accepted but not used.
  add_opts_time(Time, _Now) ->
      case Time of
          undefined -> 0;
          _ -> Time
      end.
  %% Returns Mode, or Default when Mode is `undefined`.
  default_mode(Mode, Default) ->
      case Mode of
          undefined -> Default;
          _ -> Mode
      end.
  %% Adds a directory to the archive.
  %% An empty directory is stored as a single header; for a non-empty
  %% one only its children are added (via add_files/5). Errors thrown
  %% while adding children are converted to error tuples; a bare reason
  %% is tagged with the directory name.
  add_directory(Reader, DirName, NameInArchive, Info, Opts) ->
      case file:list_dir(DirName) of
          {error, Reason} ->
              {error, {DirName, Reason}};
          {ok, []} ->
              add_verbose(Opts, "a ~ts~n", [NameInArchive]),
              EmptyDirHeader = fileinfo_to_header(NameInArchive, Info, false),
              add_header(Reader, EmptyDirHeader, Opts);
          {ok, Files} ->
              add_verbose(Opts, "a ~ts~n", [NameInArchive]),
              try add_files(Reader, Files, DirName, NameInArchive, Opts) of
                  ok -> ok;
                  {error, _} = Err -> Err
              catch
                  throw:{error, {_Name, _Reason}} = Tagged -> Tagged;
                  throw:{error, Untagged} -> {error, {DirName, Untagged}}
              end
      end.
  %% Adds every directory entry in the list to the archive, threading
  %% the reader through the writes. Stops at the first error.
  %%
  %% A failed header write or a failed file:read_link/1 is returned as
  %% an error tuple. Previously these were badmatch crashes, which the
  %% `catch throw:...` clauses in add_directory/5 do not intercept.
  add_files(_Reader, [], _Dir, _DirInArchive, _Opts) ->
      ok;
  add_files(Reader, [Name|Rest], Dir, DirInArchive, #add_opts{read_info=Info}=Opts) ->
      FullName = filename:join(Dir, Name),
      NameInArchive = filename:join(DirInArchive, Name),
      Res = case Info(FullName) of
                {error, Reason} ->
                    {error, {FullName, Reason}};
                {ok, #file_info{type=directory}=Fi} ->
                    add_directory(Reader, FullName, NameInArchive, Fi, Opts);
                {ok, #file_info{type=symlink}=Fi} ->
                    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                    case file:read_link(FullName) of
                        {ok, Linkname} ->
                            Header = fileinfo_to_header(NameInArchive, Fi, Linkname),
                            add_header(Reader, Header, Opts);
                        {error, LinkReason} ->
                            {error, {FullName, LinkReason}}
                    end;
                {ok, #file_info{type=regular}=Fi} ->
                    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                    Header = fileinfo_to_header(NameInArchive, Fi, false),
                    case add_header(Reader, Header, Opts) of
                        {ok, Reader2} ->
                            FileSize = Header#tar_header.size,
                            %% The copied size must equal the size
                            %% announced in the header.
                            {ok, FileSize, Reader3} = do_copy(Reader2, FullName, Opts),
                            Padding = skip_padding(FileSize),
                            do_write(Reader3, <<0:Padding/unit:8>>);
                        {error, _} = HeaderErr ->
                            HeaderErr
                    end;
                {ok, #file_info{}=Fi} ->
                    add_verbose(Opts, "a ~ts~n", [NameInArchive]),
                    Header = fileinfo_to_header(NameInArchive, Fi, false),
                    add_header(Reader, Header, Opts)
            end,
      case Res of
          ok -> add_files(Reader, Rest, Dir, DirInArchive, Opts);
          {ok, ReaderNext} -> add_files(ReaderNext, Rest, Dir, DirInArchive, Opts);
          {error, _} = Err -> Err
      end.
  %% Prepares a string for a fixed-width header field of Size bytes.
  %% A list longer than Size is rejected with a throw. Otherwise the
  %% ASCII form is returned, followed by a NUL terminator when there is
  %% room for one.
  %% The length test stays in a guard so that a non-list argument simply
  %% falls through to the second clause.
  format_string(String, Size) when length(String) > Size ->
      throw({error, {write_string, field_too_long}});
  format_string(String, Size) ->
      case to_ascii(String) of
          Ascii when byte_size(Ascii) < Size -> [Ascii, 0];
          Ascii -> Ascii
      end.
  %% Renders an integer as a binary of base-8 digits.
  format_octal(Octal) ->
      integer_to_binary(Octal, 8).
  %% Serialises Header and writes the resulting iodata to the archive.
  %% Returns the result of do_write/2.
  add_header(#reader{}=Reader, #tar_header{}=Header, Opts) ->
      {ok, HeaderData} = build_header(Header, Opts),
      do_write(Reader, HeaderData).
  %% Overwrites part of Block with the given bytes, starting at byte
  %% offset Start. The block keeps its size; iodata is flattened to a
  %% binary first.
  write_to_block(Block, Bin, Start) when is_binary(Bin) ->
      Len = byte_size(Bin),
      <<Before:Start/binary, _Replaced:Len/binary, After/binary>> = Block,
      <<Before/binary, Bin/binary, After/binary>>;
  write_to_block(Block, IoData, Start) when is_list(IoData) ->
      write_to_block(Block, iolist_to_binary(IoData), Start).
  %% Serialises a #tar_header{} into iodata: an optional PAX extended
  %% header entry followed by the 512-byte header block.
  %%
  %% The block is filled field by field. Writers that receive a PAX key
  %% may record the value in the PAX attribute map instead of the block;
  %% if any attribute is left after set_path/2, a PAX entry is built and
  %% emitted in front of the block. The checksum is written last.
  build_header(#tar_header{}=Header, Opts) ->
      Type = Header#tar_header.typeflag,
      Empty = ?ZERO_BLOCK,
      {WithName, Pax0} = write_string(Empty, ?V7_NAME, ?V7_NAME_LEN,
                                      Header#tar_header.name, ?PAX_PATH, #{}),
      WithMode = write_octal(WithName, ?V7_MODE, ?V7_MODE_LEN, Header#tar_header.mode),
      {WithUid, Pax1} = write_numeric(WithMode, ?V7_UID, ?V7_UID_LEN,
                                      Header#tar_header.uid, ?PAX_UID, Pax0),
      {WithGid, Pax2} = write_numeric(WithUid, ?V7_GID, ?V7_GID_LEN,
                                      Header#tar_header.gid, ?PAX_GID, Pax1),
      {WithSize, Pax3} = write_numeric(WithGid, ?V7_SIZE, ?V7_SIZE_LEN,
                                       Header#tar_header.size, ?PAX_SIZE, Pax2),
      {WithMtime, Pax4} = write_numeric(WithSize, ?V7_MTIME, ?V7_MTIME_LEN,
                                        Header#tar_header.mtime, ?PAX_NONE, Pax3),
      {WithType, Pax5} = write_string(WithMtime, ?V7_TYPE, ?V7_TYPE_LEN,
                                      <<Type>>, ?PAX_NONE, Pax4),
      {WithLink, Pax6} = write_string(WithType, ?V7_LINKNAME, ?V7_LINKNAME_LEN,
                                      Header#tar_header.linkname, ?PAX_LINKPATH, Pax5),
      {WithUname, Pax7} = write_string(WithLink, ?USTAR_UNAME, ?USTAR_UNAME_LEN,
                                       Header#tar_header.uname, ?PAX_UNAME, Pax6),
      {WithGname, Pax8} = write_string(WithUname, ?USTAR_GNAME, ?USTAR_GNAME_LEN,
                                       Header#tar_header.gname, ?PAX_GNAME, Pax7),
      {WithDevmaj, Pax9} = write_numeric(WithGname, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN,
                                         Header#tar_header.devmajor, ?PAX_NONE, Pax8),
      {WithDevmin, Pax10} = write_numeric(WithDevmaj, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN,
                                          Header#tar_header.devminor, ?PAX_NONE, Pax9),
      {WithPath, PaxAttrs} = set_path(WithDevmin, Pax10),
      PaxEntry = case map_size(PaxAttrs) of
                     0 -> [];
                     _ -> build_pax_entry(Header, PaxAttrs, Opts)
                 end,
      Formatted = set_format(WithPath, ?FORMAT_USTAR),
      {ok, [PaxEntry, set_checksum(Formatted)]}.
  %% If a path was recorded as a PAX attribute, tries to store it in the
  %% ustar name/prefix fields instead. On success the PAX path attribute
  %% is dropped; otherwise block and attributes are returned unchanged.
  set_path(Block0, Pax) ->
      %% only use ustar header when name is too long
      PaxPath = maps:get(?PAX_PATH, Pax, nil),
      Split = case PaxPath of
                  nil -> false;
                  _ -> split_ustar_path(PaxPath)
              end,
      case Split of
          false ->
              {Block0, Pax};
          {ok, UstarName, UstarPrefix} ->
              {NameBlock, _} = write_string(Block0, ?V7_NAME, ?V7_NAME_LEN,
                                            UstarName, ?PAX_NONE, #{}),
              {PrefixBlock, _} = write_string(NameBlock, ?USTAR_PREFIX, ?USTAR_PREFIX_LEN,
                                              UstarPrefix, ?PAX_NONE, #{}),
              {PrefixBlock, maps:remove(?PAX_PATH, Pax)}
      end.
  %% Stamps the ustar magic and version into the block. Only the USTAR
  %% and PAX formats are accepted; any other format is thrown as
  %% {error, {invalid_format, Format}}.
  set_format(Block0, Format)
    when Format =:= ?FORMAT_USTAR; Format =:= ?FORMAT_PAX ->
      WithMagic = write_to_block(Block0, ?MAGIC_USTAR, ?USTAR_MAGIC),
      write_to_block(WithMagic, ?VERSION_USTAR, ?USTAR_VERSION);
  set_format(_Block, Format) ->
      throw({error, {invalid_format, Format}}).
  %% Computes the checksum of the block and writes it, in octal, into
  %% the checksum field.
  set_checksum(Block) ->
      write_octal(Block, ?V7_CHKSUM, ?V7_CHKSUM_LEN, compute_checksum(Block)).
  %% Builds the PAX extended header entry that precedes a file header:
  %% a header block of type ?TYPE_X_HEADER, the serialised attributes,
  %% and zero padding up to the next block boundary. The entry is named
  %% "<dir>/PaxHeaders.0/<file>", truncated to fit the name field.
  build_pax_entry(Header, PaxAttrs, Opts) ->
      Path = Header#tar_header.name,
      PaxPath = filename:join([filename:dirname(Path),
                               "PaxHeaders.0",
                               filename:basename(Path)]),
      AsciiPath = to_ascii(PaxPath),
      EntryName = if byte_size(AsciiPath) > ?V7_NAME_LEN ->
                          binary_part(AsciiPath, 0, ?V7_NAME_LEN - 1);
                     true ->
                          AsciiPath
                  end,
      PaxFile = build_pax_file(lists:sort(maps:keys(PaxAttrs)), PaxAttrs),
      Size = byte_size(PaxFile),
      PadBytes = (?BLOCK_SIZE - (Size rem ?BLOCK_SIZE)) rem ?BLOCK_SIZE,
      PaxHeader = #tar_header{
                     name=unicode:characters_to_list(EntryName),
                     size=Size,
                     mtime=Header#tar_header.mtime,
                     atime=Header#tar_header.atime,
                     ctime=Header#tar_header.ctime,
                     typeflag=?TYPE_X_HEADER
                    },
      {ok, PaxHeaderData} = build_header(PaxHeader, Opts),
      [PaxHeaderData, PaxFile, <<0:PadBytes/unit:8>>].
  %% Serialises PAX attributes into the body of an extended header.
  %% Every attribute becomes one record "LEN KEY=VALUE\n", where LEN is
  %% the decimal size of the whole record in bytes, including the LEN
  %% digits themselves and the trailing newline.
  build_pax_file(Keys, PaxAttrs) ->
      build_pax_file(Keys, PaxAttrs, []).

  build_pax_file([], _, Acc) ->
      unicode:characters_to_binary(Acc);
  build_pax_file([K|Rest], Attrs, Acc) ->
      %% Encode key and value as UTF-8 first so that every size below
      %% counts bytes. Counting code points (length/1) under-estimates
      %% records holding non-ASCII text, and the single correction pass
      %% could then emit a length field one short when the number of
      %% digits grew.
      Key = unicode:characters_to_binary(to_string(K)),
      Value = unicode:characters_to_binary(to_string(maps:get(K, Attrs))),
      %% 3 = the space, the "=" and the trailing newline.
      Size = sizeof(Key) + sizeof(Value) + 3,
      Size2 = sizeof(Size) + Size,
      Record = build_pax_record(Size2, Key, Value),
      case byte_size(Record) of
          Size2 ->
              build_pax_file(Rest, Attrs, [Acc, Record]);
          Size3 ->
              %% Adding the length digits pushed the total into one more
              %% digit; with byte-accurate sizes one correction suffices.
              build_pax_file(Rest, Attrs, [Acc, build_pax_record(Size3, Key, Value)])
      end.

  %% Assembles a single "LEN KEY=VALUE\n" record from UTF-8 binaries.
  build_pax_record(Len, Key, Value) ->
      <<(integer_to_binary(Len))/binary, " ", Key/binary, "=", Value/binary, "\n">>.

  %% Size of a term as counted by this module: bytes for binaries,
  %% elements for lists, printed characters for numbers.
  sizeof(Bin) when is_binary(Bin) ->
      byte_size(Bin);
  sizeof(List) when is_list(List) ->
      length(List);
  sizeof(N) when is_integer(N) ->
      byte_size(integer_to_binary(N));
  sizeof(N) when is_float(N) ->
      byte_size(float_to_binary(N)).

  %% Converts a PAX key or value to a character list.
  to_string(Bin) when is_binary(Bin) ->
      unicode:characters_to_list(Bin);
  to_string(List) when is_list(List) ->
      List;
  to_string(N) when is_integer(N) ->
      integer_to_list(N);
  to_string(N) when is_float(N) ->
      float_to_list(N).
  737. split_ustar_path(Path) ->
  738. Len = length(Path),
  739. NotAscii = not is_ascii(Path),
  740. if Len =< ?V7_NAME_LEN; NotAscii ->
  741. false;
  742. true ->
  743. PathBin = binary:list_to_bin(Path),
  744. case binary:split(PathBin, [<<$/>>], [global, trim_all]) of
  745. [Part] when byte_size(Part) >= ?V7_NAME_LEN ->
  746. false;
  747. Parts ->
  748. case lists:last(Parts) of
  749. Name when byte_size(Name) >= ?V7_NAME_LEN ->
  750. false;
  751. Name ->
  752. Parts2 = lists:sublist(Parts, length(Parts) - 1),
  753. join_split_ustar_path(Parts2, {ok, Name, nil})
  754. end
  755. end
  756. end.
  757. join_split_ustar_path([], Acc) ->
  758. Acc;
  759. join_split_ustar_path([Part|_], {ok, _, nil})
  760. when byte_size(Part) > ?USTAR_PREFIX_LEN ->
  761. false;
  762. join_split_ustar_path([Part|_], {ok, _Name, Acc})
  763. when (byte_size(Part)+byte_size(Acc)) > ?USTAR_PREFIX_LEN ->
  764. false;
  765. join_split_ustar_path([Part|Rest], {ok, Name, nil}) ->
  766. join_split_ustar_path(Rest, {ok, Name, Part});
  767. join_split_ustar_path([Part|Rest], {ok, Name, Acc}) ->
  768. join_split_ustar_path(Rest, {ok, Name, <<Acc/binary,$/,Part/binary>>}).
  769. write_octal(Block, Pos, Size, X) ->
  770. Octal = zero_pad(format_octal(X), Size-1),
  771. if byte_size(Octal) < Size ->
  772. write_to_block(Block, Octal, Pos);
  773. true ->
  774. throw({error, {write_failed, octal_field_too_long}})
  775. end.
  776. write_string(Block, Pos, Size, Str, PaxAttr, Pax0) ->
  777. NotAscii = not is_ascii(Str),
  778. if PaxAttr =/= ?PAX_NONE andalso (length(Str) > Size orelse NotAscii) ->
  779. Pax1 = maps:put(PaxAttr, Str, Pax0),
  780. {Block, Pax1};
  781. true ->
  782. Formatted = format_string(Str, Size),
  783. {write_to_block(Block, Formatted, Pos), Pax0}
  784. end.
  785. write_numeric(Block, Pos, Size, X, PaxAttr, Pax0) ->
  786. %% attempt octal
  787. Octal = zero_pad(format_octal(X), Size-1),
  788. if byte_size(Octal) < Size ->
  789. {write_to_block(Block, [Octal, 0], Pos), Pax0};
  790. PaxAttr =/= ?PAX_NONE ->
  791. Pax1 = maps:put(PaxAttr, X, Pax0),
  792. {Block, Pax1};
  793. true ->
  794. throw({error, {write_failed, numeric_field_too_long}})
  795. end.
  796. zero_pad(Str, Size) when byte_size(Str) >= Size ->
  797. Str;
  798. zero_pad(Str, Size) ->
  799. Padding = Size - byte_size(Str),
  800. Pad = binary:copy(<<$0>>, Padding),
  801. <<Pad/binary, Str/binary>>.
  802. %%%================================================================
%% Functions for reading and extracting tar archives
  804. read_block(Reader) ->
  805. case do_read(Reader, ?BLOCK_SIZE) of
  806. eof ->
  807. throw({error, eof});
  808. %% Two zero blocks mark the end of the archive
  809. {ok, ?ZERO_BLOCK, Reader1} ->
  810. case do_read(Reader1, ?BLOCK_SIZE) of
  811. eof ->
  812. % This is technically a malformed end-of-archive marker,
  813. % as two ZERO_BLOCKs are expected as the marker,
  814. % but if we've already made it this far, we should just ignore it
  815. eof;
  816. {ok, ?ZERO_BLOCK, _Reader2} ->
  817. eof;
  818. {ok, _Block, _Reader2} ->
  819. throw({error, invalid_end_of_archive});
  820. {error,_} = Err ->
  821. throw(Err)
  822. end;
  823. {ok, Block, Reader1} when is_binary(Block) ->
  824. {ok, Block, Reader1};
  825. {error, _} = Err ->
  826. throw(Err)
  827. end.
  828. get_header(#reader{}=Reader) ->
  829. case read_block(Reader) of
  830. eof ->
  831. eof;
  832. {ok, Block, Reader1} ->
  833. convert_header(Block, Reader1)
  834. end.
  835. %% Converts the tar header to a record.
  836. to_v7(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
  837. #header_v7{
  838. name=binary_part(Bin, ?V7_NAME, ?V7_NAME_LEN),
  839. mode=binary_part(Bin, ?V7_MODE, ?V7_MODE_LEN),
  840. uid=binary_part(Bin, ?V7_UID, ?V7_UID_LEN),
  841. gid=binary_part(Bin, ?V7_GID, ?V7_GID_LEN),
  842. size=binary_part(Bin, ?V7_SIZE, ?V7_SIZE_LEN),
  843. mtime=binary_part(Bin, ?V7_MTIME, ?V7_MTIME_LEN),
  844. checksum=binary_part(Bin, ?V7_CHKSUM, ?V7_CHKSUM_LEN),
  845. typeflag=binary:at(Bin, ?V7_TYPE),
  846. linkname=binary_part(Bin, ?V7_LINKNAME, ?V7_LINKNAME_LEN)
  847. };
  848. to_v7(_) ->
  849. {error, header_block_too_small}.
  850. to_gnu(#header_v7{}=V7, Bin)
  851. when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
  852. #header_gnu{
  853. header_v7=V7,
  854. magic=binary_part(Bin, ?GNU_MAGIC, ?GNU_MAGIC_LEN),
  855. version=binary_part(Bin, ?GNU_VERSION, ?GNU_VERSION_LEN),
  856. uname=binary_part(Bin, 265, 32),
  857. gname=binary_part(Bin, 297, 32),
  858. devmajor=binary_part(Bin, 329, 8),
  859. devminor=binary_part(Bin, 337, 8),
  860. atime=binary_part(Bin, 345, 12),
  861. ctime=binary_part(Bin, 357, 12),
  862. sparse=to_sparse_array(binary_part(Bin, 386, 24*4+1)),
  863. real_size=binary_part(Bin, 483, 12)
  864. }.
  865. to_star(#header_v7{}=V7, Bin)
  866. when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
  867. #header_star{
  868. header_v7=V7,
  869. magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
  870. version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN),
  871. uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN),
  872. gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN),
  873. devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
  874. devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
  875. prefix=binary_part(Bin, 345, 131),
  876. atime=binary_part(Bin, 476, 12),
  877. ctime=binary_part(Bin, 488, 12),
  878. trailer=binary_part(Bin, ?STAR_TRAILER, ?STAR_TRAILER_LEN)
  879. }.
  880. to_ustar(#header_v7{}=V7, Bin)
  881. when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
  882. #header_ustar{
  883. header_v7=V7,
  884. magic=binary_part(Bin, ?USTAR_MAGIC, ?USTAR_MAGIC_LEN),
  885. version=binary_part(Bin, ?USTAR_VERSION, ?USTAR_VERSION_LEN),
  886. uname=binary_part(Bin, ?USTAR_UNAME, ?USTAR_UNAME_LEN),
  887. gname=binary_part(Bin, ?USTAR_GNAME, ?USTAR_GNAME_LEN),
  888. devmajor=binary_part(Bin, ?USTAR_DEVMAJ, ?USTAR_DEVMAJ_LEN),
  889. devminor=binary_part(Bin, ?USTAR_DEVMIN, ?USTAR_DEVMIN_LEN),
  890. prefix=binary_part(Bin, 345, 155)
  891. }.
  892. to_sparse_array(Bin) when is_binary(Bin) ->
  893. MaxEntries = byte_size(Bin) div 24,
  894. IsExtended = 1 =:= binary:at(Bin, 24*MaxEntries),
  895. Entries = parse_sparse_entries(Bin, MaxEntries-1, []),
  896. #sparse_array{
  897. entries=Entries,
  898. max_entries=MaxEntries,
  899. is_extended=IsExtended
  900. }.
  901. parse_sparse_entries(<<>>, _, Acc) ->
  902. Acc;
  903. parse_sparse_entries(_, -1, Acc) ->
  904. Acc;
  905. parse_sparse_entries(Bin, N, Acc) ->
  906. case to_sparse_entry(binary_part(Bin, N*24, 24)) of
  907. nil ->
  908. parse_sparse_entries(Bin, N-1, Acc);
  909. Entry = #sparse_entry{} ->
  910. parse_sparse_entries(Bin, N-1, [Entry|Acc])
  911. end.
  912. -define(EMPTY_ENTRY, <<0,0,0,0,0,0,0,0,0,0,0,0>>).
  913. to_sparse_entry(Bin) when is_binary(Bin), byte_size(Bin) =:= 24 ->
  914. OffsetBin = binary_part(Bin, 0, 12),
  915. NumBytesBin = binary_part(Bin, 12, 12),
  916. case {OffsetBin, NumBytesBin} of
  917. {?EMPTY_ENTRY, ?EMPTY_ENTRY} ->
  918. nil;
  919. _ ->
  920. #sparse_entry{
  921. offset=parse_numeric(OffsetBin),
  922. num_bytes=parse_numeric(NumBytesBin)}
  923. end.
  924. -spec get_format(binary()) -> {ok, pos_integer(), header_v7()}
  925. | ?FORMAT_UNKNOWN
  926. | {error, term()}.
  927. get_format(Bin) when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
  928. do_get_format(to_v7(Bin), Bin).
  929. do_get_format({error, _} = Err, _Bin) ->
  930. Err;
  931. do_get_format(#header_v7{}=V7, Bin)
  932. when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
  933. Checksum = parse_octal(V7#header_v7.checksum),
  934. Chk1 = compute_checksum(Bin),
  935. Chk2 = compute_signed_checksum(Bin),
  936. if Checksum =/= Chk1 andalso Checksum =/= Chk2 ->
  937. ?FORMAT_UNKNOWN;
  938. true ->
  939. %% guess magic
  940. Ustar = to_ustar(V7, Bin),
  941. Star = to_star(V7, Bin),
  942. Magic = Ustar#header_ustar.magic,
  943. Version = Ustar#header_ustar.version,
  944. Trailer = Star#header_star.trailer,
  945. Format = if
  946. Magic =:= ?MAGIC_USTAR, Trailer =:= ?TRAILER_STAR ->
  947. ?FORMAT_STAR;
  948. Magic =:= ?MAGIC_USTAR ->
  949. ?FORMAT_USTAR;
  950. Magic =:= ?MAGIC_GNU, Version =:= ?VERSION_GNU ->
  951. ?FORMAT_GNU;
  952. true ->
  953. ?FORMAT_V7
  954. end,
  955. {ok, Format, V7}
  956. end.
%% Builds a #tar_header{} from a decoded V7 header and pairs it with a
%% reader for the entry's data. Returns {Header, FileReader}, where
%% FileReader is a sparse reader for GNU sparse entries and a regular
%% reader otherwise.
unpack_format(Format, #header_v7{}=V7, Bin, Reader)
  when is_binary(Bin), byte_size(Bin) =:= ?BLOCK_SIZE ->
    %% The V7 header only carries mtime; atime and ctime start out equal
    %% to it.
    Mtime = parse_numeric(V7#header_v7.mtime),
    Header0 = #tar_header{
                 name=parse_string(V7#header_v7.name),
                 mode=parse_numeric(V7#header_v7.mode),
                 uid=parse_numeric(V7#header_v7.uid),
                 gid=parse_numeric(V7#header_v7.gid),
                 size=parse_numeric(V7#header_v7.size),
                 mtime=Mtime,
                 atime=Mtime,
                 ctime=Mtime,
                 typeflag=V7#header_v7.typeflag,
                 linkname=parse_string(V7#header_v7.linkname)
                },
    Typeflag = Header0#tar_header.typeflag,
    %% Formats that compare greater than ?FORMAT_V7 get their extra fields
    %% merged in; plain V7 only has its name normalized.
    Header1 = if Format > ?FORMAT_V7 ->
                      unpack_modern(Format, V7, Bin, Header0);
                 true ->
                      Name = Header0#tar_header.name,
                      Header0#tar_header{name=safe_join_path("", Name)}
              end,
    %% Header-only entry types carry no data, so their size is forced to 0.
    HeaderOnly = is_header_only_type(Typeflag),
    Header2 = if HeaderOnly ->
                      Header1#tar_header{size=0};
                 true ->
                      Header1
              end,
    if Typeflag =:= ?TYPE_GNU_SPARSE ->
            %% For sparse entries the header size is replaced by the real
            %% size taken from the GNU header.
            Gnu = to_gnu(V7, Bin),
            RealSize = parse_numeric(Gnu#header_gnu.real_size),
            {Sparsemap, Reader2} = parse_sparse_map(Gnu, Reader),
            Header3 = Header2#tar_header{size=RealSize},
            {Header3, new_sparse_file_reader(Reader2, Sparsemap, RealSize)};
       true ->
            FileReader = #reg_file_reader{
                            handle=Reader,
                            num_bytes=Header2#tar_header.size,
                            size=Header2#tar_header.size,
                            pos = 0
                           },
            {Header2, FileReader}
    end.
%% Extends a header built from the V7 fields with the fields of the newer
%% formats: user/group names, device numbers for character and block
%% devices, the STAR access/change times, and the path prefix, which is
%% joined in front of the name.
unpack_modern(Format, #header_v7{}=V7, Bin, #tar_header{}=Header0)
  when is_binary(Bin) ->
    Typeflag = Header0#tar_header.typeflag,
    Ustar = to_ustar(V7, Bin),
    H0 = Header0#tar_header{
           uname=parse_string(Ustar#header_ustar.uname),
           gname=parse_string(Ustar#header_ustar.gname)},
    %% Device numbers are only decoded for character and block devices.
    H1 = if Typeflag =:= ?TYPE_CHAR
            orelse Typeflag =:= ?TYPE_BLOCK ->
                 Ma = parse_numeric(Ustar#header_ustar.devmajor),
                 Mi = parse_numeric(Ustar#header_ustar.devminor),
                 H0#tar_header{
                   devmajor=Ma,
                   devminor=Mi
                  };
            true ->
                 H0
         end,
    %% USTAR and STAR both carry a prefix; STAR additionally carries atime
    %% and ctime. Any other format contributes an empty prefix.
    {Prefix, H2} = case Format of
                       ?FORMAT_USTAR ->
                           {parse_string(Ustar#header_ustar.prefix), H1};
                       ?FORMAT_STAR ->
                           Star = to_star(V7, Bin),
                           Prefix0 = parse_string(Star#header_star.prefix),
                           Atime0 = Star#header_star.atime,
                           Atime = parse_numeric(Atime0),
                           Ctime0 = Star#header_star.ctime,
                           Ctime = parse_numeric(Ctime0),
                           {Prefix0, H1#tar_header{
                                       atime=Atime,
                                       ctime=Ctime
                                      }};
                       _ ->
                           {"", H1}
                   end,
    Name = H2#tar_header.name,
    H2#tar_header{name=safe_join_path(Prefix, Name)}.
  1037. safe_join_path([], Name) ->
  1038. filename:join([Name]);
  1039. safe_join_path(Prefix, []) ->
  1040. filename:join([Prefix]);
  1041. safe_join_path(Prefix, Name) ->
  1042. filename:join(Prefix, Name).
  1043. new_sparse_file_reader(Reader, Sparsemap, RealSize) ->
  1044. true = validate_sparse_entries(Sparsemap, RealSize),
  1045. #sparse_file_reader{
  1046. handle = Reader,
  1047. num_bytes = RealSize,
  1048. pos = 0,
  1049. size = RealSize,
  1050. sparse_map = Sparsemap}.
  1051. validate_sparse_entries(Entries, RealSize) ->
  1052. validate_sparse_entries(Entries, RealSize, 0, 0).
  1053. validate_sparse_entries([], _RealSize, _I, _LastOffset) ->
  1054. true;
  1055. validate_sparse_entries([#sparse_entry{}=Entry|Rest], RealSize, I, LastOffset) ->
  1056. Offset = Entry#sparse_entry.offset,
  1057. NumBytes = Entry#sparse_entry.num_bytes,
  1058. if
  1059. Offset > ?MAX_INT64-NumBytes ->
  1060. throw({error, {invalid_sparse_map_entry, offset_too_large}});
  1061. Offset+NumBytes > RealSize ->
  1062. throw({error, {invalid_sparse_map_entry, offset_too_large}});
  1063. I > 0 andalso LastOffset > Offset ->
  1064. throw({error, {invalid_sparse_map_entry, overlapping_offsets}});
  1065. true ->
  1066. ok
  1067. end,
  1068. validate_sparse_entries(Rest, RealSize, I+1, Offset+NumBytes).
  1069. -spec parse_sparse_map(header_gnu(), reader_type()) ->
  1070. {[sparse_entry()], reader_type()}.
  1071. parse_sparse_map(#header_gnu{sparse=Sparse}, Reader)
  1072. when Sparse#sparse_array.is_extended ->
  1073. parse_sparse_map(Sparse, Reader, []);
  1074. parse_sparse_map(#header_gnu{sparse=Sparse}, Reader) ->
  1075. {Sparse#sparse_array.entries, Reader}.
  1076. parse_sparse_map(#sparse_array{is_extended=true,entries=Entries}, Reader, Acc) ->
  1077. case read_block(Reader) of
  1078. eof ->
  1079. throw({error, eof});
  1080. {ok, Block, Reader2} ->
  1081. Sparse2 = to_sparse_array(Block),
  1082. parse_sparse_map(Sparse2, Reader2, Entries++Acc)
  1083. end;
  1084. parse_sparse_map(#sparse_array{entries=Entries}, Reader, Acc) ->
  1085. Sorted = lists:sort(fun (#sparse_entry{offset=A},#sparse_entry{offset=B}) ->
  1086. A =< B
  1087. end, Entries++Acc),
  1088. {Sorted, Reader}.
  1089. %% Defined by taking the sum of the unsigned byte values of the
  1090. %% entire header record, treating the checksum bytes to as ASCII spaces
  1091. compute_checksum(<<H1:?V7_CHKSUM/binary,
  1092. H2:?V7_CHKSUM_LEN/binary,
  1093. Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
  1094. _/binary>>) ->
  1095. C0 = checksum(H1) + (byte_size(H2) * $\s),
  1096. C1 = checksum(Rest),
  1097. C0 + C1.
  1098. compute_signed_checksum(<<H1:?V7_CHKSUM/binary,
  1099. H2:?V7_CHKSUM_LEN/binary,
  1100. Rest:(?BLOCK_SIZE - ?V7_CHKSUM - ?V7_CHKSUM_LEN)/binary,
  1101. _/binary>>) ->
  1102. C0 = signed_checksum(H1) + (byte_size(H2) * $\s),
  1103. C1 = signed_checksum(Rest),
  1104. C0 + C1.
  1105. %% Returns the checksum of a binary.
  1106. checksum(Bin) -> checksum(Bin, 0).
  1107. checksum(<<A/unsigned,Rest/binary>>, Sum) ->
  1108. checksum(Rest, Sum+A);
  1109. checksum(<<>>, Sum) -> Sum.
  1110. signed_checksum(Bin) -> signed_checksum(Bin, 0).
  1111. signed_checksum(<<A/signed,Rest/binary>>, Sum) ->
  1112. signed_checksum(Rest, Sum+A);
  1113. signed_checksum(<<>>, Sum) -> Sum.
  1114. -spec parse_numeric(binary()) -> non_neg_integer().
  1115. parse_numeric(<<>>) ->
  1116. 0;
  1117. parse_numeric(<<First, _/binary>> = Bin) ->
  1118. %% check for base-256 format first
  1119. %% if the bit is set, then all following bits constitute a two's
  1120. %% complement encoded number in big-endian byte order
  1121. if
  1122. First band 16#80 =/= 0 ->
  1123. %% Handling negative numbers relies on the following identity:
  1124. %% -a-1 == ^a
  1125. %% If the number is negative, we use an inversion mask to invert
  1126. %% the data bytes and treat the value as an unsigned number
  1127. Inv = if First band 16#40 =/= 0 -> 16#00; true -> 16#FF end,
  1128. Bytes = binary:bin_to_list(Bin),
  1129. Reducer = fun (C, {I, X}) ->
  1130. C1 = C bxor Inv,
  1131. C2 = if I =:= 0 -> C1 band 16#7F; true -> C1 end,
  1132. if (X bsr 56) > 0 ->
  1133. throw({error,integer_overflow});
  1134. true ->
  1135. {I+1, (X bsl 8) bor C2}
  1136. end
  1137. end,
  1138. {_, N} = lists:foldl(Reducer, {0,0}, Bytes),
  1139. if (N bsr 63) > 0 ->
  1140. throw({error, integer_overflow});
  1141. true ->
  1142. if Inv =:= 16#FF ->
  1143. -1 bxor N;
  1144. true ->
  1145. N
  1146. end
  1147. end;
  1148. true ->
  1149. %% normal case is an octal number
  1150. parse_octal(Bin)
  1151. end.
  1152. parse_octal(Bin) when is_binary(Bin) ->
  1153. %% skip leading/trailing zero bytes and spaces
  1154. do_parse_octal(Bin, <<>>).
  1155. do_parse_octal(<<>>, <<>>) ->
  1156. 0;
  1157. do_parse_octal(<<>>, Acc) ->
  1158. case io_lib:fread("~8u", binary:bin_to_list(Acc)) of
  1159. {error, _} -> throw({error, invalid_tar_checksum});
  1160. {ok, [Octal], []} -> Octal;
  1161. {ok, _, _} -> throw({error, invalid_tar_checksum})
  1162. end;
  1163. do_parse_octal(<<$\s,Rest/binary>>, Acc) ->
  1164. do_parse_octal(Rest, Acc);
  1165. do_parse_octal(<<0, Rest/binary>>, Acc) ->
  1166. do_parse_octal(Rest, Acc);
  1167. do_parse_octal(<<C, Rest/binary>>, Acc) ->
  1168. do_parse_octal(Rest, <<Acc/binary, C>>).
  1169. parse_string(Bin) when is_binary(Bin) ->
  1170. do_parse_string(Bin, <<>>).
  1171. do_parse_string(<<>>, Acc) ->
  1172. case unicode:characters_to_list(Acc) of
  1173. Str when is_list(Str) ->
  1174. Str;
  1175. {incomplete, _Str, _Rest} ->
  1176. binary:bin_to_list(Acc);
  1177. {error, _Str, _Rest} ->
  1178. throw({error, {bad_header, invalid_string}})
  1179. end;
  1180. do_parse_string(<<0, _/binary>>, Acc) ->
  1181. do_parse_string(<<>>, Acc);
  1182. do_parse_string(<<C, Rest/binary>>, Acc) ->
  1183. do_parse_string(Rest, <<Acc/binary, C>>).
  1184. convert_header(Bin, #reader{pos=Pos}=Reader)
  1185. when byte_size(Bin) =:= ?BLOCK_SIZE, (Pos rem ?BLOCK_SIZE) =:= 0 ->
  1186. case get_format(Bin) of
  1187. ?FORMAT_UNKNOWN ->
  1188. throw({error, bad_header});
  1189. {ok, Format, V7} ->
  1190. unpack_format(Format, V7, Bin, Reader);
  1191. {error, Reason} ->
  1192. throw({error, {bad_header, Reason}})
  1193. end;
  1194. convert_header(Bin, #reader{pos=Pos}) when byte_size(Bin) =:= ?BLOCK_SIZE ->
  1195. throw({error, misaligned_read, Pos});
  1196. convert_header(Bin, _Reader) when byte_size(Bin) =:= 0 ->
  1197. eof;
  1198. convert_header(_Bin, _Reader) ->
  1199. throw({error, eof}).
  1200. %% Creates a partially-populated header record based
  1201. %% on the provided file_info record. If the file is
  1202. %% a symlink, then `link` is used as the link target.
  1203. %% If the file is a directory, a slash is appended to the name.
  1204. fileinfo_to_header(Name, #file_info{}=Fi, Link) when is_list(Name) ->
  1205. BaseHeader = #tar_header{name=Name,
  1206. mtime=0,
  1207. atime=0,
  1208. ctime=0,
  1209. mode=Fi#file_info.mode,
  1210. typeflag=?TYPE_REGULAR},
  1211. do_fileinfo_to_header(BaseHeader, Fi, Link).
%% Completes a header according to the type of the file. Returns the
%% updated #tar_header{}, or {error, {invalid_file_type, Name, FileInfo}}
%% when no clause recognizes the file type.

%% Regular file: record the size.
do_fileinfo_to_header(Header, #file_info{size=Size,type=regular}, _Link) ->
    Header#tar_header{size=Size,typeflag=?TYPE_REGULAR};
%% Directory: the name gets a trailing slash.
do_fileinfo_to_header(#tar_header{name=Name}=Header,
                      #file_info{type=directory}, _Link) ->
    Header#tar_header{name=Name++"/",typeflag=?TYPE_DIR};
%% Symlink: record the link target.
do_fileinfo_to_header(Header, #file_info{type=symlink}, Link) ->
    Header#tar_header{typeflag=?TYPE_SYMLINK,linkname=Link};
%% Character device: record the major/minor device numbers.
do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
  when (Mode band ?S_IFMT) =:= ?S_IFCHR ->
    Header#tar_header{typeflag=?TYPE_CHAR,
                      devmajor=Fi#file_info.major_device,
                      devminor=Fi#file_info.minor_device};
%% Block device: record the major/minor device numbers.
do_fileinfo_to_header(Header, #file_info{type=device,mode=Mode}=Fi, _Link)
  when (Mode band ?S_IFMT) =:= ?S_IFBLK ->
    Header#tar_header{typeflag=?TYPE_BLOCK,
                      devmajor=Fi#file_info.major_device,
                      devminor=Fi#file_info.minor_device};
%% FIFO, recognized through the file-type bits of the mode.
do_fileinfo_to_header(Header, #file_info{type=other,mode=Mode}, _Link)
  when (Mode band ?S_IFMT) =:= ?S_FIFO ->
    Header#tar_header{typeflag=?TYPE_FIFO};
%% Anything else cannot be stored in the archive.
do_fileinfo_to_header(Header, Fi, _Link) ->
    {error, {invalid_file_type, Header#tar_header.name, Fi}}.
  1234. is_ascii(Str) when is_list(Str) ->
  1235. not lists:any(fun (Char) -> Char >= 16#80 end, Str);
  1236. is_ascii(Bin) when is_binary(Bin) ->
  1237. is_ascii1(Bin).
  1238. is_ascii1(<<>>) ->
  1239. true;
  1240. is_ascii1(<<C,_Rest/binary>>) when C >= 16#80 ->
  1241. false;
  1242. is_ascii1(<<_, Rest/binary>>) ->
  1243. is_ascii1(Rest).
  1244. to_ascii(Str) when is_list(Str) ->
  1245. case is_ascii(Str) of
  1246. true ->
  1247. unicode:characters_to_binary(Str);
  1248. false ->
  1249. Chars = lists:filter(fun (Char) -> Char < 16#80 end, Str),
  1250. unicode:characters_to_binary(Chars)
  1251. end;
  1252. to_ascii(Bin) when is_binary(Bin) ->
  1253. to_ascii(Bin, <<>>).
  1254. to_ascii(<<>>, Acc) ->
  1255. Acc;
  1256. to_ascii(<<C, Rest/binary>>, Acc) when C < 16#80 ->
  1257. to_ascii(Rest, <<Acc/binary,C>>);
  1258. to_ascii(<<_, Rest/binary>>, Acc) ->
  1259. to_ascii(Rest, Acc).
  1260. is_header_only_type(?TYPE_SYMLINK) -> true;
  1261. is_header_only_type(?TYPE_LINK) -> true;
  1262. is_header_only_type(?TYPE_DIR) -> true;
  1263. is_header_only_type(_) -> false.
%% Folds Fun over the entries of an archive.
%%
%% The first argument is either a reader opened for reading or whatever
%% open/2 accepts as an archive name; in the latter case the archive is
%% opened here and closed again once the fold has finished. Returns the
%% result of the fold or an {error, Reason} tuple.
foldl_read(#reader{access=read}=Reader, Fun, Accu, #read_opts{}=Opts)
  when is_function(Fun,4) ->
    case foldl_read0(Reader, Fun, Accu, Opts) of
        {ok, Result, _Reader2} ->
            Result;
        {error, _} = Err ->
            Err
    end;
%% A reader that was not opened with read access is rejected.
foldl_read(#reader{access=Access}, _Fun, _Accu, _Opts) ->
    {error, {read_mode_expected, Access}};
foldl_read(TarName, Fun, Accu, #read_opts{}=Opts)
  when is_function(Fun,4) ->
    %% Only open/2 is covered by the catch below; the fold runs in the
    %% `of` section and is protected solely by the inner try ... after,
    %% which makes sure the reader is closed.
    try open(TarName, [read|Opts#read_opts.open_mode]) of
        {ok, #reader{access=read}=Reader} ->
            try
                foldl_read(Reader, Fun, Accu, Opts)
            after
                _ = close(Reader)
            end;
        {error, _} = Err ->
            Err
    catch
        throw:Err ->
            Err
    end.
%% Runs the fold and converts thrown errors into return values. A thrown
%% {error, {Reason, Format, Args}} is passed to read_verbose/3 and reduced
%% to {error, Reason}; any other thrown term is returned as it is.
foldl_read0(Reader, Fun, Accu, Opts) ->
    %% the fold starts with no pending extended headers
    try foldl_read1(Fun, Accu, Reader, Opts, #{}) of
        {ok,_,_} = Ok ->
            Ok
    catch
        throw:{error, {Reason, Format, Args}} ->
            read_verbose(Opts, Format, Args),
            {error, Reason};
        throw:Err ->
            Err
    end.
%% Reads one header at a time and feeds ordinary entries to Fun.
%%
%% PAX extended headers and GNU long name / long link entries are not
%% handed to Fun. Their contents are gathered in ExtraHeaders and merged
%% into the next ordinary entry, after which the collection starts over
%% empty. At the end of the archive Fun is called with `eof`.
foldl_read1(Fun, Accu0, Reader0, Opts, ExtraHeaders) ->
    %% skip whatever is left unread of the previous entry
    {ok, Reader1} = skip_unread(Reader0),
    case get_header(Reader1) of
        eof ->
            Fun(eof, Reader1, Opts, Accu0);
        {Header, Reader2} ->
            case Header#tar_header.typeflag of
                ?TYPE_X_HEADER ->
                    %% attributes parsed here override earlier ones with
                    %% the same key
                    {ExtraHeaders2, Reader3} = parse_pax(Reader2),
                    ExtraHeaders3 = maps:merge(ExtraHeaders, ExtraHeaders2),
                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders3);
                ?TYPE_GNU_LONGNAME ->
                    %% the entry data holds the real name; keep it as the
                    %% PAX path attribute
                    {RealName, Reader3} = get_real_name(Reader2),
                    ExtraHeaders2 = maps:put(?PAX_PATH,
                                             parse_string(RealName), ExtraHeaders),
                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
                ?TYPE_GNU_LONGLINK ->
                    %% same for the link target
                    {RealName, Reader3} = get_real_name(Reader2),
                    ExtraHeaders2 = maps:put(?PAX_LINKPATH,
                                             parse_string(RealName), ExtraHeaders),
                    foldl_read1(Fun, Accu0, Reader3, Opts, ExtraHeaders2);
                _ ->
                    Header1 = merge_pax(Header, ExtraHeaders),
                    {ok, NewAccu, Reader3} = Fun(Header1, Reader2, Opts, Accu0),
                    foldl_read1(Fun, NewAccu, Reader3, Opts, #{})
            end
    end.
  1327. %% Applies all known PAX attributes to the current tar header
  1328. -spec merge_pax(tar_header(), #{binary() => binary()}) -> tar_header().
  1329. merge_pax(Header, ExtraHeaders) when is_map(ExtraHeaders) ->
  1330. do_merge_pax(Header, maps:to_list(ExtraHeaders)).
  1331. do_merge_pax(Header, []) ->
  1332. Header;
  1333. do_merge_pax(Header, [{?PAX_PATH, Path}|Rest]) ->
  1334. do_merge_pax(Header#tar_header{name=unicode:characters_to_list(Path)}, Rest);
  1335. do_merge_pax(Header, [{?PAX_LINKPATH, LinkPath}|Rest]) ->
  1336. do_merge_pax(Header#tar_header{linkname=unicode:characters_to_list(LinkPath)}, Rest);
  1337. do_merge_pax(Header, [{?PAX_GNAME, Gname}|Rest]) ->
  1338. do_merge_pax(Header#tar_header{gname=unicode:characters_to_list(Gname)}, Rest);
  1339. do_merge_pax(Header, [{?PAX_UNAME, Uname}|Rest]) ->
  1340. do_merge_pax(Header#tar_header{uname=unicode:characters_to_list(Uname)}, Rest);
  1341. do_merge_pax(Header, [{?PAX_UID, Uid}|Rest]) ->
  1342. Uid2 = binary_to_integer(Uid),
  1343. do_merge_pax(Header#tar_header{uid=Uid2}, Rest);
  1344. do_merge_pax(Header, [{?PAX_GID, Gid}|Rest]) ->
  1345. Gid2 = binary_to_integer(Gid),
  1346. do_merge_pax(Header#tar_header{gid=Gid2}, Rest);
  1347. do_merge_pax(Header, [{?PAX_ATIME, Atime}|Rest]) ->
  1348. Atime2 = parse_pax_time(Atime),
  1349. do_merge_pax(Header#tar_header{atime=Atime2}, Rest);
  1350. do_merge_pax(Header, [{?PAX_MTIME, Mtime}|Rest]) ->
  1351. Mtime2 = parse_pax_time(Mtime),
  1352. do_merge_pax(Header#tar_header{mtime=Mtime2}, Rest);
  1353. do_merge_pax(Header, [{?PAX_CTIME, Ctime}|Rest]) ->
  1354. Ctime2 = parse_pax_time(Ctime),
  1355. do_merge_pax(Header#tar_header{ctime=Ctime2}, Rest);
  1356. do_merge_pax(Header, [{?PAX_SIZE, Size}|Rest]) ->
  1357. Size2 = binary_to_integer(Size),
  1358. do_merge_pax(Header#tar_header{size=Size2}, Rest);
  1359. do_merge_pax(Header, [{<<?PAX_XATTR_STR, _Key/binary>>, _Value}|Rest]) ->
  1360. do_merge_pax(Header, Rest);
  1361. do_merge_pax(Header, [_Ignore|Rest]) ->
  1362. do_merge_pax(Header, Rest).
  1363. %% Returns the time since UNIX epoch as a datetime
  1364. -spec parse_pax_time(binary()) -> tar_time().
  1365. parse_pax_time(Bin) when is_binary(Bin) ->
  1366. TotalNano = case binary:split(Bin, [<<$.>>]) of
  1367. [SecondsStr, NanoStr0] ->
  1368. Seconds = binary_to_integer(SecondsStr),
  1369. if byte_size(NanoStr0) < ?MAX_NANO_INT_SIZE ->
  1370. %% right pad
  1371. PaddingN = ?MAX_NANO_INT_SIZE-byte_size(NanoStr0),
  1372. Padding = binary:copy(<<$0>>, PaddingN),
  1373. NanoStr1 = <<NanoStr0/binary,Padding/binary>>,
  1374. Nano = binary_to_integer(NanoStr1),
  1375. (Seconds*?BILLION)+Nano;
  1376. byte_size(NanoStr0) > ?MAX_NANO_INT_SIZE ->
  1377. %% right truncate
  1378. NanoStr1 = binary_part(NanoStr0, 0, ?MAX_NANO_INT_SIZE),
  1379. Nano = binary_to_integer(NanoStr1),
  1380. (Seconds*?BILLION)+Nano;
  1381. true ->
  1382. (Seconds*?BILLION)+binary_to_integer(NanoStr0)
  1383. end;
  1384. [SecondsStr] ->
  1385. binary_to_integer(SecondsStr)*?BILLION
  1386. end,
  1387. %% truncate to microseconds
  1388. Micro = TotalNano div 1000,
  1389. Mega = Micro div 1000000000000,
  1390. Secs = Micro div 1000000 - (Mega*1000000),
  1391. Secs.
  1392. %% Given a regular file reader, reads the whole file and
  1393. %% parses all extended attributes it contains.
  1394. parse_pax(#reg_file_reader{handle=Handle,num_bytes=0}) ->
  1395. {#{}, Handle};
  1396. parse_pax(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
  1397. case do_read(Handle0, NumBytes) of
  1398. {ok, Bytes, Handle1} ->
  1399. do_parse_pax(Handle1, Bytes, #{});
  1400. {error, _} = Err ->
  1401. throw(Err)
  1402. end.
  1403. do_parse_pax(Reader, <<>>, Headers) ->
  1404. {Headers, Reader};
  1405. do_parse_pax(Reader, Bin, Headers) ->
  1406. {Key, Value, Residual} = parse_pax_record(Bin),
  1407. NewHeaders = maps:put(Key, Value, Headers),
  1408. do_parse_pax(Reader, Residual, NewHeaders).
  1409. %% Parse an extended attribute
  1410. parse_pax_record(Bin) when is_binary(Bin) ->
  1411. case binary:split(Bin, [<<$\n>>]) of
  1412. [Record, Residual] ->
  1413. case [X || X <- binary:split(Record, [<<$\s>>], [global]), X =/= <<>>] of
  1414. [_Len, Record1] ->
  1415. case [X || X <- binary:split(Record1, [<<$=>>], [global]), X =/= <<>>] of
  1416. [AttrName, AttrValue] ->
  1417. {AttrName, AttrValue, Residual};
  1418. _Other ->
  1419. throw({error, malformed_pax_record})
  1420. end;
  1421. _Other ->
  1422. throw({error, malformed_pax_record})
  1423. end;
  1424. _Other ->
  1425. throw({error, malformed_pax_record})
  1426. end.
  1427. get_real_name(#reg_file_reader{handle=Handle,num_bytes=0}) ->
  1428. {"", Handle};
  1429. get_real_name(#reg_file_reader{handle=Handle0,num_bytes=NumBytes}) ->
  1430. case do_read(Handle0, NumBytes) of
  1431. {ok, RealName, Handle1} ->
  1432. {RealName, Handle1};
  1433. {error, _} = Err ->
  1434. throw(Err)
  1435. end;
  1436. get_real_name(#sparse_file_reader{num_bytes=NumBytes}=Reader0) ->
  1437. case do_read(Reader0, NumBytes) of
  1438. {ok, RealName, Reader1} ->
  1439. {RealName, Reader1};
  1440. {error, _} = Err ->
  1441. throw(Err)
  1442. end.
  1443. %% Skip the remaining bytes for the current file entry
  1444. skip_file(#reg_file_reader{handle=Handle0,pos=Pos,size=Size}=Reader) ->
  1445. Padding = skip_padding(Size),
  1446. AbsPos = Handle0#reader.pos + (Size-Pos) + Padding,
  1447. case do_position(Handle0, AbsPos) of
  1448. {ok, _, Handle1} ->
  1449. Reader#reg_file_reader{handle=Handle1,num_bytes=0,pos=Size};
  1450. Err ->
  1451. throw(Err)
  1452. end;
  1453. skip_file(#sparse_file_reader{pos=Pos,size=Size}=Reader) ->
  1454. case do_read(Reader, Size-Pos) of
  1455. {ok, _, Reader2} ->
  1456. Reader2;
  1457. Err ->
  1458. throw(Err)
  1459. end.
  1460. skip_padding(0) ->
  1461. 0;
  1462. skip_padding(Size) when (Size rem ?BLOCK_SIZE) =:= 0 ->
  1463. 0;
  1464. skip_padding(Size) when Size =< ?BLOCK_SIZE ->
  1465. ?BLOCK_SIZE - Size;
  1466. skip_padding(Size) ->
  1467. ?BLOCK_SIZE - (Size rem ?BLOCK_SIZE).
  1468. skip_unread(#reader{pos=Pos}=Reader0) when (Pos rem ?BLOCK_SIZE) > 0 ->
  1469. Padding = skip_padding(Pos + ?BLOCK_SIZE),
  1470. AbsPos = Pos + Padding,
  1471. case do_position(Reader0, AbsPos) of
  1472. {ok, _, Reader1} ->
  1473. {ok, Reader1};
  1474. Err ->
  1475. throw(Err)
  1476. end;
  1477. skip_unread(#reader{}=Reader) ->
  1478. {ok, Reader};
  1479. skip_unread(#reg_file_reader{handle=Handle,num_bytes=0}) ->
  1480. skip_unread(Handle);
  1481. skip_unread(#reg_file_reader{}=Reader) ->
  1482. #reg_file_reader{handle=Handle} = skip_file(Reader),
  1483. {ok, Handle};
  1484. skip_unread(#sparse_file_reader{handle=Handle,num_bytes=0}) ->
  1485. skip_unread(Handle);
  1486. skip_unread(#sparse_file_reader{}=Reader) ->
  1487. #sparse_file_reader{handle=Handle} = skip_file(Reader),
  1488. {ok, Handle}.
%% Materializes one extracted archive entry.
%%
%% With output=memory, regular files are returned as {ok, {Name, Bin}} and
%% every other entry type just yields `ok`. Otherwise the entry is created
%% on disk below the extraction directory and, when something was actually
%% created, its file info is applied afterwards.
write_extracted_element(#tar_header{name=Name,typeflag=Type},
                        Bin,
                        #read_opts{output=memory}=Opts) ->
    case typeflag(Type) of
        regular ->
            read_verbose(Opts, "x ~ts~n", [Name]),
            {ok, {Name, Bin}};
        _ ->
            ok
    end;
write_extracted_element(#tar_header{name=Name0}=Header, Bin, Opts) ->
    %% Name1 is the checked on-disk path; Name0 is only used for messages
    Name1 = make_safe_path(Name0, Opts),
    Created =
        case typeflag(Header#tar_header.typeflag) of
            regular ->
                create_regular(Name1, Name0, Bin, Opts);
            directory ->
                read_verbose(Opts, "x ~ts~n", [Name0]),
                create_extracted_dir(Name1, Opts);
            symlink ->
                read_verbose(Opts, "x ~ts~n", [Name0]),
                create_symlink(Name1, Header#tar_header.linkname, Opts);
            Device when Device =:= char orelse Device =:= block ->
                %% char/block devices will be created as empty files
                %% and then have their major/minor device set later
                create_regular(Name1, Name0, <<>>, Opts);
            fifo ->
                %% fifo devices will be created as empty files
                create_regular(Name1, Name0, <<>>, Opts);
            Other -> % Ignore.
                read_verbose(Opts, "x ~ts - unsupported type ~p~n",
                             [Name0, Other]),
                not_written
        end,
    %% file info is only applied to entries that were actually created
    case Created of
        ok -> set_extracted_file_info(Name1, Header);
        not_written -> ok
    end.
  1527. make_safe_path([$/|Path], Opts) ->
  1528. make_safe_path(Path, Opts);
  1529. make_safe_path(Path, #read_opts{cwd=Cwd}) ->
  1530. case r3_hex_filename:safe_relative_path(Path) of
  1531. unsafe ->
  1532. throw({error,{Path,unsafe_path}});
  1533. SafePath ->
  1534. filename:absname(SafePath, Cwd)
  1535. end.
  %% Writes Bin to Name via write_extracted_file/3 and, when verbose,
  %% reports the outcome using the name the entry has inside the archive.
  %% Returns whatever write_extracted_file/3 returned.
  create_regular(Name, NameInArchive, Bin, Opts) ->
      Result = write_extracted_file(Name, Bin, Opts),
      Format =
          case Result of
              not_written -> "x ~ts - exists, not created~n";
              _ -> "x ~ts~n"
          end,
      read_verbose(Opts, Format, [NameInArchive]),
      Result.
  %% Creates the directory Name for an extracted directory entry.
  %%
  %% Returns `ok' when the directory was created and `not_written' when it
  %% already exists or the operation is not supported. If a parent is
  %% missing (`enoent'), the whole directory chain is created via
  %% make_dirs/2.
  %%
  %% Every failure, including one reported by make_dirs/2, is thrown as
  %% `{error, Reason}'. Previously a make_dirs/2 error was returned as a
  %% plain value, which the caller's `case' on `ok | not_written' could not
  %% match; write_file/2 already throws in the same situation.
  create_extracted_dir(Name, _Opts) ->
      case file:make_dir(Name) of
          ok -> ok;
          {error,enotsup} -> not_written;
          {error,eexist} -> not_written;
          {error,enoent} ->
              case make_dirs(Name, dir) of
                  ok -> ok;
                  {error,Reason} -> throw({error, Reason})
              end;
          {error,Reason} -> throw({error, Reason})
      end.
  %% Creates a symbolic link at Name pointing to Linkname.
  %%
  %% Returns `ok' on success and `not_written' when the link already exists
  %% or symbolic links are not supported (the latter is reported when
  %% verbose). On `enoent' the parent directories of Name are created and
  %% the link creation is retried.
  %%
  %% Every other failure is thrown as `{error, Reason}'. This includes a
  %% failure to create the parent directories, which used to surface as a
  %% `badmatch' crash instead of the thrown error write_file/2 produces in
  %% the same situation.
  create_symlink(Name, Linkname, Opts) ->
      case file:make_symlink(Linkname, Name) of
          ok -> ok;
          {error,enoent} ->
              case make_dirs(Name, file) of
                  ok ->
                      create_symlink(Name, Linkname, Opts);
                  {error,Reason} ->
                      throw({error, Reason})
              end;
          {error,eexist} -> not_written;
          {error,enotsup} ->
              read_verbose(Opts, "x ~ts - symbolic links not supported~n", [Name]),
              not_written;
          {error,Reason} -> throw({error, Reason})
      end.
  %% Writes Bin to Name unless `keep_old_files' is set and file info can be
  %% read for Name, in which case nothing is written and `not_written' is
  %% returned.
  write_extracted_file(Name, Bin, Opts) ->
      case Opts#read_opts.keep_old_files of
          false ->
              write_file(Name, Bin);
          true ->
              case file:read_file_info(Name) of
                  {ok, _} -> not_written;
                  _ -> write_file(Name, Bin)
              end
      end.
  %% Writes Bin to the file Name.
  %%
  %% If the write fails with `enoent', the directories leading up to Name
  %% are created and the write is retried. Any other failure, or a failure
  %% to create those directories, is thrown as `{error, Reason}'.
  write_file(Name, Bin) ->
      case file:write_file(Name, Bin) of
          ok ->
              ok;
          {error,enoent} ->
              case make_dirs(Name, file) of
                  ok -> write_file(Name, Bin);
                  {error,_}=DirError -> throw(DirError)
              end;
          {error,_}=WriteError ->
              throw(WriteError)
      end.
  %% Applies the file info recorded in the tar header to the extracted
  %% file Name.
  %%
  %% Symbolic and hard links are left untouched, character and block
  %% devices are delegated to set_device_info/2, and every other entry type
  %% gets its mode and mtime written (mtime is passed as posix time).
  set_extracted_file_info(Name, #tar_header{typeflag=Flag}=Header) ->
      case Flag of
          ?TYPE_SYMLINK ->
              ok;
          ?TYPE_LINK ->
              ok;
          ?TYPE_CHAR ->
              set_device_info(Name, Header);
          ?TYPE_BLOCK ->
              set_device_info(Name, Header);
          _ ->
              FileInfo = #file_info{mode=Header#tar_header.mode,
                                    mtime=Header#tar_header.mtime},
              file:write_file_info(Name, FileInfo, [{time, posix}])
      end.
  %% Writes mode, mtime and device numbers from the tar header onto the
  %% file Name, which was created as a placeholder for a char/block device.
  %%
  %% The header mtime is written with `{time, posix}', exactly as
  %% set_extracted_file_info/2 does for the same header field. Without that
  %% option file:write_file_info/2 interprets the mtime as a local
  %% date-time rather than as posix seconds.
  %%
  %% NOTE(review): the file module documentation lists only atime, mtime,
  %% ctime, mode, uid and gid as fields used by write_file_info; confirm
  %% whether the major/minor device numbers have any effect here.
  set_device_info(Name, #tar_header{}=Header) ->
      Info = #file_info{
                mode=Header#tar_header.mode,
                mtime=Header#tar_header.mtime,
                major_device=Header#tar_header.devmajor,
                minor_device=Header#tar_header.devminor
               },
      file:write_file_info(Name, Info, [{time, posix}]).
  %% Ensures the directories needed for Path exist.
  %% With `file', only the parent directories of Path are created.
  %% With `dir', Path itself is created as well, by asking for the parents
  %% of a dummy entry inside it.
  make_dirs(Path, file) ->
      filelib:ensure_dir(Path);
  make_dirs(Path, dir) ->
      Placeholder = filename:join(Path, "*"),
      filelib:ensure_dir(Placeholder).
  %% Prints the message only if the verbose option is set in the read
  %% options; otherwise does nothing and returns `ok'.
  read_verbose(#read_opts{verbose=true}, Fmt, FmtArgs) ->
      io:format(Fmt, FmtArgs);
  read_verbose(_Opts, _Fmt, _FmtArgs) ->
      ok.
  %% Prints the message only if the verbose option is set in the add
  %% options; otherwise does nothing and returns `ok'.
  add_verbose(#add_opts{verbose=true}, Fmt, FmtArgs) ->
      io:format(Fmt, FmtArgs);
  add_verbose(_Opts, _Fmt, _FmtArgs) ->
      ok.
  1628. %%%%%%%%%%%%%%%%%%
  1629. %% I/O primitives
  1630. %%%%%%%%%%%%%%%%%%
  %% Writes Data through the reader's access fun.
  %%
  %% On success the current position is queried again and stored in the
  %% returned reader: `{ok, Reader}'. An `{error, _}' tuple from the write
  %% is returned unchanged.
  do_write(#reader{handle=Handle,func=AccessFun}=Reader, Data)
    when is_function(AccessFun,2) ->
      WriteResult = AccessFun(write, {Handle,Data}),
      case WriteResult of
          {error, _} ->
              WriteResult;
          ok ->
              {ok, CurPos, Positioned} = do_position(Reader, {cur,0}),
              {ok, Positioned#reader{pos=CurPos}}
      end.
  %% Copies the contents of the file Source into the archive in chunks.
  %%
  %% A chunk size of 0 is replaced by 65536. Returns
  %% `{ok, BytesCopied, Reader}' on success. If Source cannot be opened or
  %% the chunked copy returns anything else, that value is thrown; the
  %% source file is closed before the copy result is inspected.
  do_copy(#reader{func=AccessFun}=Reader, Source, #add_opts{chunk_size=0}=Opts)
    when is_function(AccessFun, 2) ->
      do_copy(Reader, Source, Opts#add_opts{chunk_size=65536});
  do_copy(#reader{func=AccessFun}=Reader, Source, #add_opts{chunk_size=ChunkSize})
    when is_function(AccessFun, 2) ->
      case file:open(Source, [read, binary]) of
          {ok, Fd} ->
              CopyResult = copy_chunked(Reader, Fd, ChunkSize, 0),
              _ = file:close(Fd),
              case CopyResult of
                  {ok, _Copied, _Reader2} -> CopyResult;
                  _ -> throw(CopyResult)
              end;
          OpenFailure ->
              throw(OpenFailure)
      end.
  %% Copies Source into the archive ChunkSize bytes at a time.
  %%
  %% Returns `{ok, TotalCopied, Reader}' once Source reports `eof'. Any
  %% other result of reading Source, and any `{error, _}' from writing a
  %% chunk, is returned unchanged so that the caller can close Source
  %% before reporting the failure. A failed write used to crash here with
  %% a `badmatch', skipping that cleanup.
  copy_chunked(#reader{}=Reader, Source, ChunkSize, Copied) ->
      case file:read(Source, ChunkSize) of
          {ok, Bin} ->
              case do_write(Reader, Bin) of
                  {ok, Reader2} ->
                      copy_chunked(Reader2, Source, ChunkSize,
                                   Copied+byte_size(Bin));
                  {error, _} = WriteError ->
                      WriteError
              end;
          eof ->
              {ok, Copied, Reader};
          Other ->
              Other
      end.
  %% Repositions the underlying handle through the reader's access fun.
  %%
  %% Returns `{ok, NewPos, Reader}' where NewPos is what the positioning
  %% call reported and the reader's `pos' field holds the position obtained
  %% from a follow-up `{cur, 0}' query, since the requested position may
  %% not have been an absolute one. Any other result of the positioning
  %% call is returned unchanged.
  do_position(#reader{handle=Handle,func=AccessFun}=Reader, Pos)
    when is_function(AccessFun,2) ->
      case AccessFun(position, {Handle,Pos}) of
          {ok, Reported} ->
              {ok, Absolute} = AccessFun(position, {Handle, {cur, 0}}),
              {ok, Reported, Reader#reader{pos=Absolute}};
          Failure ->
              Failure
      end.
  %% Reads up to Len bytes from one of the three reader kinds.
  %%
  %% - #reg_file_reader{}: the request is capped at the bytes left in the
  %%   entry (size - pos); pos and num_bytes are advanced by the capped
  %%   length.
  %% - #sparse_file_reader{}: delegated to do_sparse_read/2.
  %% - #reader{}: the access fun is called with `read2'; list results are
  %%   converted to binaries and pos is advanced by the bytes returned.
  %%
  %% Returns `{ok, Bin, Reader}' on success; any other result of the
  %% underlying read is passed through unchanged.
  do_read(#reg_file_reader{handle=Inner,pos=Offset,size=Total}=FileReader, Len) ->
      Remaining = Total - Offset,
      ToRead =
          case Len > Remaining of
              true -> Remaining;
              false -> Len
          end,
      case do_read(Inner, ToRead) of
          {ok, Data, Inner2} ->
              Offset2 = Offset + ToRead,
              {ok, Data, FileReader#reg_file_reader{
                           handle=Inner2,
                           pos=Offset2,
                           num_bytes=Total - Offset2}};
          Failure ->
              Failure
      end;
  do_read(#sparse_file_reader{}=SparseReader, Len) ->
      do_sparse_read(SparseReader, Len);
  do_read(#reader{pos=Offset,handle=Handle,func=AccessFun}=Reader, Len)
    when is_function(AccessFun,2) ->
      %% data is always handed on as a binary
      case AccessFun(read2, {Handle,Len}) of
          {ok, Chars} when is_list(Chars) ->
              Data = list_to_binary(Chars),
              {ok, Data, Reader#reader{pos=Offset+byte_size(Data)}};
          {ok, Data} when is_binary(Data) ->
              {ok, Data, Reader#reader{pos=Offset+byte_size(Data)}};
          Failure ->
              Failure
      end.
  %% Reads up to Len bytes from a sparse file entry, synthesising zero
  %% bytes for the holes between (and after) the data fragments listed in
  %% the sparse map.
  %%
  %% Returns `{ok, Bin, Reader}'; any other result of reading a data
  %% fragment is returned unchanged.
  do_sparse_read(Reader, Len) ->
      do_sparse_read(Reader, Len, <<>>).

  do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{num_bytes=0}|Entries]
                                    }=Reader0, Len, Acc) ->
      %% skip all empty fragments
      Reader1 = Reader0#sparse_file_reader{sparse_map=Entries},
      do_sparse_read(Reader1, Len, Acc);
  do_sparse_read(#sparse_file_reader{sparse_map=[],
                                     pos=Pos,size=Size}=Reader0, Len, Acc)
    when Pos < Size, Len > 0 ->
      %% if there are no more fragments, it is possible that there is one last sparse hole
      %% this behaviour matches the BSD tar utility
      %% however, GNU tar stops returning data even if we haven't reached the end
      %%
      %% The Len > 0 guard is required for termination: with Len =:= 0 the
      %% hole read yields <<>> without advancing pos, so this clause would
      %% otherwise match again forever.
      {ok, Bin, Reader1} = read_sparse_hole(Reader0, Size, Len),
      do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
  do_sparse_read(#sparse_file_reader{sparse_map=[]}=Reader, _Len, Acc) ->
      {ok, Acc, Reader};
  do_sparse_read(#sparse_file_reader{}=Reader, 0, Acc) ->
      {ok, Acc, Reader};
  do_sparse_read(#sparse_file_reader{sparse_map=[#sparse_entry{offset=Offset}|_],
                                     pos=Pos}=Reader0, Len, Acc)
    when Pos < Offset ->
      %% we're in a hole before the next fragment; cap the zero fill at Len
      %% so that no more than the requested number of bytes is returned
      {ok, Bin, Reader1} = read_sparse_hole(Reader0, Offset, Len),
      do_sparse_read(Reader1, Len-byte_size(Bin), <<Acc/binary,Bin/binary>>);
  do_sparse_read(#sparse_file_reader{sparse_map=[Entry|Entries],
                                     pos=Pos}=Reader0, Len, Acc) ->
      %% we're in a data fragment, so read from it
      %% end offset of fragment
      EndPos = Entry#sparse_entry.offset + Entry#sparse_entry.num_bytes,
      %% bytes left in fragment
      NumBytes = EndPos - Pos,
      ActualLen = if Len > NumBytes -> NumBytes; true -> Len end,
      case do_read(Reader0#sparse_file_reader.handle, ActualLen) of
          {ok, Bin, Handle} ->
              BytesRead = byte_size(Bin),
              ActualEndPos = Pos+BytesRead,
              %% drop the fragment from the map once it is fully consumed
              Reader1 = if ActualEndPos =:= EndPos ->
                                Reader0#sparse_file_reader{sparse_map=Entries};
                           true ->
                                Reader0
                        end,
              Size = Reader1#sparse_file_reader.size,
              NumBytes2 = Size - ActualEndPos,
              Reader2 = Reader1#sparse_file_reader{
                          handle=Handle,
                          pos=ActualEndPos,
                          num_bytes=NumBytes2},
              do_sparse_read(Reader2, Len-BytesRead, <<Acc/binary,Bin/binary>>);
          Other ->
              Other
      end.
  %% Produces the zero bytes of a sparse hole that ends at Offset.
  %% At most Len bytes are produced; the reader's pos and num_bytes are
  %% advanced by the number of bytes returned.
  read_sparse_hole(#sparse_file_reader{pos=Pos,size=Size}=Reader, Offset, Len) ->
      Gap = Offset - Pos,
      Count =
          case Gap > Len of
              true -> Len;
              false -> Gap
          end,
      NewPos = Pos + Count,
      Zeros = <<0:Count/unit:8>>,
      {ok, Zeros, Reader#sparse_file_reader{
                    num_bytes=Size - NewPos,
                    pos=NewPos}}.
  %% Closes the underlying handle by calling the reader's access fun with
  %% `close' and returns its result.
  -spec do_close(reader()) -> ok | {error, term()}.
  do_close(#reader{handle=Handle,func=AccessFun}) when is_function(AccessFun,2) ->
      AccessFun(close, Handle).
  1777. %%%%%%%%%%%%%%%%%%
  1778. %% Option parsing
  1779. %%%%%%%%%%%%%%%%%%
  %% Builds the read options for extraction from an option list, starting
  %% from the defaults.
  extract_opts(List) ->
      Defaults = default_options(),
      extract_opts(List, Defaults).
  %% Builds the read options for listing an archive from an option list,
  %% starting from the defaults.
  table_opts(List) ->
      Defaults = default_options(),
      read_opts(List, Defaults).
  %% Default read options: all record defaults, with cwd set to the current
  %% working directory. Crashes with a badmatch if the working directory
  %% cannot be determined.
  default_options() ->
      {ok, WorkDir} = file:get_cwd(),
      #read_opts{cwd=WorkDir}.
  %% Folds the extraction option list into the #read_opts{} record, left to
  %% right.
  %%
  %% Recognised here: `keep_old_files', `{cwd, Dir}', `{files, Files}'
  %% (stored as an ordset), `memory', `compressed', `cooked' and `verbose'.
  %% Anything else is handed to read_opts/2 as a single-element list.
  extract_opts(List, Opts0) ->
      lists:foldl(
        fun(keep_old_files, Acc) ->
                Acc#read_opts{keep_old_files=true};
           ({cwd, Cwd}, Acc) ->
                Acc#read_opts{cwd=Cwd};
           ({files, Files}, Acc) ->
                Wanted = ordsets:from_list(Files),
                Acc#read_opts{files=Wanted};
           (memory, Acc) ->
                Acc#read_opts{output=memory};
           (compressed, #read_opts{open_mode=Mode}=Acc) ->
                Acc#read_opts{open_mode=[compressed|Mode]};
           (cooked, #read_opts{open_mode=Mode}=Acc) ->
                Acc#read_opts{open_mode=[cooked|Mode]};
           (verbose, Acc) ->
                Acc#read_opts{verbose=true};
           (Other, Acc) ->
                read_opts([Other], Acc)
        end,
        Opts0,
        List).
  %% Folds the generic read option list into the #read_opts{} record, left
  %% to right. `compressed' and `cooked' are prepended to the open mode,
  %% `verbose' turns on verbose output, and every other element is ignored.
  read_opts(List, Opts0) ->
      lists:foldl(
        fun(compressed, #read_opts{open_mode=Mode}=Acc) ->
                Acc#read_opts{open_mode=[compressed|Mode]};
           (cooked, #read_opts{open_mode=Mode}=Acc) ->
                Acc#read_opts{open_mode=[cooked|Mode]};
           (verbose, Acc) ->
                Acc#read_opts{verbose=true};
           (_Ignored, Acc) ->
                Acc
        end,
        Opts0,
        List).