Implement TAR-based directory archiver

* Implement the ability to save directory contents as TAR,
  with the option to customize how specific files are saved and to select which files are saved and which aren't.

* Implement full support for original TAR USTAR paths of up to 255 characters.
* Implement file modification time and access time support for extracted/archived TAR.
This commit is contained in:
Eladash 2021-07-09 20:28:38 +03:00 committed by Megamouse
parent 8e2c34a003
commit 1f6ca25820
2 changed files with 164 additions and 17 deletions

View File

@ -83,21 +83,32 @@ fs::file tar_object::get_file(const std::string& path)
u64 size = -1;
if (header.name[0] && std::memcmp(header.magic, "ustar", 5) == 0)
std::string filename;
if (std::memcmp(header.magic, "ustar", 5) == 0)
{
const std::string_view size_sv{header.size, std::size(header.size)};
size = octal_text_to_u64(size_sv);
// Check for overflows and if surpasses file size
if (size + 512 > size && max_size >= size + 512 && max_size - size - 512 >= largest_offset)
if ((header.name[0] || header.prefix[0]) && size + 512 > size && max_size >= size + 512 && max_size - size - 512 >= largest_offset)
{
// Cache size in native u64 format
static_assert(sizeof(size) < sizeof(header.size));
std::memcpy(header.size, &size, 8);
// Save header andd offset
m_map.insert_or_assign(header.name, std::make_pair(largest_offset + 512, header));
std::string_view prefix_name{header.prefix, std::size(header.prefix)};
std::string_view name{header.name, std::size(header.name)};
prefix_name = prefix_name.substr(0, prefix_name.find_first_of('\0'));
name = name.substr(0, name.find_first_of('\0'));
filename += prefix_name;
filename += name;
// Save header and offset
m_map.insert_or_assign(filename, std::make_pair(largest_offset + 512, header));
}
else
{
@ -113,27 +124,26 @@ fs::file tar_object::get_file(const std::string& path)
if (size == umax)
{
size = 0;
header.name[0] = '\0'; // Ensure path will not be equal
largest_offset += 512;
continue;
}
if (!path.empty() && path == header.name)
// Advance offset to next block
largest_offset += utils::align(size, 512) + 512;
if (!path.empty() && path == filename)
{
// Path is equal, read file and advance offset to start of next block
std::vector<u8> buf(size);
if (m_file.read(buf, size))
{
largest_offset += utils::align(size, 512) + 512;
return fs::make_stream(std::move(buf));
}
tar_log.error("tar_object::get_file() failed to read file entry %s (size=0x%x)", header.name, size);
size = 0;
tar_log.error("tar_object::get_file() failed to read file entry %s (size=0x%x)", filename, size);
largest_offset -= utils::align(size, 512);
}
// Advance offset to next block
largest_offset += utils::align(size, 512) + 512;
}
return fs::file();
@ -170,6 +180,18 @@ bool tar_object::extract(std::string vfs_mp)
return false;
}
u64 mtime = octal_text_to_u64({header.mtime, std::size(header.mtime)});
// Let's use it for optional atime
u64 atime = octal_text_to_u64({header.padding, 12});
// This is a fake timestamp, it can be invalid
if (atime == umax)
{
// Set to mtime if not provided
atime = mtime;
}
switch (header.filetype)
{
case '\0':
@ -189,6 +211,14 @@ bool tar_object::extract(std::string vfs_mp)
if (file)
{
file.write(static_cast<fs::container_stream<std::vector<u8>>*>(data.get())->obj);
file.close();
if (mtime != umax && !fs::utime(result, atime, mtime))
{
tar_log.error("TAR Loader: fs::utime failed on %s (%s)", result, fs::g_tls_error);
return false;
}
tar_log.notice("TAR Loader: written file %s", name);
break;
}
@ -206,6 +236,12 @@ bool tar_object::extract(std::string vfs_mp)
return false;
}
if (mtime != umax && !fs::utime(result, atime, mtime))
{
tar_log.error("TAR Loader: fs::utime failed on %s (%s)", result, fs::g_tls_error);
return false;
}
break;
}
@ -217,11 +253,113 @@ bool tar_object::extract(std::string vfs_mp)
return true;
}
bool extract_tar(const std::string& file_path, const std::string& dir_path)
std::vector<u8> tar_object::save_directory(const std::string& src_dir, std::vector<u8>&& init, const process_func& func, std::string full_path)
{
const std::string& target_path = full_path.empty() ? src_dir : full_path;
fs::stat_t stat{};
if (!fs::stat(target_path, stat))
{
return std::move(init);
}
u32 count = 0;
if (stat.is_directory)
{
bool has_items = false;
for (auto& entry : fs::dir(target_path))
{
if (entry.name.find_first_not_of('.') == umax) continue;
init = save_directory(src_dir, std::move(init), func, target_path + '/' + entry.name);
has_items = true;
}
if (has_items)
{
return std::move(init);
}
}
auto write_octal = [](char* ptr, u64 i)
{
if (!i)
{
*ptr = '0';
return;
}
ptr += utils::aligned_div(std::bit_width(i), 3) - 1;
for (; i; ptr--, i /= 8)
{
*ptr = static_cast<char>('0' + (i % 8));
}
};
std::string saved_path{target_path.data() + src_dir.size(), target_path.size() - src_dir.size()};
const u64 old_size = init.size();
init.resize(old_size + sizeof(TARHeader));
if (!stat.is_directory)
{
fs::file fd(target_path);
if (func)
{
// Use custom function for file saving if provided
// Allows for example to compress PNG files as JPEG in the TAR itself
if (!func(fd, saved_path, std::move(init)))
{
// Revert (this entry should not be included if func returns false)
init.resize(old_size);
return std::move(init);
}
}
else
{
const u64 old_size2 = init.size();
init.resize(init.size() + stat.size);
ensure(fd.read(init.data() + old_size2, stat.size) == stat.size);
}
// Align
init.resize(utils::align(init.size(), 512));
fd.close();
fs::utime(target_path, stat.atime, stat.mtime);
}
TARHeader header{};
std::memcpy(header.magic, "ustar ", 6);
// Prefer saving to name field as much as we can
// If it doesn't fit, save 100 characters at name and 155 characters preceding to it at max
const u64 prefix_size = std::clamp<usz>(saved_path.size(), 100, 255) - 100;
std::memcpy(header.prefix, saved_path.data(), prefix_size);
const u64 name_size = std::min<usz>(saved_path.size(), 255) - prefix_size;
std::memcpy(header.name, saved_path.data() + prefix_size, name_size);
write_octal(header.size, stat.is_directory ? 0 : stat.size);
write_octal(header.mtime, stat.mtime);
write_octal(header.padding, stat.atime);
header.filetype = stat.is_directory ? '5' : '0';
std::memcpy(init.data() + old_size, &header, sizeof(header));
return std::move(init);
}
bool extract_tar(const std::string& file_path, const std::string& dir_path, fs::file file)
{
tar_log.notice("Extracting '%s' to directory '%s'...", file_path, dir_path);
fs::file file(file_path);
if (!file)
{
file.open(file_path);
}
if (!file)
{

View File

@ -14,9 +14,14 @@ struct TARHeader
char magic[6];
char dontcare2[82];
char prefix[155];
char padding[12];
char padding[12]; // atime for RPCS3
};
// Forward declaration of fs::file so the declarations that follow can name the type
// NOTE(review): presumably keeps this header from including the full filesystem header - confirm
namespace fs
{
class file;
}
class tar_object
{
const fs::file& m_file;
@ -33,9 +38,13 @@ public:
fs::file get_file(const std::string& path);
// Per-file callback accepted by save_directory(), invoked as func(file, path, buffer):
// - file:   the source file opened for the entry
// - path:   the entry's path relative to the archived root, passed by mutable reference
// - buffer: the archive bytes built so far, with room for this entry's header already reserved at the end
// Returning false makes save_directory() drop the entry from the archive.
using process_func = std::function<bool(const fs::file&, std::string&, std::vector<u8>&&)>;
// Extract all files in archive to destination as VFS
// Allow to optionally specify explicit mount point (which may be directory meant for extraction)
bool extract(std::string vfs_mp = {});
// Recursively archive src_dir as TAR data appended to `init` and return the resulting buffer
// `func` optionally takes over how each file is stored (returning false excludes the file)
// `append_path` is the entry handled by the current recursion step; leave it empty to start at src_dir
static std::vector<u8> save_directory(const std::string& src_dir, std::vector<u8>&& init = std::vector<u8>{}, const process_func& func = {}, std::string append_path = {});
};
bool extract_tar(const std::string& file_path, const std::string& dir_path);
bool extract_tar(const std::string& file_path, const std::string& dir_path, fs::file file = {});