Improve TAR loader (#9908)

* Fix header magic test.
* Rewrite code to not use so many filesystem calls.
* Add many more error checks.
* Add missing NUL filetype.
* octalToDecimal(header.size) has been fixed to parse a fixed 12-character range instead of an unbounded string.
* Add many optimizations.
* Fix possible signed overflows with int; use the unsigned u64 type instead, which also allows for larger files.
* Log errors.
This commit is contained in:
Eladash 2021-03-11 01:26:39 +02:00 committed by GitHub
parent 4adf412049
commit 0958c10f88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 101 additions and 55 deletions

View File

@ -2,38 +2,48 @@
#include "TAR.h" #include "TAR.h"
#include "util/asm.hpp"
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <charconv>
LOG_CHANNEL(tar_log, "TAR"); LOG_CHANNEL(tar_log, "TAR");
tar_object::tar_object(const fs::file& file, usz offset) tar_object::tar_object(const fs::file& file)
: m_file(file) : m_file(file)
, initial_offset(static_cast<int>(offset))
{ {
m_file.seek(initial_offset);
largest_offset = initial_offset;
} }
TARHeader tar_object::read_header(u64 offset) TARHeader tar_object::read_header(u64 offset) const
{ {
m_file.seek(offset); TARHeader header{};
TARHeader header;
m_file.read(header); if (m_file.seek(offset) != offset)
{
return header;
}
if (!m_file.read(header))
{
std::memset(&header, 0, sizeof(header));
}
return header; return header;
} }
int octalToDecimal(int octalNumber) u64 octal_text_to_u64(std::string_view sv)
{ {
int decimalNumber = 0, i = 0, rem; u64 i = -1;
while (octalNumber != 0) const auto ptr = std::from_chars(sv.data(), sv.data() + sv.size(), i, 8).ptr;
// Range must be terminated with either NUL or space
if (ptr == sv.data() + sv.size() || (*ptr && *ptr != ' '))
{ {
rem = octalNumber % 10; i = -1;
octalNumber /= 10;
decimalNumber += rem * (1 << (i * 3));
++i;
} }
return decimalNumber;
return i;
} }
std::vector<std::string> tar_object::get_filenames() std::vector<std::string> tar_object::get_filenames()
@ -47,46 +57,80 @@ std::vector<std::string> tar_object::get_filenames()
return vec; return vec;
} }
fs::file tar_object::get_file(std::string path) fs::file tar_object::get_file(const std::string& path)
{ {
if (!m_file) return fs::file(); if (!m_file) return fs::file();
auto it = m_map.find(path); if (auto it = m_map.find(path); it != m_map.end())
if (it != m_map.end())
{ {
TARHeader header = read_header(it->second); u64 size = 0;
int size = octalToDecimal(atoi(header.size)); std::memcpy(&size, it->second.second.size, sizeof(size));
std::vector<u8> buf(size); std::vector<u8> buf(size);
m_file.seek(it->second.first);
m_file.read(buf, size); m_file.read(buf, size);
int offset = ((m_file.pos() - initial_offset + 512 - 1) & ~(512 - 1)) + initial_offset; // Always keep the offset aligned to 512 bytes + the initial offset.
m_file.seek(offset);
return fs::make_stream(std::move(buf)); return fs::make_stream(std::move(buf));
} }
else //continue scanning from last file entered else //continue scanning from last file entered
{ {
while (m_file.pos() < m_file.size()) const u64 max_size = m_file.size();
while (largest_offset < max_size)
{ {
TARHeader header = read_header(largest_offset); TARHeader header = read_header(largest_offset);
if (std::string(header.magic).find("ustar") != umax) u64 size = -1;
m_map[header.name] = largest_offset;
int size = octalToDecimal(atoi(header.size)); if (header.name[0] && std::memcmp(header.magic, "ustar", 5) == 0)
if (path == header.name) { //path is equal, read file and advance offset to start of next block {
const std::string_view size_sv{header.size, std::size(header.size)};
size = octal_text_to_u64(size_sv);
// Check for overflows and if surpasses file size
if (size + 512 > size && max_size >= size + 512 && max_size - size - 512 >= largest_offset)
{
// Cache size in native u64 format
static_assert(sizeof(size) < sizeof(header.size));
std::memcpy(header.size, &size, 8);
// Save header and offset
m_map.insert_or_assign(header.name, std::make_pair(largest_offset + 512, header));
}
else
{
// Invalid
size = -1;
tar_log.error("tar_object::get_file() failed to convert header.size=%s, filesize=0x%x", size_sv, max_size);
}
}
else
{
tar_log.trace("tar_object::get_file() failed to parse header: offset=0x%x, filesize=0x%x", largest_offset, max_size);
}
if (size == umax)
{
size = 0;
header.name[0] = '\0'; // Ensure path will not be equal
}
if (!path.empty() && path == header.name)
{
// Path is equal, read file and advance offset to start of next block
std::vector<u8> buf(size); std::vector<u8> buf(size);
m_file.read(buf, size);
int offset = ((m_file.pos() - initial_offset + 512 - 1) & ~(512 - 1)) + initial_offset;
m_file.seek(offset);
largest_offset = offset;
return fs::make_stream(std::move(buf)); if (m_file.read(buf, size))
} {
else { // just advance offset to next block largest_offset += utils::align(size, 512) + 512;
m_file.seek(size, fs::seek_mode::seek_cur); return fs::make_stream(std::move(buf));
int offset = ((m_file.pos() - initial_offset + 512 - 1) & ~(512 - 1)) + initial_offset; }
m_file.seek(offset);
largest_offset = offset; tar_log.error("tar_object::get_file() failed to read file entry %s (size=0x%x)", header.name, size);
size = 0;
} }
// Advance offset to next block
largest_offset += utils::align(size, 512) + 512;
} }
return fs::file(); return fs::file();
@ -97,13 +141,14 @@ bool tar_object::extract(std::string path, std::string ignore)
{ {
if (!m_file) return false; if (!m_file) return false;
get_file(""); //Make sure we have scanned all files get_file(""); // Make sure we have scanned all files
for (auto iter : m_map)
{
TARHeader header = read_header(iter.second);
if (!header.name[0]) continue;
std::string result = path + header.name; for (auto& iter : m_map)
{
const TARHeader& header = iter.second.second;
const std::string& name = iter.first;
std::string result = path + name;
if (result.compare(path.size(), ignore.size(), ignore) == 0) if (result.compare(path.size(), ignore.size(), ignore) == 0)
{ {
@ -112,20 +157,22 @@ bool tar_object::extract(std::string path, std::string ignore)
switch (header.filetype) switch (header.filetype)
{ {
case '\0':
case '0': case '0':
{ {
auto data = get_file(header.name).release(); auto data = get_file(name).release();
fs::file file(result, fs::rewrite); fs::file file(result, fs::rewrite);
if (file) if (file)
{ {
file.write(static_cast<fs::container_stream<std::vector<u8>>*>(data.get())->obj); file.write(static_cast<fs::container_stream<std::vector<u8>>*>(data.get())->obj);
tar_log.notice("TAR Loader: written file %s", header.name); tar_log.notice("TAR Loader: written file %s", name);
break; break;
} }
tar_log.error("TAR Loader: failed to write file %s (%s)", header.name, fs::g_tls_error); const auto old_error = fs::g_tls_error;
tar_log.error("TAR Loader: failed to write file %s (%s) (fs::exists=%s)", name, old_error, fs::exists(result));
return false; return false;
} }
@ -133,7 +180,7 @@ bool tar_object::extract(std::string path, std::string ignore)
{ {
if (!fs::create_path(result)) if (!fs::create_path(result))
{ {
tar_log.error("TAR Loader: failed to create directory %s (%s)", header.name, fs::g_tls_error); tar_log.error("TAR Loader: failed to create directory %s (%s)", name, fs::g_tls_error);
return false; return false;
} }

View File

@ -21,18 +21,17 @@ class tar_object
{ {
const fs::file& m_file; const fs::file& m_file;
int initial_offset; usz largest_offset = 0; // We store the largest offset so we can continue to scan from there.
int largest_offset; //we store the largest offset so we can continue to scan from there. std::map<std::string, std::pair<u64, TARHeader>> m_map; // Maps path to offset of file data and its header
std::map<std::string, u64> m_map; //maps path to offset of header of that file, so we only need to scan the entire file once.
TARHeader read_header(u64 offset); TARHeader read_header(u64 offset) const;
public: public:
tar_object(const fs::file& file, usz offset = 0); tar_object(const fs::file& file);
std::vector<std::string> get_filenames(); std::vector<std::string> get_filenames();
fs::file get_file(std::string path); fs::file get_file(const std::string& path);
bool extract(std::string path, std::string ignore = ""); // extract all files in archive to path bool extract(std::string path, std::string ignore = ""); // extract all files in archive to path
}; };