// SPDX-License-Identifier: 0BSD


#include <algorithm>
#include <atomic>
#include <clocale>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <endian.h>
#include <err.h>
#include <pthread.h>
#include <snappy-sinksource.h>
#include <snappy.h>
#include <string_view>
#include <sys/mman.h>
#include <unistd.h>
#if __has_include(<libintl.h>)
#include <libintl.h>
#else
#define gettext(s) (s)
#define ngettext(s, p, n) (n == 1 ? s : p)
#endif

using namespace std::literals;


// CRC-32C backend selection.
//
// Every branch defines the same three macros, which control how the portable table-driven implementation
// further down is declared:
//   CRC32C_GENERIC_ATTR          attribute(s) placed in front of both generic functions (may be empty)
//   CRC32C_GENERIC_NAME          identifier the table-driven checksum function is defined under
//   PARALLEL_COMPRESSION_CRC32C  identifier the generic "checksum on a helper thread?" function (which returns true) is defined under
// When the generic identifiers are not crc32c/parallel_compression_crc32c themselves,
// an architecture-specific section below provides those two names.
#if __i386__ || __x86_64__  // using function multiversioning (or manual cpuid check)
#include <nmmintrin.h>
#if(__linux__ && !__GLIBC__) || __OpenBSD__  // cpuid
// No multiversioning here: the generic functions get a _generic suffix
// and the final crc32c/parallel_compression_crc32c are picked at start-up from a cpuid query.
#include <cpuid.h>
#define CRC32C_GENERIC_ATTR
#define CRC32C_GENERIC_NAME crc32c_generic
#define PARALLEL_COMPRESSION_CRC32C parallel_compression_crc32c_generic
#else  // multiversioning
// The generic functions are the target("default") versions of crc32c/parallel_compression_crc32c;
// the target("sse4.2") versions are defined under the same names.
#define MULTIVER 1
#define CRC32C_GENERIC_ATTR [[gnu::target("default")]]
#define CRC32C_GENERIC_NAME crc32c
#define PARALLEL_COMPRESSION_CRC32C parallel_compression_crc32c
#endif
#elif __aarch64__  // accelerated by default (CRC nominally optional on v8, required on v8.1)
// crc32c/parallel_compression_crc32c are defined unconditionally with the CRC intrinsics,
// so the _generic functions are never referenced; [[maybe_unused]] keeps that quiet.
#include <arm_acle.h>
#define CRC32C_GENERIC_ATTR [[maybe_unused]]
#define CRC32C_GENERIC_NAME crc32c_generic
#define PARALLEL_COMPRESSION_CRC32C parallel_compression_crc32c_generic
#elif __loongarch64  // manual cpucfg check in static init
// crc32c is chosen between crc32c_crcc and crc32c_generic at static-initialisation time,
// while the generic (always-true) function is used directly as parallel_compression_crc32c.
#include <larchintrin.h>
#define CRC32C_GENERIC_ATTR
#define CRC32C_GENERIC_NAME crc32c_generic
#define PARALLEL_COMPRESSION_CRC32C parallel_compression_crc32c  // TODO: measure on hardware! defaults to true, which wins in QEMU
#else
// No accelerated variant: the generic functions are crc32c/parallel_compression_crc32c.
#define CRC32C_GENERIC_ATTR
#define CRC32C_GENERIC_NAME crc32c
#define PARALLEL_COMPRESSION_CRC32C parallel_compression_crc32c
#endif


namespace {
	// 256-entry lookup table for the reflected CRC-32C polynomial 0x82F63B78, computed at compile time:
	// entry[b] is what is left in the CRC register after clocking the single byte b through it, one bit at a time.
	struct crc32c_lut_t {
		std::uint32_t entry[256];

		constexpr crc32c_lut_t() : entry{} {
			for(std::uint32_t byte = 0; byte != 256; ++byte) {
				std::uint32_t rem = byte;
				for(int bit = 0; bit != 8; ++bit)
					rem = (rem >> 1) ^ ((rem & 1) ? 0x82F63B78 : 0);
				this->entry[byte] = rem;
			}
		}
	};

	// Portable, byte-at-a-time CRC-32C update.
	//
	// cur       running CRC register (callers in this file seed it with ~0)
	// data      bytes to fold into the register
	// data_len  number of bytes at data
	//
	// Returns the updated register; no final inversion or masking is applied here.
	CRC32C_GENERIC_ATTR std::uint32_t CRC32C_GENERIC_NAME(std::uint32_t cur, const void * data, std::size_t data_len) {
		static constexpr crc32c_lut_t lut{};
		// The table is linear over XOR, so the eight single-bit entries determine all the others.
		static_assert(lut.entry[0x00] == 0x00000000 && lut.entry[0x01] == 0xF26B8303 && lut.entry[0x02] == 0xE13B70F7 && lut.entry[0x04] == 0xC79A971F &&
		                  lut.entry[0x08] == 0x8AD958CF && lut.entry[0x10] == 0x105EC76F && lut.entry[0x20] == 0x20BD8EDE && lut.entry[0x40] == 0x417B1DBC &&
		                  lut.entry[0x80] == 0x82F63B78 && lut.entry[0xFF] == 0xAD7D5351,
		              "CRC-32C table generator is broken");

		const auto bytes = static_cast<const std::uint8_t *>(data);
		for(std::size_t i = 0; i < data_len; ++i) {
			const std::uint8_t slot = static_cast<std::uint8_t>(cur) ^ bytes[i];
			cur                     = lut.entry[slot] ^ (cur >> 8);
		}
		return cur;
	}

	// Policy that goes with the table-driven checksum: returning true makes the framed compressor in main()
	// compute each chunk's checksum on a separate thread while the main thread compresses.
	CRC32C_GENERIC_ATTR bool PARALLEL_COMPRESSION_CRC32C() {
		constexpr bool checksum_on_helper_thread = true;
		return checksum_on_helper_thread;
	}


#if __i386__ || __x86_64__
// CRC32Q(sym) names the SSE4.2 flavour of sym:
// with multiversioning it is sym itself (the compiler dispatches between the target("default") and target("sse4.2") versions),
// otherwise it is sym_sse42 and the choice is made by hand from a cpuid query.
#if MULTIVER
#define CRC32Q(sym) sym
#else
#define CRC32Q(sym) sym##_sse42
#endif


	// CRC-32C update using the SSE4.2 crc32 instruction.
	//
	// cur_r     running CRC register
	// data      bytes to fold into the register; no alignment is required
	// data_len  number of bytes at data
	//
	// Returns the updated register (no final inversion or masking).
	//
	// The input is consumed one machine word at a time, then byte by byte for the remainder.
	// Words are fetched with memcpy rather than by dereferencing a casted pointer:
	// callers pass pointers of arbitrary alignment, and reading them through a uint64_t/uint32_t lvalue
	// would be a misaligned (and aliasing-violating) access.
	[[gnu::target("sse4.2")]] std::uint32_t CRC32Q(crc32c)(std::uint32_t cur_r, const void * data, std::size_t data_len) {
#if __x86_64__  // this isn't a simple std::size_t+#if _LP64 because x32 is amd64 ILP32
		using sse42_t = std::uint64_t;
#else
		using sse42_t = std::uint32_t;
#endif

		sse42_t cur = cur_r;
		auto bytes  = static_cast<const std::uint8_t *>(data);

		for(auto bulk = data_len / sizeof(sse42_t); bulk; --bulk, bytes += sizeof(sse42_t)) {
			sse42_t word;
			std::memcpy(&word, bytes, sizeof(word));
#if __x86_64__
			cur = _mm_crc32_u64(cur, word);
#else
			cur = _mm_crc32_u32(cur, word);
#endif
		}

		for(auto single = data_len % sizeof(sse42_t); single; --single, ++bytes)
			cur = _mm_crc32_u8(cur, *bytes);

		return cur;
	}

	// Policy that goes with the SSE4.2 checksum: returning false makes the framed compressor in main()
	// compute each chunk's checksum inline instead of on a helper thread.
	[[gnu::target("sse4.2")]] bool CRC32Q(parallel_compression_crc32c)() {
		constexpr bool checksum_on_helper_thread = false;
		return checksum_on_helper_thread;
	}

#if !MULTIVER
	// Manual dispatch for toolchains without function multiversioning.
	// have_crc32q is true when cpuid leaf 1 is available and reports bit 20 of ECX (the SSE4.2 feature flag).
	const bool have_crc32q = ([] {
		constexpr std::uint32_t sse42_flag = 1 << 20;

		std::uint32_t eax, ebx, ecx, edx;
		if(!__get_cpuid_count(1, 0, &eax, &ebx, &ecx, &edx))
			return false;
		return (ecx & sse42_flag) != 0;
	}());
	// Both names resolve to function pointers, fixed for the lifetime of the process.
	const auto crc32c                      = have_crc32q ? &CRC32Q(crc32c) : &CRC32C_GENERIC_NAME;
	const auto parallel_compression_crc32c = have_crc32q ? &CRC32Q(parallel_compression_crc32c) : &PARALLEL_COMPRESSION_CRC32C;
#endif
#elif __aarch64__
	// CRC-32C update using the AArch64 CRC extension.
	//
	// cur       running CRC register
	// data      bytes to fold into the register; no alignment is required
	// data_len  number of bytes at data
	//
	// Returns the updated register (no final inversion or masking).
	//
	// The input is consumed eight bytes at a time, then byte by byte for the remainder.
	// Each 64-bit word is fetched with memcpy rather than by dereferencing a casted pointer:
	// callers pass pointers of arbitrary alignment, and reading them through a uint64_t lvalue
	// would be a misaligned (and aliasing-violating) access.
	[[gnu::target("+crc")]] std::uint32_t crc32c(std::uint32_t cur, const void * data, std::size_t data_len) {
		auto bytes = static_cast<const std::uint8_t *>(data);

		for(auto bulk = data_len / sizeof(std::uint64_t); bulk; --bulk, bytes += sizeof(std::uint64_t)) {
			std::uint64_t word;
			std::memcpy(&word, bytes, sizeof(word));
			cur = __crc32cd(cur, word);
		}

		for(auto single = data_len % sizeof(std::uint64_t); single; --single, ++bytes)
			cur = __crc32cb(cur, *bytes);

		return cur;
	}

	// Policy that goes with the hardware checksum: returning false makes the framed compressor in main()
	// compute each chunk's checksum inline instead of on a helper thread.
	bool parallel_compression_crc32c() {
		constexpr bool checksum_on_helper_thread = false;
		return checksum_on_helper_thread;
	}
#elif __loongarch64
	// CRC-32C update using the LoongArch crcc.w.{d,b}.w instructions.
	//
	// cur_r     running CRC register
	// data      bytes to fold into the register; no alignment is required
	// data_len  number of bytes at data
	//
	// Returns the updated register (no final inversion or masking).
	//
	// The intrinsics take and return the register as a signed int, so it is bit-copied in and out.
	// Each bulk word is fetched with memcpy rather than by dereferencing a casted pointer:
	// callers pass pointers of arbitrary alignment, and reading them through a long int lvalue
	// would be a misaligned (and aliasing-violating) access.
	std::uint32_t crc32c_crcc(std::uint32_t cur_r, const void * data, std::size_t data_len) {
		int cur;
		std::memcpy(&cur, &cur_r, sizeof(std::uint32_t));

		auto bytes = static_cast<const char *>(data);

		for(auto bulk = data_len / sizeof(long int); bulk; --bulk, bytes += sizeof(long int)) {
			long int word;
			std::memcpy(&word, bytes, sizeof(word));
			cur = __crcc_w_d_w(word, cur);
		}

		for(auto single = data_len % sizeof(long int); single; --single, ++bytes)
			cur = __crcc_w_b_w(*bytes, cur);

		std::memcpy(&cur_r, &cur, sizeof(std::uint32_t));
		return cur_r;
	}

	// bool parallel_compression_crc32c() {
	// 	// TODO: measure on hardware! defaults to true, which wins in QEMU
	// }

	// As of 2024-01-14, the official upstream ISA manual (https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_cpucfg) says:
	//   Table 3. The configuration information accessible by the CPUCFG instruction
	//   Word number	Bit number	Annotation	Implication
	//           0x1         25   IOCSR_BRD   1 indicates that the string of processor product information is recorded at address 0 of the IOCSR access space
	//
	//                                        That is, information such as “Loongson3A5000 @2.5GHz”
	//
	// This is corroborated by the manuals for the 3A5000/3B5000, 3C5000, and 3D5000
	//   https://github.com/loongson-community/docs/blob/master/3A5000/Loongson3A5000_3B5000%20user%20book_V1.3.pdf
	//   https://github.com/loongson-community/docs/blob/master/3C5000/龙芯3C5000寄存器及使用手册V1.0.pdf
	//   https://github.com/loongson-community/docs/blob/master/3D5000/2023061508513851030.龙芯3D5000处理器寄存器使用手册_V1.0.pdf
	// whereas the 3A4000 manual allocates cpucfg completely differently
	//   https://github.com/loongson-community/docs/blob/master/3A4000/3A4000_user_v1.5_20191220.pdf
	//
	// The 3A6000's allocation scheme agrees with the live doc and the 5000s except bit 25 is
	//   CRC32  为 1 表示支持 CRC32 加速指令。  1’b1
	// https://github.com/loongson-community/docs/blob/master/3A6000/Loongson3A6000%20user%20book_V1.1.pdf
	//
	// Linux checks bit 25 for CRC/CRCC presence since
	//   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=df830336045db1246d3245d3737fee9939c5f731
	// saying
	//   LoongArch: Fix probing of the CRC32 feature
	//
	//   Not all LoongArch processors support CRC32 instructions. This feature
	//   is indicated by CPUCFG1.CRC32 (Bit25) but it is wrongly defined in the
	//   previous versions of the ISA manual (and so does in loongarch.h). The
	//   CRC32 feature is set unconditionally now, so fix it.
#define LOONG64_CPUCFG1_CRC32 (1 << 25)
	// Chosen once, during static initialisation: the instruction-based routine when CPUCFG word 1 has bit 25 set,
	// the table-driven one otherwise.
	static const auto crc32c = ((__cpucfg(1) & LOONG64_CPUCFG1_CRC32) != 0) ? &crc32c_crcc : &crc32c_generic;
#endif

	// Turns a running CRC register into the checksum stored in a framed chunk:
	// invert the register, rotate it right by 15 bits, and add the constant 0xA282EAD8 (modulo 2^32).
	// https://github.com/google/snappy/blob/f82bff66afe0de4c9ae22f8c4ef84e3c2233e799/framing_format.txt#L53-L55
	std::uint32_t crc32c_finish(std::uint32_t cur) {
		const std::uint32_t crc     = ~cur;
		const std::uint32_t rotated = (crc << 17) | (crc >> 15);
		return rotated + 0xA282EAD8;
	}


	// Process-wide scratch area shared by every code path in this file (reading, compressing, decompressing).
	//
	// buf is 0xFFFFFF bytes long, the largest value a framed chunk's 3-byte length field can hold,
	// so any chunk body fits in it whole.
	// bufsum overlays the first four bytes of buf; the framed decompressor reads it (through le32toh)
	// as the checksum stored at the start of a data chunk.
	// NOTE(review): reading bufsum after writing buf relies on union type punning, which GCC and Clang support as an extension.
	static union {
		char buf[0xFFFFFF];
		std::uint32_t bufsum;
	};


	// A Source is an interface that yields a sequence of bytes
	struct fd_source : snappy::Source {
		int fd;
		const char * filename;
		off_t size, off;

		fd_source(int fd, const char * filename) : fd(fd), filename(filename) {
			if((this->off = (filename == "-"sv ? lseek(this->fd, 0, SEEK_CUR) : 0)) == -1)
				return;
			if((this->size = lseek(this->fd, 0, SEEK_END)) == -1)
				this->off = -1;
		}

		// Return the number of bytes left to read from the source
		virtual size_t Available() const { return this->size - this->off; }

		// Peek at the next flat region of the source.  Does not reposition
		// the source.  The returned region is empty iff Available()==0.
		//
		// Returns a pointer to the beginning of the region and store its
		// length in *len.
		//
		// The returned region is valid until the next call to Skip() or
		// until this object is destroyed, whichever occurs first.
		//
		// The returned region may be larger than Available() (for example
		// if this ByteSource is a view on a substring of a larger source).
		// The caller is responsible for ensuring that it only reads the
		// Available() bytes.
		virtual const char * Peek(size_t * len) {
			static_assert(snappy::kBlockSize <= sizeof(buf));
			ssize_t rd;
			while((rd = pread(this->fd, buf, std::min(static_cast<off_t>(snappy::kBlockSize), this->size), this->off)) == -1 && errno == EINTR)
				;
			if(rd == -1)
				err(2, "%s", this->filename);
			if(rd == 0)
				this->off = this->size;
			*len = rd;
			return buf;
		}

		// Skip the next n bytes.  Invalidates any buffer returned by
		// a previous call to Peek().
		// REQUIRES: Available() >= n
		virtual void Skip(size_t n) { this->off += n; }
	};

	struct FILE_sink : snappy::Sink {
		FILE * f;

		FILE_sink(FILE * f) : f(f) {}

		// Append "bytes[0,n-1]" to this.
		virtual void Append(const char * bytes, size_t n) {
			if(std::fwrite(bytes, 1, n, this->f) != n)
				err(2, gettext("write error"));
		}
	};

	struct FILE_crc32_sink : FILE_sink {
		std::uint32_t sum = ~0;

		using FILE_sink::FILE_sink;

		// Append "bytes[0,n-1]" to this.
		virtual void Append(const char * bytes, size_t n) {
			FILE_sink::Append(bytes, n);
			this->sum = crc32c(this->sum, bytes, n);
		}
	};


	bool uncompress_unframed(const void * data, std::size_t data_len, snappy::Sink * sink, auto filename, bool ignore_errors) {
		std::uint32_t len = -1;
		bool err{};
		{
			snappy::ByteArraySource src{reinterpret_cast<const char *>(data), data_len};
			if(!snappy::GetUncompressedLength(&src, &len)) {
				// not understood by the decompressor
				warnx(gettext("%s: compressed block of length %zu: invalid data"), filename, data_len), err = true;
				if(!ignore_errors)
					return err;
			}
		}
		{
			snappy::ByteArraySource src{reinterpret_cast<const char *>(data), data_len};
			auto un = snappy::UncompressAsMuchAsPossible(&src, sink);
			if(un != len)
				warnx(gettext("%s: compressed block of length %zu: expecting %zu bytes, got %zu"), filename, data_len, static_cast<std::size_t>(len), un), err = true;
		}
		return err;
	}

	// Decompresses a framed snappy stream from f to stdout, chunk by chunk.
	//
	// f              stream positioned at the start of a chunk
	// filename       name used in diagnostics
	// ignore_errors  when false, return at the first problem; when true, warn and carry on with the next chunk
	//
	// Returns nonzero if any problem was found (each one is reported with a warning), zero otherwise.
	// A read error on f terminates the process with status 2.
	//
	// Every chunk is a 1-byte type, a 3-byte little-endian length, and that many bytes of body;
	// data chunks start their body with a 4-byte little-endian masked CRC-32C of the uncompressed data.
	//
	// The stream ends when the file ends -- there is no explicit end-of-file marker.
	// End of file is therefore only legitimate between chunks: a chunk cut short is reported as an error.
	int uncompress_framed(FILE * f, const char * filename, bool ignore_errors) {
		static constexpr char ident[]       = "sNaPpY";
		constexpr std::size_t ident_len     = sizeof(ident) - 1;
		constexpr std::uint32_t cksum_len   = 4;

		bool err{};
		for(;;) {
			auto tp = getc(f);
			if(tp == EOF) {
				if(std::ferror(f))
					::err(2, "%s", filename);
				return err;
			}

			std::uint32_t chunk_len{};
			bool truncated{};
			for(int i = 0; i < 3 && !truncated; ++i) {
				auto l = getc(f);
				if(l == EOF)
					truncated = true;
				else
					chunk_len |= static_cast<std::uint32_t>(l) << (i * 8);
			}
			if(!truncated)
				truncated = std::fread(buf, 1, chunk_len, f) != chunk_len;
			if(truncated) {
				if(std::ferror(f))
					::err(2, "%s", filename);
				// Clean end of file, but in the middle of a chunk: there is nothing left to recover, even with ignore_errors.
				warnx(gettext("%s: truncated chunk of type 0x%02X"), filename, tp);
				return true;
			}

			FILE_crc32_sink sink{stdout};
			bool verify{};  // set once a data chunk's payload has gone through sink and its checksum is to be compared
			switch(tp) {
				case 0xFF:  // 4.1. Stream identifier (chunk type 0xff)
					if(chunk_len != ident_len)
						warnx(gettext("%s: stream identifier chunk: length %zu != %zu"), filename, static_cast<std::size_t>(chunk_len), ident_len), err = true;
					// Only compare what was actually read for this chunk, not leftovers from an earlier one.
					if(std::memcmp(buf, ident, std::min<std::size_t>(chunk_len, ident_len)))
						warnx(gettext("%s: stream identifier chunk: content %.*s != %s"), filename, static_cast<int>(chunk_len), buf, ident), err = true;
					break;
				case 0x00:  // 4.2. Compressed data (chunk type 0x00)
				case 0x01:  // 4.3. Uncompressed data (chunk type 0x01)
					if(chunk_len < cksum_len) {
						// No room for the checksum: chunk_len - 4 would wrap around and run far past the end of buf.
						warnx(gettext("%s: chunk of length %zu: too short for a checksum"), filename, static_cast<std::size_t>(chunk_len)), err = true;
						break;
					}
					if(tp == 0x00)
						err |= uncompress_unframed(buf + cksum_len, chunk_len - cksum_len, &sink, filename, ignore_errors);
					else
						sink.Append(buf + cksum_len, chunk_len - cksum_len);
					verify = true;
					break;
				case 0xFE:  // 4.4. Padding (chunk type 0xfe)
					break;
				default:
					if(tp >= 0x02 && tp <= 0x7F)  // 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f)
						warnx(gettext("%s: chunk of length %zu: unknown type 0x%02X"), filename, static_cast<std::size_t>(chunk_len), tp), err = true;
					else  // 4.6. Reserved skippable chunks (chunk types 0x80-0xfd)
						;
			}
			if(err && !ignore_errors)
				return err;

			if(verify) {
				sink.sum = crc32c_finish(sink.sum);
				// bufsum aliases the first four bytes of buf, i.e. the checksum stored in the chunk.
				if(le32toh(bufsum) != sink.sum) {
					warnx(gettext("%s: chunk of length %zu: checksum 0x%08X != 0x%08X"), filename, static_cast<std::size_t>(chunk_len), sink.sum, le32toh(bufsum)),
					    err = true;
					if(!ignore_errors)
						return err;
				}
			}
		}
	}

	// Reads the rest of f into one contiguous heap buffer, optionally preceded by a copy of prefix.
	//
	// f           stream to drain
	// filename    name used in diagnostics
	// prefix      bytes to place before the stream contents (may be nullptr when prefix_len is 0)
	// prefix_len  number of bytes at prefix
	//
	// Returns the buffer and its length. The buffer comes from open_memstream() and is never freed by this function.
	// Any read or allocation failure terminates the process with status 2.
	//
	// prefix may point into buf: it is copied out before buf is reused as the read buffer.
	std::pair<void *, std::size_t> ingest(FILE * f, const char * filename, const void * prefix = nullptr, std::size_t prefix_len = 0) {
		char * data;
		std::size_t data_size;
		auto mf = open_memstream(&data, &data_size);
		if(!mf)
			err(2, nullptr);

		// Checked like every other write: a silently dropped prefix would corrupt the result.
		if(prefix_len && std::fwrite(prefix, 1, prefix_len, mf) != prefix_len)
			err(2, nullptr);

		for(;;) {
			const auto rd = std::fread(buf, 1, sizeof(buf), f);
			if(rd != sizeof(buf) && std::ferror(f))
				err(2, "%s", filename);

			if(std::fwrite(buf, 1, rd, mf) != rd)
				err(2, nullptr);

			if(rd != sizeof(buf))  // short read without an error: end of input
				break;
		}
		// Closing the memstream is what finalises data and data_size.
		if(std::fclose(mf))
			err(2, nullptr);

		return {data, data_size};
	}

	// Scope guard: flushes stdout when it goes out of scope,
	// terminating the process with status 2 if the flush fails.
	struct stdout_flush {
		~stdout_flush() {
			const bool flushed = std::fflush(stdout) == 0;
			if(!flushed)
				err(2, gettext("write error"));
		}
	};
}

// Entry point: compresses or decompresses one input (a file operand, or stdin) to stdout.
//
// Mode selection:
//   * invoked under a name whose basename starts with 'u': always decompress, only -i is accepted
//   * otherwise: compress, unless -d is given; -f selects the framed format when compressing
//   * -i: keep going after decompression errors
//
// Exit status: 1 for usage errors and for decompression problems (the boolean result of the uncompress_* functions),
// 2 for I/O errors (through err()), 3 when >= 4GiB were compressed without -f, 0 otherwise.
int main(int argc, char * const * argv) {
	setlocale(LC_ALL, "");
#if __has_include(<libintl.h>)
	bindtextdomain("snappy-tools", TEXTDOMAIN_DIRNAME);
	textdomain("snappy-tools");
#endif


	// bn is either the last '/' of argv[0] (so the basename starts one character later) or argv[0] itself.
	auto bn               = std::strrchr(argv[0] ?: "", '/') ?: (argv[0] ?: "");
	auto uncompress_argv0 = *bn == 'u' || (*bn == '/' && *(bn + 1) == 'u');

	bool uncompress = uncompress_argv0;
	bool uncompress_ignore_errors{};
	bool compress_frame{};

	for(int arg; (arg = getopt(argc, argv, uncompress_argv0 ? "i" : "dif")) != -1;)
		switch(arg) {
			case 'd':
				uncompress = true;
				break;
			case 'i':
				uncompress_ignore_errors = true;
				break;
			case 'f':
				compress_frame = true;
				break;
			default:
			usage:
				// Also in README!
				return std::fprintf(stderr,
				                    uncompress_argv0 ? gettext("usage: %1$s [-i]   snappy.sn|.sz\n"
				                                               "       %1$s [-i] < snappy.sn|.sz\n")
				                                     : gettext("usage: %1$s    [-f]   data > snappy.sn|.sz\n"
				                                               "       %1$s    [-f] < data > snappy.sn|.sz\n"
				                                               "       %1$s -d [-i]          snappy.sn|.sz\n"
				                                               "       %1$s -d [-i] <        snappy.sn|.sz\n"),
				                    argv[0]),
				       1;
		}
	// At most one operand is accepted.
	if(*(argv + optind) && *(argv + optind + 1))
		goto usage;

	// A named file replaces stdin, so everything below only ever deals with descriptor 0 / stdin.
	auto filename = *(argv + optind) ?: "-";
	if(filename != "-"sv)
		if(!std::freopen(filename, "r", stdin))
			err(2, "%s", filename);

	const int fd   = 0;
	FILE * const f = stdin;
	// Flushes stdout (and checks the result) on every return path below.
	stdout_flush _flusher{};

	if(uncompress) {
		// Sniff the first 10 bytes: a framed stream starts with a stream-identifier chunk.
		auto hdrlen = std::fread(buf, 1, sizeof("\xFF\x06\x00\x00sNaPpY") - 1, f);
		if(hdrlen != sizeof("\xFF\x06\x00\x00sNaPpY") - 1) {
			if(std::ferror(f))
				err(2, "%s", filename);

			// Input shorter than the identifier: it can only be an unframed block, and all of it is already in buf.
			FILE_sink sink{stdout};
			return uncompress_unframed(buf, hdrlen, &sink, filename, uncompress_ignore_errors);
		} else if(!std::memcmp(buf, "\xFF\x06\x00\x00sNaPpY", sizeof("\xFF\x06\x00\x00sNaPpY") - 1))
			return uncompress_framed(f, filename, uncompress_ignore_errors);

		// Ideally we'd just ungetc but realistically glibc/musl give us 8 bytes and we want 9 :/
		// while(hdrlen--)
		// 	assert(std::ungetc(buf[hdrlen], stdin) != EOF);

		// Unframed input: the whole block is needed in memory at once.
		// Try to map the file; if its size can't be determined or the mapping fails,
		// read it into a heap buffer instead, re-using the already-consumed bytes in buf as the prefix.
		off_t orig_pos, len;
		const void * mapping;
		if((orig_pos = ftello(f)) == -1 || fseeko(f, 0, SEEK_END) == -1 || (len = ftello(f)) == -1 ||
		   ((mapping = mmap(nullptr, len, PROT_READ, MAP_PRIVATE, fd, 0)) == MAP_FAILED && (fseeko(f, orig_pos, SEEK_SET), true))) {
			auto dt = ingest(f, filename, buf, hdrlen);
			FILE_sink sink{stdout};
			return uncompress_unframed(dt.first, dt.second, &sink, filename, uncompress_ignore_errors);
		}

		// orig_pos - hdrlen is the file offset the sniffed bytes were read from, i.e. where the block starts.
		FILE_sink sink{stdout};
		return uncompress_unframed(reinterpret_cast<const std::uint8_t *>(mapping) + (orig_pos - hdrlen), len - (orig_pos - hdrlen), &sink, filename,
		                           uncompress_ignore_errors);
	} else {
		// Byte counts for the summary line printed at the end.
		unsigned long long read{}, written{};

		// No  thread: {read   → compress → cksum → write}...
		//
		// Yes thread: {read 🚧→ compress 🚧→ write}...
		//                    ↓ len        ↑ sum
		//     thread:      {🚧→ cksum    🚧→}...
		const auto crc_in_thread = parallel_compression_crc32c();

		// len_sum is a mailbox used in both directions:
		// the main thread stores the chunk length, and after the first barrier the helper replaces it
		// with the finished little-endian checksum, which the main thread picks up after the second barrier.
		struct ipc {
			pthread_barrier_t barrier;
			std::atomic<std::uint32_t> len_sum;
		} ipc;
		pthread_t cksum_thread;
		if(crc_in_thread) {
			// Both initialisations are retried until they succeed.
			while(pthread_barrier_init(&ipc.barrier, nullptr, 2))
				;
			// The helper is never joined; it loops forever and goes away when the process exits.
			while(pthread_create(
			    &cksum_thread, nullptr,
			    [](void * ipcp) -> void * {
				    auto & ipc = *reinterpret_cast<struct ipc *>(ipcp);
				    for(;;) {
					    pthread_barrier_wait(&ipc.barrier);
					    ipc.len_sum.store(htole32(crc32c_finish(crc32c(~0, buf, ipc.len_sum.load(std::memory_order::relaxed)))), std::memory_order::relaxed);
					    pthread_barrier_wait(&ipc.barrier);
				    }
				    __builtin_unreachable();
			    },
			    &ipc))
				;
		}

		if(compress_frame) {
			// Stream identifier chunk first.
			if(std::fwrite("\xFF\x06\x00\x00sNaPpY", 1, sizeof("\xFF\x06\x00\x00sNaPpY") - 1, stdout) != sizeof("\xFF\x06\x00\x00sNaPpY") - 1)
			we:
				err(2, gettext("write error"));
			written = sizeof("\xFF\x06\x00\x00sNaPpY") - 1;

			// However, we place an additional restriction that the uncompressed data in a chunk must be no longer than 65536 bytes.
			// A short read ends the loop after that (possibly partial) chunk has been written.
			for(std::size_t rd = 65536; rd == 65536;) {
				if((rd = std::fread(buf, 1, 65536, f)) != 65536 && std::ferror(f))
					err(2, "%s", filename);
				read += rd;
				if(!rd)
					break;

				// Hand the chunk length to the helper and release it; it checksums buf while we compress.
				if(crc_in_thread) {
					ipc.len_sum.store(rd, std::memory_order::relaxed);
					pthread_barrier_wait(&ipc.barrier);
				}

				// The uncompressed chunk occupies the first 65536 bytes of buf; the compressed form goes right after it.
				auto compbuf            = buf + 65536;
				std::size_t compbuf_len = -1;
				snappy::RawCompress(buf, rd, compbuf, &compbuf_len);

				// Both the uncompressed and the compressed chunks have the same final size: [1    + 3  ] + 4     + data
				//                                                                           [type + len] + cksum + data
				//                                                                           [header    ] + body
				// 4.2. Compressed data (chunk type 0x00)
				// 4.3. Uncompressed data (chunk type 0x01)
				// Only store the compressed form when it is actually smaller.
				std::uint8_t tp = (compbuf_len < rd) ? 0x00 : 0x01;
				if(putc(tp, stdout) == EOF)
					goto we;
				// 3-byte little-endian body length: checksum + data.
				auto outsize = 4 + (tp ? rd : compbuf_len);
				if(putc((outsize & 0x0000FF) >> 0, stdout) == EOF ||  //
				   putc((outsize & 0x00FF00) >> 8, stdout) == EOF ||  //
				   putc((outsize & 0xFF0000) >> 16, stdout) == EOF)
					goto we;

				// crc is already in little-endian byte order either way, so it is written out as raw bytes.
				std::uint32_t crc;
				if(crc_in_thread) {
					pthread_barrier_wait(&ipc.barrier);
					crc = ipc.len_sum.load(std::memory_order::relaxed);
				} else
					crc = htole32(crc32c_finish(crc32c(~0, buf, rd)));
				if(std::fwrite(&crc, 1, 4, stdout) != 4)
					goto we;

				if(std::fwrite(tp ? buf : compbuf, 1, tp ? rd : compbuf_len, stdout) != (tp ? rd : compbuf_len))
					goto we;
				written += 1 + 3 + 4 + (tp ? rd : compbuf_len);
			}
		} else {
			FILE_sink sink{stdout};

			// Unframed output is a single block, so the total input length must be known up front:
			// stream straight from the descriptor when it is seekable and non-empty,
			// otherwise slurp the whole input into memory first.
			if(fd_source source{fd, filename}; source.size && source.off != -1) {
				read = source.size - source.off;
				// Available() returns size_t: refuse inputs whose length doesn't survive that conversion.
				if(read != source.Available())
					errno = EOVERFLOW, err(2, "%s", filename);

				written = Compress(&source, &sink);
			} else {
				auto dt = ingest(f, filename);
				read    = dt.second;

				snappy::ByteArraySource src{reinterpret_cast<const char *>(dt.first), dt.second};
				written = Compress(&src, &sink);
			}
		}

		// filename, byte count read, byte count written, ratio
		std::fprintf(stderr, gettext("%s: %llu -> %llu (%.2f%%)\n"), filename, read, written, 100. * ((double)written / read));

		if(!compress_frame && read >= 4ull * 1024 * 1024 * 1024)
			return std::fprintf(stderr, gettext("%s: %s: sized %lluB >= 4GiB w/o -f: output stream may be broken!\n"), argv[0], filename, read), 3;
	}
}
