lz4.py
ofrak.core.lz4
Lz4 Components.
Lz4Unpacker currently supports unpacking modern LZ4 format (Lz4ModernData), legacy format (see Lz4LegacyData), and skippable data (Lz4SkippableData).
Lz4Packer supports repacking the modern LZ4 format (Lz4ModernData), matching block/checksum information extracted during unpacking. Compression level can be specified via config.
Lz4LegacyPacker supports repacking legacy LZ4 format (Lz4LegacyData) with compression level support (default/fast/high modes). Compression level can be specified via config.
Lz4Data (GenericBinary)
dataclass
Base class for LZ4 binary blobs.
LZ4 is a high-speed lossless compression algorithm.
Lz4LegacyData (Lz4Data)
dataclass
LZ4 legacy frame format (v0.1-v0.9).
Lz4LegacyPacker (Packer)
Pack data into compressed LZ4 legacy format.
Legacy format supports compression levels via lz4.block.compress(): - Negative values: Fast mode with acceleration - 0: Default compression - 1-12: High compression mode
pack(self, resource, config=None)
async
Pack data into Lz4LegacyData format.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| resource | Resource | The LZ4 legacy resource to pack | required |
| config | Lz4PackerConfig | Optional configuration specifying compression level | None |
Source code in ofrak/core/lz4.py
async def pack(self, resource: Resource, config: Lz4PackerConfig = None):
    """
    Pack data into `Lz4LegacyData` format.

    :param resource: The LZ4 legacy resource to pack
    :param config: Optional configuration specifying compression level
    """
    config = config if config is not None else Lz4PackerConfig()
    child = await resource.get_only_child()
    uncompressed = await child.get_data()
    # LZ4 legacy format uses 8 MB blocks (see https://github.com/lz4/lz4/blob/67a385a170d2dc331a25677e0d20d96eef0450c5/programs/lz4io.c#L86)
    legacy_block_size = 8 * (1 << 20)  # 8 MB
    level = config.compression_level
    # The legacy frame is the magic number followed by a sequence of
    # (4-byte little-endian compressed size, compressed block) pairs.
    parts = [LZ4_LEGACY_MAGIC]
    # Compress each 8 MB chunk independently; the final chunk may be shorter.
    for start in range(0, len(uncompressed), legacy_block_size):
        chunk = uncompressed[start : start + legacy_block_size]
        # Mirror the lz4 CLI's mapping of levels for the legacy format:
        # negative -> fast mode with acceleration = -level,
        # 0-2 -> fast mode with acceleration 0,
        # >= 3 -> high-compression mode at that level.
        if level < 0:
            compressed = lz4.block.compress(
                chunk,
                mode="fast",
                acceleration=abs(level),
                store_size=False,
            )
        elif level < 3:
            compressed = lz4.block.compress(
                chunk, mode="fast", acceleration=0, store_size=False
            )
        else:
            compressed = lz4.block.compress(
                chunk,
                mode="high_compression",
                compression=level,
                store_size=False,
            )
        # Each block is prefixed with its compressed size.
        parts.append(len(compressed).to_bytes(4, "little") + compressed)
    original_size = await resource.get_data_length()
    resource.queue_patch(Range(0, original_size), b"".join(parts))
Lz4LegacyUnpacker (Unpacker)
Unpack (decompress) LZ4 legacy frame format files.
Legacy format (v0.1-v0.9) uses lz4.block decompression instead of lz4.frame.
unpack(self, resource, config=None)
async
Unpack LZ4 legacy data.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| resource | Resource | The LZ4 legacy resource to unpack | required |
Exceptions:
| Type | Description |
|---|---|
| RuntimeError | if the data is not valid LZ4 legacy format |
Source code in ofrak/core/lz4.py
async def unpack(self, resource: Resource, config=None):
    """
    Unpack LZ4 legacy data.

    :param resource: The LZ4 legacy resource to unpack
    :param config: unused

    :raises RuntimeError: if the data is not valid LZ4 legacy format
    """
    resource_data = await resource.get_data()
    if len(resource_data) < 8:
        raise RuntimeError("Invalid LZ4 legacy format: file too short")
    deserializer = BinaryDeserializer(
        io.BytesIO(resource_data),
        endianness=Endianness.LITTLE_ENDIAN,
        word_size=4,
    )
    magic = deserializer.read(4)
    # Validate with an explicit raise rather than `assert`: asserts are
    # stripped under `python -O`, and a bad magic number is an input error
    # (promised as RuntimeError above), not an internal invariant.
    if magic != LZ4_LEGACY_MAGIC:
        raise RuntimeError("Invalid LZ4 legacy format: bad magic number")
    # Collect blocks in a list and join once at the end; repeated bytes
    # concatenation is quadratic in the number of blocks.
    decompressed_blocks = []
    while deserializer.position() < len(resource_data):
        # Legacy LZ4 has a repeating pattern of 4 bytes block size followed by compressed data
        block_size = deserializer.unpack_uint()
        compressed_block = deserializer.read(block_size)
        try:
            # LZ4 legacy block size (uncompressed) is 8 MB (see https://github.com/lz4/lz4/blob/67a385a170d2dc331a25677e0d20d96eef0450c5/programs/lz4io.c#L86)
            decompressed_blocks.append(
                lz4.block.decompress(
                    compressed_block,
                    uncompressed_size=8 * (1 << 20),
                )
            )
        except Exception as e:
            LOGGER.error(f"Failed to decompress LZ4 legacy data: {e}")
            # Chain the cause so the original lz4 error is preserved
            raise RuntimeError(f"LZ4 legacy decompression failed: {e}") from e
    await resource.create_child(
        tags=(GenericBinary,),
        data=b"".join(decompressed_blocks),
    )
Lz4ModernData (Lz4Data)
dataclass
LZ4 modern frame format (v1.4+).
Lz4Packer (Packer)
Pack data into a compressed LZ4 file using modern frame format.
Implementation repacks modern frame format preserving frame metadata. Compression level can be specified via config (default: 0).
pack(self, resource, config=None)
async
Pack data into Lz4ModernData format.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| resource | Resource | The LZ4 resource to pack | required |
| config | Lz4PackerConfig | Optional configuration specifying compression level | None |
Source code in ofrak/core/lz4.py
async def pack(self, resource: Resource, config: Lz4PackerConfig = None):
    """
    Pack data into `Lz4ModernData` format.

    :param resource: The LZ4 resource to pack
    :param config: Optional configuration specifying compression level
    """
    if config is None:
        config = Lz4PackerConfig()
    child = await resource.get_only_child()
    uncompressed = await child.get_data()
    # Reuse the frame settings captured during unpacking so the repacked
    # frame matches the original's block/checksum configuration.
    view = await resource.view_as(Lz4ModernData)
    compressed = lz4.frame.compress(
        uncompressed,
        compression_level=config.compression_level,
        content_checksum=view.content_checksum,
        block_checksum=view.block_checksum,
        block_size=view.block_size_id,
        block_linked=view.block_linked,
        # Only embed a content-size field if the original frame had one
        store_size=view.content_size != 0,
    )
    original_size = await resource.get_data_length()
    resource.queue_patch(Range(0, original_size), compressed)
Lz4PackerConfig (ComponentConfig)
dataclass
Configuration for LZ4 packer.
compression_level: Compression level to use (default: 0). - Negative values: Fast acceleration (faster, less compression) - 0-2: Minimum compression (default, all produce same output) - 3: Minimum high-compression mode - 4-16: Higher compression levels (16 is maximum)
Lz4SkippableData (Lz4Data)
dataclass
LZ4 skippable frame.
Special frame type for embedding metadata or application-specific data.
Lz4Unpacker (Unpacker)
Unpack (decompress) LZ4 modern frame format files.
Supports: - Modern frame format (Lz4ModernData) - Skippable frames (metadata containers: Lz4SkippableData)
unpack(self, resource, config=None)
async
Unpack LZ4 data.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| resource | Resource | The LZ4 resource to unpack | required |
Exceptions:
| Type | Description |
|---|---|
| RuntimeError | if the data is not valid LZ4 format |
Source code in ofrak/core/lz4.py
async def unpack(self, resource: Resource, config=None):
    """
    Unpack LZ4 data.

    :param resource: The LZ4 resource to unpack
    :raises RuntimeError: if the data is not valid LZ4 format
    """
    compressed = await resource.get_data()
    if resource.has_tag(Lz4ModernData):
        # lz4.frame.get_frame_info() does not support legacy frames
        info = lz4.frame.get_frame_info(compressed)
        # Record the frame's settings on the view so the packer can
        # reproduce them when repacking.
        view = Lz4ModernData(
            block_size=info["block_size"],
            block_size_id=info["block_size_id"],
            block_linked=info["block_linked"],
            content_checksum=info["content_checksum"],
            block_checksum=info["block_checksum"],
            content_size=info["content_size"],
        )
        resource.add_view(view)
    try:
        decompressed = lz4.frame.decompress(compressed)
    except RuntimeError as e:
        LOGGER.error(f"Failed to decompress LZ4 data: {e}")
        raise
    await resource.create_child(
        tags=(GenericBinary,),
        data=decompressed,
    )