tar.py
ofrak.core.tar
TarArchive (GenericBinary, FilesystemRoot)
dataclass
Filesystem stored in a tar archive.
TarPacker (Packer)
Packages files into a TAR (Tape Archive) format, preserving file permissions, ownership, timestamps, and directory structure without compression. Use after modifying extracted tar contents to recreate archives for distribution or backup.
pack(self, resource, config=None)
async
Pack the given resource.
Users should not call this method directly; rather, they should run Resource.run or Resource.pack.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| resource | Resource | The resource that is being packed | required |
| config | ComponentConfig | Optional config for packing. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run. | None |
Source code in ofrak/core/tar.py
async def pack(self, resource: Resource, config: ComponentConfig = None) -> None:
    """
    Rebuild the tar archive from the resource's children and queue it as the new data.

    The resource is viewed as a ``TarArchive`` and flushed to a directory on disk; the
    external tar tool then archives that directory (with ``--xattrs``) into a temporary
    ``.tar`` file, whose bytes are queued as a patch over the resource's full data range.

    :param resource: the resource to pack
    :param config: not read by this implementation

    :raises CalledProcessError: if the tar tool exits with a non-zero status
    """
    # Materialize the child files on disk so the external tool can read them
    archive_view = await resource.view_as(TarArchive)
    staging_dir = await archive_view.flush_to_disk()

    # delete_on_close=False keeps the file around after close(); it is removed when the
    # `with` block exits, so everything that needs the path lives inside the block.
    with tempfile.NamedTemporaryFile(suffix=".tar", delete_on_close=False) as scratch:
        # Only the path is needed here: release our handle before tar writes to it
        scratch.close()
        archive_path = scratch.name

        # Archive "." relative to the staging directory
        tar_argv = [TAR.tool, "--xattrs", "-C", staging_dir, "-cf", archive_path, "."]
        tar_proc = await asyncio.create_subprocess_exec(*tar_argv)
        exit_status = await tar_proc.wait()
        if exit_status:
            raise CalledProcessError(returncode=exit_status, cmd=tar_argv)

        # Swap the resource's entire current contents for the freshly built archive
        with open(archive_path, "rb") as packed:
            current_length = await resource.get_data_length()
            resource.queue_patch(Range(0, current_length), packed.read())
TarUnpacker (Unpacker)
Extracts files and directories from TAR (Tape Archive) archives. TAR preserves file permissions, ownership, timestamps, extended attributes (xattrs), and directory structure without compression. Use for .tar files, or as the second step in unpacking compressed tarballs (.tar.gz, .tar.bz2, etc.) after first decompressing.
unpack(self, resource, config=None)
async
Unpack the given resource.
Users should not call this method directly; rather, they should run Resource.run or Resource.unpack.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| resource | Resource | The resource that is being unpacked | required |
| config | ComponentConfig | Optional config for unpacking. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run. | None |
Source code in ofrak/core/tar.py
async def unpack(self, resource: Resource, config: ComponentConfig = None) -> None:
    """
    Extract the tar archive held by the resource and build its filesystem children.

    The archive data is written to a temporary file and its member names are listed
    with the external tar tool. If any member would land in a parent directory the
    unpack is aborted; otherwise the archive is extracted (with ``--xattrs``) into a
    temporary directory, from which the ``TarArchive`` view is initialized.

    :param resource: the resource that is being unpacked
    :param config: not read by this implementation

    :raises CalledProcessError: if listing or extracting the archive exits with a
        non-zero status; for the listing step the captured stdout/stderr are attached
    :raises UnpackerError: if a member path resolves to a parent directory
    """
    # Write the archive data to a file
    async with resource.temp_to_disk(suffix=".tar") as temp_archive_path:
        # Check the archive member files to ensure none unpack to a parent directory.
        # -P is passed so member names are listed as stored (see the GNU tar manual on
        # --absolute-names) and rooted paths reach the check below unmodified.
        list_cmd = [
            TAR.tool,
            "-P",
            "-tf",
            temp_archive_path,
        ]
        proc = await asyncio.create_subprocess_exec(
            *list_cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        if proc.returncode:
            # Attach the captured output so the failure is diagnosable by the caller
            raise CalledProcessError(
                returncode=proc.returncode, cmd=list_cmd, output=stdout, stderr=stderr
            )

        parent_prefix = os.pardir + os.sep
        for filename in stdout.decode().splitlines():
            # Handles relative parent paths and rooted paths, and normalizes paths like
            # "./../". relpath() resolves against the current working directory.
            rel_filename = os.path.relpath(filename)
            # A member such as "../" normalizes to exactly "..", which has no trailing
            # separator, so it must be matched explicitly as well as by prefix.
            if rel_filename == os.pardir or rel_filename.startswith(parent_prefix):
                raise UnpackerError(
                    f"Tar archive contains a file {filename} that would extract to a parent "
                    f"directory {rel_filename}."
                )

        # Unpack into a temporary directory using the temporary file
        with tempfile.TemporaryDirectory() as temp_dir:
            # Use the same tool handle as the listing step rather than a literal "tar"
            extract_cmd = [TAR.tool, "--xattrs", "-C", temp_dir, "-xf", temp_archive_path]
            proc = await asyncio.create_subprocess_exec(
                *extract_cmd,
            )
            returncode = await proc.wait()
            if returncode:
                raise CalledProcessError(returncode=returncode, cmd=extract_cmd)

            # Initialize a filesystem from the unpacked/untarred temporary folder while
            # the temporary directory still exists
            tar_view = await resource.view_as(TarArchive)
            await tar_view.initialize_from_disk(temp_dir)