tar.py
ofrak.core.tar
TarArchive (GenericBinary, FilesystemRoot)
dataclass
Filesystem stored in a tar archive.
TarPacker (Packer)
Pack files into a tar archive.
pack(self, resource, config=None)
async
Pack the given resource.
Users should not call this method directly; rather, they should run Resource.run or Resource.pack.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
resource |
Resource |
The resource that is being packed |
required |
config |
ComponentConfig |
Optional config for packing. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run. |
None |
Source code in ofrak/core/tar.py
async def pack(self, resource: Resource, config: ComponentConfig = None) -> None:
    """
    Rebuild the tar archive from its child files and queue it as the resource's new data.

    The child files are flushed to disk, archived with the system `tar` command into a
    temporary file, and the resulting bytes are queued as a patch covering the resource's
    entire current data range.

    :param resource: the tar archive resource to repack
    :param config: not used by this implementation

    :raises CalledProcessError: if `tar` exits with a non-zero status
    """
    # Write the archive's children out to a directory on disk
    archive_view = await resource.view_as(TarArchive)
    source_dir = await archive_view.flush_to_disk()

    # Only the temporary file's *name* is needed: close our handle right away so `tar` can
    # write to that path. With delete_on_close=False the file remains until the `with` exits.
    with tempfile.NamedTemporaryFile(suffix=".tar", delete_on_close=False) as packed_file:
        packed_file.close()
        packed_path = packed_file.name

        cmd = ["tar", "--xattrs", "-C", source_dir, "-cf", packed_path, "."]
        tar_proc = await asyncio.create_subprocess_exec(*cmd)
        returncode = await tar_proc.wait()
        if returncode:
            raise CalledProcessError(returncode=returncode, cmd=cmd)

        # Swap the whole of the existing data for the freshly built archive
        with open(packed_path, "rb") as packed_fh:
            full_range = Range(0, await resource.get_data_length())
            resource.queue_patch(full_range, packed_fh.read())
TarUnpacker (Unpacker)
Unpack a tar archive.
unpack(self, resource, config=None)
async
Unpack the given resource.
Users should not call this method directly; rather, they should run Resource.run or Resource.unpack.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
resource |
Resource |
The resource that is being unpacked |
required |
config |
ComponentConfig |
Optional config for unpacking. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run. |
None |
Source code in ofrak/core/tar.py
async def unpack(self, resource: Resource, config: ComponentConfig = None) -> None:
    """
    Unpack a tar archive resource into a filesystem of child resources.

    The archive data is written to a temporary file, its member names are listed with
    `tar -P -tf` and checked for names that would land outside the extraction directory,
    and the archive is then extracted into a temporary directory from which the
    `TarArchive` view is initialized.

    :param resource: the resource that is being unpacked
    :param config: not used by this implementation

    :raises CalledProcessError: if either `tar` invocation exits with a non-zero status
    :raises UnpackerError: if a member name is rooted or normalizes to a parent directory
    """
    # Write the archive data to a file
    async with resource.temp_to_disk(suffix=".tar") as temp_archive_path:
        # Check the archive member files to ensure none unpack to a parent directory
        cmd = [
            "tar",
            "-P",
            "-tf",
            temp_archive_path,
        ]
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        if proc.returncode:
            # Attach the captured output so the failure can actually be diagnosed
            raise CalledProcessError(
                returncode=proc.returncode, cmd=cmd, output=stdout, stderr=stderr
            )

        parent_prefix = os.pardir + os.sep
        for filename in stdout.decode().splitlines():
            # Normalize lexically (collapses "./", "a/../", trailing separators) instead of
            # resolving against the process's current working directory: the verdict must
            # not change depending on where the process happens to be running from.
            normalized_filename = os.path.normpath(filename)
            escapes_extraction_dir = (
                os.path.isabs(normalized_filename)
                or normalized_filename == os.pardir
                or normalized_filename.startswith(parent_prefix)
            )
            if escapes_extraction_dir:
                raise UnpackerError(
                    f"Tar archive contains a file {filename} that would extract to a parent "
                    f"directory {normalized_filename}."
                )

        # Unpack into a temporary directory using the temporary file
        with tempfile.TemporaryDirectory() as temp_dir:
            command = ["tar", "--xattrs", "-C", temp_dir, "-xf", temp_archive_path]
            proc = await asyncio.create_subprocess_exec(
                *command,
            )
            returncode = await proc.wait()
            if returncode:
                raise CalledProcessError(returncode=returncode, cmd=command)

            # Initialize a filesystem from the unpacked/untarred temporary folder
            tar_view = await resource.view_as(TarArchive)
            await tar_view.initialize_from_disk(temp_dir)