Skip to content

gzip.py

ofrak.core.gzip

GzipData (GenericBinary)

A gzip binary blob.

GzipPacker (Packer)

Pack data into a compressed gzip file.

pack(self, resource, config=None) async

Pack the given resource.

Users should not call this method directly; rather, they should run Resource.run or Resource.pack.

Parameters:

Name Type Description Default
resource Resource

The resource that is being packed

required
config

Optional config for packing. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run.

None
Source code in ofrak/core/gzip.py
async def pack(self, resource: Resource, config=None):
    """
    Recompress the child of a gzip resource and queue a patch with the result.

    The resource is viewed as GzipData, the data of the child returned by the
    view's get_file() is gzip-compressed into an in-memory buffer, and a patch
    covering the resource's whole current data range is queued with the
    compressed bytes.

    :param resource: the gzip resource to repack
    :param config: not used by this implementation
    """
    view = await resource.view_as(GzipData)

    compressed = BytesIO()
    with GzipFile(mode="w", fileobj=compressed) as writer:
        payload = await (await view.get_file()).get_data()
        writer.write(payload)

    # The gzip stream is only complete once the writer above has been closed,
    # so the buffer is read after leaving the `with` block.
    whole_range = Range(0, await view.resource.get_data_length())
    resource.queue_patch(whole_range, compressed.getvalue())

GzipUnpacker (Unpacker)

Unpack (decompress) a gzip file.

unpack(self, resource, config=None) async

Unpack the given resource.

Users should not call this method directly; rather, they should run Resource.run or Resource.unpack.

Parameters:

Name Type Description Default
resource Resource

The resource that is being unpacked

required
config

Optional config for unpacking. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run.

None
Source code in ofrak/core/gzip.py
async def unpack(self, resource: Resource, config=None):
    """
    Decompress a gzip resource into a single GenericBinary child.

    Decompression is first attempted in-process with GzipFile. If that raises
    BadGzipFile, the data is written to a temporary ``.gz`` file and
    decompressed by running ``pigz -d -c`` on that file instead.

    :param resource: the gzip-compressed resource to unpack
    :param config: not used by this implementation

    :raises CalledProcessError: if pigz exits with a nonzero return code other
        than 2 or -2

    :return: the created child when GzipFile succeeds; nothing is returned on
        the pigz fallback path
    """
    compressed = await resource.get_data()
    # GzipFile is faster (spawning external processes has overhead),
    # but pigz is more willing to tolerate things like extra junk at the end
    try:
        with GzipFile(mode="r", fileobj=BytesIO(compressed)) as reader:
            decompressed = reader.read()
            return await resource.create_child(tags=(GenericBinary,), data=decompressed)
    except BadGzipFile:
        # Stage the data in a temporary .gz file whose path is handed to pigz
        with tempfile.NamedTemporaryFile(suffix=".gz") as staged:
            staged.write(compressed)
            # Push buffered bytes out to the file before pigz is started on it
            staged.flush()
            pigz_cmd = ["pigz", "-d", "-c", staged.name]
            pigz = await asyncio.create_subprocess_exec(
                *pigz_cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            out, err = await pigz.communicate()
            exit_code = pigz.returncode
            if exit_code in (2, -2):
                # Forward any gzip warning message and continue
                LOGGER.warning(err)
            elif exit_code:
                raise CalledProcessError(returncode=exit_code, cmd=pigz_cmd, stderr=err)

            await resource.create_child(tags=(GenericBinary,), data=out)