magic.py
ofrak.core.magic
Magic (ResourceAttributes)
dataclass
Magic(mime: str, descriptor: str)
MagicAnalyzer (Analyzer)
Identifies file types and formats by analyzing magic byte signatures and file structure using libmagic (the library behind the Unix file command). Produces a MIME type and human-readable description. Use for initial file type identification of unknown binaries, automated triage of large firmware collections, determining what unpacker or analyzer to use next, or validating that files match expected types. Often the first analysis step when encountering unknown files.
analyze(self, resource, config=None)
async
Analyze a resource to extract specific ResourceAttributes.
Users should not call this method directly; rather, they should run Resource.run or Resource.analyze.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
resource |
Resource |
The resource that is being analyzed |
required |
config |
Optional config for analyzing. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run. |
None |
Returns:
| Type | Description |
|---|---|
Magic |
The analysis results |
Source code in ofrak/core/magic.py
async def analyze(self, resource: Resource, config=None) -> Magic:
    """
    Determine the MIME type and the libmagic description of a resource's data.

    :param resource: the resource whose data is inspected
    :param config: not used by this analyzer

    :raises ComponentMissingDependencyError: if MAGIC_INSTALLED is false

    :return: Magic attributes holding the MIME type and the description
    """
    # Fail fast: there is no point loading the resource's data when it cannot
    # be inspected anyway.
    if not MAGIC_INSTALLED:
        raise ComponentMissingDependencyError(self, LIBMAGIC_DEP)

    data = await resource.get_data()
    magic_mime = magic.from_buffer(data, mime=True)
    magic_description = magic.from_buffer(data)
    return Magic(magic_mime, magic_description)
MagicDescriptionPattern
Pattern to tag resources based on their libmagic description.
register(resource_tag, matcher)
classmethod
Register a callable that determines whether the given resource tag should be applied.
Source code in ofrak/core/magic.py
@classmethod
def register(cls, resource_tag: ResourceTag, matcher: Callable[[str], bool]):
    """
    Associate a description matcher with the resource tag it should apply.

    :param resource_tag: tag stored for the matcher
    :param matcher: callable taking a string and returning a bool

    :raises AlreadyExistError: if this matcher has already been registered
    """
    already_registered = matcher in cls.matchers
    if already_registered:
        raise AlreadyExistError("Registering already-registered matcher")
    cls.matchers.update({matcher: resource_tag})
run(resource, magic_description)
classmethod
Run this pattern against a given resource, tagging it based on registered tags.
This method is designed to be called by the MagicIdentifier.
Source code in ofrak/core/magic.py
@classmethod
def run(cls, resource: Resource, magic_description: str):
    """
    Tag the resource with every registered tag whose matcher accepts the description.
    This method is designed to be called by the [MagicIdentifier][ofrak.core.magic.MagicIdentifier].

    :param resource: the resource that receives the tags
    :param magic_description: description string handed to each registered matcher
    """
    for accepts, tag in cls.matchers.items():
        if not accepts(magic_description):
            continue
        resource.add_tag(tag)
MagicIdentifier (Identifier)
Identify resources using three identifier patterns: MagicMimePattern, MagicDescriptionPattern, and RawMagicPattern.
OFRAK component authors can "register" magic patterns to run whenever this identifier is run:
MagicMimePattern.register(GenericBinary, "application/octet-stream")
identify(self, resource, config=None)
async
Perform identification on the given resource.
Users should not call this method directly; rather, they should run Resource.identify.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
resource |
Resource |
required | |
config |
Optional config for identifying. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run. |
None |
Source code in ofrak/core/magic.py
async def identify(self, resource: Resource, config=None) -> None:
    """
    Tag the resource by running the mime, description and raw magic patterns, in that order.

    :param resource: the resource to analyze for Magic attributes and then tag
    :param config: not used by this identifier
    """
    magic_attributes = await resource.analyze(Magic)

    # The first two patterns are synchronous and work from the analysis results.
    MagicMimePattern.run(resource, magic_attributes.mime)
    MagicDescriptionPattern.run(resource, magic_attributes.descriptor)

    # The raw pattern reads the resource's data itself, so it must be awaited.
    await RawMagicPattern.run(resource)
MagicMimePattern
Pattern to tag resources based on their mimetype.
register(resource_tag, mime_types)
classmethod
Register what resource tags correspond to specific mime types.
Source code in ofrak/core/magic.py
@classmethod
def register(cls, resource_tag: ResourceTag, mime_types: Union[Iterable[str], str]):
    """
    Register what resource tags correspond to specific mime types.

    Registration is all-or-nothing: every requested mime type is validated before
    any of them is stored, so a conflict never leaves a partial registration behind.

    :param resource_tag: tag to associate with each of the given mime types
    :param mime_types: a single mime type, or an iterable of mime types

    :raises AlreadyExistError: if a mime type is already registered, or appears more
    than once in mime_types
    """
    # A bare string is itself iterable, so wrap it rather than splitting it into
    # characters. Materializing also lets a one-shot iterable be walked twice below.
    requested = [mime_types] if isinstance(mime_types, str) else list(mime_types)

    # First pass: validate only, without touching the registry.
    seen = set()
    for mime_type in requested:
        if mime_type in cls.tags_by_mime or mime_type in seen:
            raise AlreadyExistError(f"Registering already-registered mime type: {mime_type}")
        seen.add(mime_type)

    # Second pass: commit, now that nothing can conflict.
    for mime_type in requested:
        cls.tags_by_mime[mime_type] = resource_tag
run(resource, magic_mime)
classmethod
Run the pattern against a given resource, tagging it based on matching mime types.
This method is designed to be called by the MagicIdentifier.
Source code in ofrak/core/magic.py
@classmethod
def run(cls, resource: Resource, magic_mime: str):
    """
    Tag the resource with the tag registered for its mime type, if there is one.
    This method is designed to be called by the [MagicIdentifier][ofrak.core.magic.MagicIdentifier].

    :param resource: the resource that receives the tag
    :param magic_mime: mime type used to look up the registered tag
    """
    registered_tag = cls.tags_by_mime.get(magic_mime)
    if registered_tag is None:
        # Nothing is registered for this mime type; leave the resource untouched.
        return
    resource.add_tag(registered_tag)
RawMagicPattern
Pattern to tag resources based on custom raw magic matching patterns.
MAX_SEARCH_SIZE specifies how many bytes this pattern's run method exposes to registered
matchers (the first MAX_SEARCH_SIZE bytes of a resource are exposed).
register(resource_tag, matcher)
classmethod
Register a callable that determines whether the given resource tag should be applied.
Source code in ofrak/core/magic.py
@classmethod
def register(cls, resource_tag: ResourceTag, matcher: Callable[[bytes], bool]):
    """
    Associate a raw-bytes matcher with the resource tag it should apply.

    :param resource_tag: tag stored for the matcher
    :param matcher: callable taking bytes and returning a bool

    :raises AlreadyExistError: if this matcher has already been registered
    """
    if matcher not in cls.matchers:
        cls.matchers[matcher] = resource_tag
        return
    raise AlreadyExistError("Registering already-registered matcher")
run(resource)
async
classmethod
Run the pattern against a given resource, tagging it based on registered tags. Note that the first MAX_SEARCH_SIZE bytes of a resource are made available to the callable.
This method is designed to be called by the MagicIdentifier.
Source code in ofrak/core/magic.py
@classmethod
async def run(cls, resource: Resource):
    """
    Tag the resource with every registered tag whose matcher accepts its leading bytes.
    Only the first MAX_SEARCH_SIZE bytes of the resource (or all of it, if shorter) are
    handed to the matchers.
    This method is designed to be called by the [MagicIdentifier][ofrak.core.magic.MagicIdentifier].

    :param resource: the resource whose data is read and which receives the tags
    """
    total_length = await resource.get_data_length()
    search_range = Range(0, min(total_length, cls.MAX_SEARCH_SIZE))
    prefix = await resource.get_data(range=search_range)

    for accepts, tag in cls.matchers.items():
        if not accepts(prefix):
            continue
        resource.add_tag(tag)
_LibmagicDependency (ComponentExternalTool)
private
__init__(self)
special
Initialize self. See help(type(self)) for accurate signature.
Source code in ofrak/core/magic.py
def __init__(self):
    """
    Describe the libmagic external tool and record whether the ``magic`` module imports.

    The imported module, or None when the import fails, is stored on the class as
    ``_LibmagicDependency._magic``.
    """
    super().__init__(
        "libmagic",
        "https://www.darwinsys.com/file/",
        install_check_arg="",
        apt_package="libmagic1",
        brew_package="libmagic",
    )

    # Import lazily so that a missing module is recorded instead of raising here.
    try:
        import magic as imported_magic
    except ImportError:
        imported_magic = None
    _LibmagicDependency._magic = imported_magic
is_tool_installed(self)
async
Check if a tool is installed by running it with the install_check_arg.
This method runs <tool> <install_check_arg>.
Returns:
| Type | Description |
|---|---|
bool |
True if the tool is installed, False otherwise |
Source code in ofrak/core/magic.py
async def is_tool_installed(self) -> bool:
    """
    Return the module-level MAGIC_INSTALLED flag.

    No external command is run here; the result is whatever MAGIC_INSTALLED holds.
    NOTE(review): MAGIC_INSTALLED is defined outside this excerpt -- presumably it
    reflects whether the ``magic`` module could be imported; confirm.
    """
    return MAGIC_INSTALLED