magic.py
ofrak.core.magic
Magic (ResourceAttributes)
dataclass
Magic(mime: str, descriptor: str)
MagicAnalyzer (Analyzer)
Analyze a binary blob to extract its mimetype and magic description.
analyze(self, resource, config=None)
async
Analyze a resource to extract specific ResourceAttributes.
Users should not call this method directly; rather, they should run Resource.run or Resource.analyze.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
resource |
Resource |
The resource that is being analyzed |
required |
config |
Optional config for analyzing. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run. |
None |
Returns:
Type | Description |
---|---|
Magic |
The analysis results |
Source code in ofrak/core/magic.py
async def analyze(self, resource: Resource, config=None) -> Magic:
    """
    Compute the mime type and the libmagic description of a resource's data.

    :param resource: the resource whose data is inspected
    :param config: not used by this analyzer

    :raises ComponentMissingDependencyError: if MAGIC_INSTALLED is false

    :return: a Magic built from the mime type and the description, in that order
    """
    # Check the dependency before touching the resource, so that a missing libmagic does not
    # cost a full read of the resource's data.
    if not MAGIC_INSTALLED:
        raise ComponentMissingDependencyError(self, LIBMAGIC_DEP)

    data = await resource.get_data()
    magic_mime = magic.from_buffer(data, mime=True)
    magic_description = magic.from_buffer(data)
    return Magic(magic_mime, magic_description)
MagicDescriptionPattern
Pattern to tag resources based on their magic description.
register(resource_tag, matcher)
classmethod
Register a callable that determines whether the given resource tag should be applied.
Source code in ofrak/core/magic.py
@classmethod
def register(cls, resource_tag: ResourceTag, matcher: Callable[[str], bool]):
    """
    Associate a matcher callable with the resource tag it stands for.

    :param resource_tag: the tag stored for this matcher
    :param matcher: callable taking a string and returning a bool

    :raises AlreadyExistError: if this matcher has been registered before
    """
    known_matchers = cls.matchers
    if matcher not in known_matchers:
        known_matchers[matcher] = resource_tag
        return
    raise AlreadyExistError("Registering already-registered matcher")
run(resource, magic_description)
classmethod
Run this pattern against a given resource, tagging it based on registered tags.
This method is designed to be called by the MagicIdentifier.
Source code in ofrak/core/magic.py
@classmethod
def run(cls, resource: Resource, magic_description: str):
    """
    Tag a resource with every registered tag whose matcher accepts the magic description.

    Matchers are tried in the iteration order of the registry; each one is called with
    `magic_description`, and a truthy result adds the associated tag to the resource.
    This method is designed to be called by the [MagicIdentifier][ofrak.core.magic.MagicIdentifier].

    :param resource: the resource that receives the tags
    :param magic_description: the string handed to every registered matcher
    """
    for predicate, tag in cls.matchers.items():
        if not predicate(magic_description):
            continue
        resource.add_tag(tag)
MagicIdentifier (Identifier)
Identify resources using three identifier patterns: MagicMimePattern, MagicDescriptionPattern, and RawMagicPattern.
OFRAK component authors can "register" magic patterns to run whenever this identifier is run, for example:
MagicMimePattern.register(GenericBinary, "application/octet-stream")
identify(self, resource, config=None)
async
Perform identification on the given resource.
Users should not call this method directly; rather, they should run Resource.identify.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
resource |
Resource |
The resource that is being identified |
required |
config |
Optional config for identifying. If an implementation provides a default, this default will always be used when config would otherwise be None. Note that a copy of the default config will be passed, so the default config values cannot be modified persistently by a component run. |
None |
Source code in ofrak/core/magic.py
async def identify(self, resource: Resource, config=None) -> None:
    """
    Tag a resource by running the three magic patterns against it.

    The resource is first analyzed for its Magic attributes; the mime pattern and the
    description pattern consume those, while the raw pattern reads from the resource itself.

    :param resource: the resource to identify
    :param config: not used by this identifier
    """
    magic_attributes = await resource.analyze(Magic)

    # The order of the three patterns is kept as-is: mime, then description, then raw bytes.
    MagicMimePattern.run(resource, magic_attributes.mime)
    MagicDescriptionPattern.run(resource, magic_attributes.descriptor)
    await RawMagicPattern.run(resource)
MagicMimePattern
Pattern to tag resources based on their mimetype.
register(resource_tag, mime_types)
classmethod
Register what resource tags correspond to specific mime types.
Source code in ofrak/core/magic.py
@classmethod
def register(cls, resource_tag: ResourceTag, mime_types: Union[Iterable[str], str]):
    """
    Register what resource tags correspond to specific mime types.

    The registration is all-or-nothing: every mime type is checked before any of them is
    stored, so a rejected call leaves the registry untouched.

    :param resource_tag: the tag to associate with each of the given mime types
    :param mime_types: a single mime type, or an iterable of mime types

    :raises AlreadyExistError: if a mime type is already registered, or appears more than
    once in `mime_types`
    """
    if isinstance(mime_types, str):
        mime_types = [mime_types]
    # Materialize the iterable: it is walked twice below and may be a one-shot generator.
    mime_types = list(mime_types)

    # Validate everything first so that a failure cannot leave a partial registration behind.
    seen = set()
    for mime_type in mime_types:
        if mime_type in cls.tags_by_mime or mime_type in seen:
            raise AlreadyExistError(f"Registering already-registered mime type: {mime_type}")
        seen.add(mime_type)

    cls.tags_by_mime.update(dict.fromkeys(mime_types, resource_tag))
run(resource, magic_mime)
classmethod
Run the pattern against a given resource, tagging it based on matching mime types.
This method is designed to be called by the MagicIdentifier.
Source code in ofrak/core/magic.py
@classmethod
def run(cls, resource: Resource, magic_mime: str):
    """
    Tag a resource with the tag registered for its mime type, if there is one.

    A mime type with no registered tag leaves the resource untouched.
    This method is designed to be called by the [MagicIdentifier][ofrak.core.magic.MagicIdentifier].

    :param resource: the resource that receives the tag
    :param magic_mime: the mime type to look up in the registry
    """
    matched_tag = cls.tags_by_mime.get(magic_mime)
    if matched_tag is None:
        return
    resource.add_tag(matched_tag)
RawMagicPattern
Pattern to tag resources based on custom raw magic matching patterns.
MAX_SEARCH_SIZE specifies how many bytes this pattern's run method exposes to registered matchers (the first MAX_SEARCH_SIZE bytes of a resource are exposed).
register(resource_tag, matcher)
classmethod
Register a callable that determines whether the given resource tag should be applied.
Source code in ofrak/core/magic.py
@classmethod
def register(cls, resource_tag: ResourceTag, matcher: Callable[[bytes], bool]):
    """
    Associate a raw-bytes matcher callable with the resource tag it stands for.

    :param resource_tag: the tag stored for this matcher
    :param matcher: callable taking bytes and returning a bool

    :raises AlreadyExistError: if this matcher has been registered before
    """
    registry = cls.matchers
    if matcher not in registry:
        registry[matcher] = resource_tag
        return
    raise AlreadyExistError("Registering already-registered matcher")
run(resource)
async
classmethod
Run the pattern against a given resource, tagging it based on registered tags. Note that the first MAX_SEARCH_SIZE bytes of a resource are made available to the callable.
This method is designed to be called by the MagicIdentifier.
Source code in ofrak/core/magic.py
@classmethod
async def run(cls, resource: Resource):
    """
    Tag a resource with every registered tag whose matcher accepts the start of its data.

    Only the first MAX_SEARCH_SIZE bytes of the resource (or all of it, when it is shorter)
    are read and handed to the matchers.
    This method is designed to be called by the [MagicIdentifier][ofrak.core.magic.MagicIdentifier].

    :param resource: the resource to read from and to tag
    """
    total_length = await resource.get_data_length()
    search_length = min(total_length, cls.MAX_SEARCH_SIZE)
    head = await resource.get_data(range=Range(0, search_length))

    for predicate, tag in cls.matchers.items():
        if not predicate(head):
            continue
        resource.add_tag(tag)
_LibmagicDependency (ComponentExternalTool)
private
__init__(self)
special
Initialize self. See help(type(self)) for accurate signature.
Source code in ofrak/core/magic.py
def __init__(self):
    """
    Describe the libmagic external tool and record whether the `magic` module imports.

    After construction, the class attribute `_LibmagicDependency._magic` holds the imported
    `magic` module, or None when importing it raised ImportError.
    """
    super().__init__(
        "libmagic",
        "https://www.darwinsys.com/file/",
        install_check_arg="",
        apt_package="libmagic1",
        brew_package="libmagic",
    )

    # Resolve the module first, then publish the result on the class in one place.
    try:
        import magic as libmagic_module
    except ImportError:
        libmagic_module = None
    _LibmagicDependency._magic = libmagic_module
is_tool_installed(self)
async
Check if a tool is installed by running it with the install_check_arg.
This method runs <tool> <install_check_arg>.
Returns:
Type | Description |
---|---|
bool |
True if the tool is installed, False otherwise |
Source code in ofrak/core/magic.py
async def is_tool_installed(self) -> bool:
    """
    Report the module-level MAGIC_INSTALLED flag rather than running an external command.

    NOTE(review): MAGIC_INSTALLED is defined outside this view; presumably it records
    whether `import magic` succeeded -- confirm against the module top level.

    :return: the value of MAGIC_INSTALLED
    """
    return MAGIC_INSTALLED