bb_unpacker.py
ofrak_ghidra.components.blocks.bb_unpacker
GhidraBasicBlockUnpacker (BasicBlockUnpacker, OfrakGhidraMixin)
unpack(self, resource, config=None)
async
Unpack a basic block into its corresponding instructions.
Source code in ofrak_ghidra/components/blocks/bb_unpacker.py
async def unpack(self, resource: Resource, config=None):
    """
    Unpack a basic block into its corresponding instructions.

    The instructions covering the block's address range are requested through
    ``self.batch_manager``. Each returned entry is normalized with ``_asm_fixups`` and
    queued as an ``Instruction`` child region of the basic block; all queued child
    creations are awaited together once every entry has been processed.

    :param resource: the resource that is viewed as a ``BasicBlock`` and unpacked
    :param config: not used by this method
    :raises AssertionError: if the mode reported for an instruction differs from the mode
        of the basic block
    """
    block_view: BasicBlock = await resource.view_as(BasicBlock)
    start_vaddr = block_view.virtual_address
    ghidra_instructions = await self.batch_manager.get_result(
        (
            resource,
            start_vaddr,
            start_vaddr + block_view.size - 1,  # Ghidra is inclusive
        ),
    )
    program_attrs = await resource.analyze(ProgramAttributes)

    pending_children = []
    for entry in ghidra_instructions:
        vaddr = entry["instr_offset"]
        size = entry["instr_size"]
        mnem, operands = _asm_fixups(
            entry["mnem"].lower(), entry["operands"].lower(), program_attrs
        )

        # TODO A way to standardize register representations
        # NOTE(review): the two register lists built below are not passed on to the
        # Instruction created further down; they are kept so the same keys are still read.
        results = entry["results"].split(",")
        regs_read = []
        regs_written = []
        if all(flag in results for flag in ("CF", "PF", "ZF", "SF", "OF")):
            regs_written.append("rflags")
        if "RSP" in results:
            regs_written.append("rsp")
            regs_read.append("rsp")
        for reg in entry["regs_read"].lower().split(","):
            # Skip empty fields and registers that were already recorded
            if reg and reg not in regs_read:
                regs_read.append(reg)
        for reg in entry["regs_written"].lower().split(","):
            if reg and reg not in regs_written:
                regs_written.append(reg)

        disasm = f"{mnem} {operands}"

        # Entries without an explicit mode fall back to the "NONE" member
        mode = InstructionSetMode[entry.get("instr_mode", "NONE")]
        assert mode == block_view.mode, (
            f"The instruction mode {mode.name} returned by Ghidra does not match the basic "
            f"block mode {block_view.mode.name}."
        )

        child_instruction = Instruction(
            virtual_address=vaddr,
            size=size,
            disassembly=disasm,
            mnemonic=mnem,
            operands=operands,
            mode=mode,
        )
        pending_children.append(
            block_view.create_child_region(
                child_instruction, additional_attributes=(program_attrs,)
            )
        )

    await asyncio.gather(*pending_children)
_asm_fixups(base_mnemonic, base_operands, program_attrs)
private
Fix up an assembly instruction from Ghidra, so that the toolchain can assemble it.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| base_mnemonic | str | original mnemonic from Ghidra | required |
| base_operands | str | original operands from Ghidra | required |
| program_attrs | ProgramAttributes | ProgramAttributes for the binary analyzed in Ghidra | required |
Returns:
| Type | Description |
|---|---|
| Tuple[str, str] | fixed up assembly instruction |
Source code in ofrak_ghidra/components/blocks/bb_unpacker.py
def _asm_fixups(
    base_mnemonic: str, base_operands: str, program_attrs: ProgramAttributes
) -> Tuple[str, str]:
    """
    Fix up an assembly instruction from Ghidra, so that the toolchain can assemble it.

    Every instruction gets the same operand cleanup first: a space is inserted after each
    comma, ``"+ -"`` is collapsed to ``"- "``, and ``RE_STRIP_PRECEDING_ZERO`` matches are
    rewritten as ``0x<group 1>``. ISA-specific rewrites follow:

    - ARM: ``RE_CPY_TO_MOV`` matches in the mnemonic are replaced with ``mov``.
    - M68K: the operands are converted towards the AT&T syntax used by the GNU toolchain
      (``%``-prefixed register names, ``#``-prefixed hex immediates); the mnemonic is
      returned unchanged.
    - Any other ISA: the mnemonic is returned unchanged.

    :param base_mnemonic: original mnemonic from Ghidra
    :param base_operands: original operands from Ghidra
    :param program_attrs: ProgramAttributes for the binary analyzed in Ghidra
    :return: fixed up assembly instruction, as a ``(mnemonic, operands)`` pair
    """
    # ISA-independent operand cleanup.
    # NOTE(review): RE_STRIP_PRECEDING_ZERO is defined outside this block; going by its
    # name it presumably drops leading zeros from hex literals - confirm at its definition.
    spaced_operands = base_operands.replace(",", ", ").replace("+ -", "- ")
    fixed_operands = re.sub(RE_STRIP_PRECEDING_ZERO, r"0x\1", spaced_operands)

    isa = program_attrs.isa
    if isa is InstructionSet.ARM:
        # Convert the CPY Ghidra instruction to the more commonly used MOV instruction
        return re.sub(RE_CPY_TO_MOV, "mov", base_mnemonic), fixed_operands
    if isa is not InstructionSet.M68K:
        return base_mnemonic, fixed_operands

    # M68K: convert the Ghidra assembly syntax (that corresponds to the manual's syntax)
    # to the AT&T syntax that the GNU toolchain uses.
    literal_rewrites = (
        ("sp", "%SP"),
        ("sr", "%SR"),
        (" 0x", " #0x"),
        (" -0x", " #-0x"),
    )
    for ghidra_text, gnu_text in literal_rewrites:
        fixed_operands = fixed_operands.replace(ghidra_text, gnu_text)

    # For these mnemonics a hex value at the very start of the operands also gets the
    # '#' prefix. The anchored substitution cannot match again once it has been applied,
    # so a single pass is enough no matter how many of the names occur in the mnemonic.
    leading_immediate_mnemonics = (
        "moveq",
        "mov3q",
        "subq",
        "cmpi",
        "addq",
        "addi",
        "ori",
        "subi",
        "stop",
    )
    if any(name in base_mnemonic for name in leading_immediate_mnemonics):
        fixed_operands = re.sub(r"^0x", r"#0x", fixed_operands)

    # Splitting with a capturing group keeps the commas as separate pieces, so joining the
    # pieces back together reproduces the original separators.
    rewritten_pieces = []
    for piece in re.split("(,)", fixed_operands):
        if "0x" not in piece:
            # Register names are only rewritten in pieces without a hex literal
            piece = re.sub(r"a([0-7])", r"%A\1", piece)
            piece = re.sub(r"d([0-7])[bw]?", r"%D\1", piece)
        rewritten_pieces.append(piece)

    return base_mnemonic, "".join(rewritten_pieces)