Skip to content

gnu.py

ofrak_patch_maker.toolchain.gnu

Abstract_GNU_Toolchain (Toolchain, ABC)

name: str property readonly

Returns:

Type Description
str

name property that matches the value used in toolchain.conf to access paths

_linker_script_flag: str private property readonly

Returns:

Type Description
str

the linker script flag for this toolchain, usually -T

_get_compiler_target(self, processor) private

Returns a default compiler target for the provided processor unless one is provided in self._config.

Red Balloon Security strongly recommends all users provide their specific hardware target for best results.

Parameters:

Name Type Description Default
processor ProgramAttributes required

Returns:

Type Description
Optional[str]
Source code in ofrak_patch_maker/toolchain/gnu.py
def _get_compiler_target(self, processor: ProgramAttributes) -> Optional[str]:
    """
    Return the compiler target stored in this toolchain's configuration.

    The value is read directly from `self._config.compiler_target`; the `processor`
    argument is accepted but not consulted by this implementation.

    :param processor: attributes of the program being targeted (unused here)

    :return: the configured compiler target
    """
    configured_target = self._config.compiler_target
    return configured_target

_get_linker_map_flag(exec_path) private staticmethod

Generates the linker map file flag for a linker invocation given the executable path.

Parameters:

Name Type Description Default
exec_path str

path to executable

required

Returns:

Type Description
Iterable[str]

path to map file

Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def _get_linker_map_flag(exec_path: str) -> Iterable[str]:
    """
    Build the linker arguments that request a map file for the given executable.

    :param exec_path: path to executable

    :return: the `-Map` flag followed by the map file path, which is `exec_path` with
        `.map` appended
    """
    map_file_path = f"{exec_path}.map"
    return ("-Map", map_file_path)

add_linker_include_values(self, symbols, path)

Adds linker include entries to a provided file (usually ending in .inc).

For example GNU syntax prescribes PROVIDE(name = 0xdeadbeef);.

Parameters:

Name Type Description Default
symbols Mapping[str, int]

mapping of symbol string to effective address

required
path str

path to the provided linker include file.

required
Source code in ofrak_patch_maker/toolchain/gnu.py
def add_linker_include_values(self, symbols: Mapping[str, int], path: str):
    """
    Append one `PROVIDE(name = 0xaddress);` line per symbol to the file at `path`.

    Symbols for which `self.linker_include_filter(name)` returns a truthy value are
    left out. The file is opened in append mode, so existing content is preserved.

    :param symbols: mapping of symbol string to effective address
    :param path: path to the provided linker include file
    """
    with open(path, "a") as include_file:
        include_file.writelines(
            f"PROVIDE({symbol} = {hex(address)});\n"
            for symbol, address in symbols.items()
            if not self.linker_include_filter(symbol)
        )

generate_linker_include_file(self, symbols, out_path)

This utility function receives the generated symbols dictionary that results from preprocessing a firmware image and generates a .inc file for use with linker scripts, enabling direct function calls when using the complete cross compilation toolchain.

This functionality must be defined for each toolchain given potential syntactical differences.

Parameters:

Name Type Description Default
symbols Mapping[str, int]

mappings of symbol string to effective address

required
out_path str

the path to the resulting symbol include file (usually .inc)

required

Returns:

Type Description
str

returns out_path

Source code in ofrak_patch_maker/toolchain/gnu.py
def generate_linker_include_file(self, symbols: Mapping[str, int], out_path: str) -> str:
    """
    Create (or overwrite) a linker include file containing the given symbols.

    The file is first written with `RBS_AUTOGEN_WARNING` only; the symbol entries are
    then appended by `add_linker_include_values`.

    :param symbols: mappings of symbol string to effective address
    :param out_path: the path to the resulting symbol include file (usually .inc)

    :return: `out_path`, unchanged
    """
    # "w" truncates any previous content so the file starts with just the banner.
    with open(out_path, "w") as include_file:
        include_file.write(RBS_AUTOGEN_WARNING)

    # The entries themselves are appended by the shared helper.
    self.add_linker_include_values(symbols, out_path)
    return out_path

ld_generate_region(self, object_path, segment_name, permissions, vm_address, length)

Generates regions for linker scripts.

Returns:

Type Description
Tuple[str, str]

a string entry for a "memory region" for the toolchain in question.

Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_region(
    self,
    object_path: str,
    segment_name: str,
    permissions: MemoryPermissions,
    vm_address: int,
    length: int,
) -> Tuple[str, str]:
    """
    Build a linker script memory region entry for one segment of one object file.

    The region name has the form `".rbs_<object>_<segment>_mem"` (including the double
    quotes), where `<object>` is the object file's base name up to its first `.` and
    `<segment>` is `segment_name` with leading and trailing `.` characters removed.

    :param object_path: path to the object file the segment belongs to
    :param segment_name: name of the segment, e.g. `.text`
    :param permissions: permissions of the region, rendered through `self._ld_perm2str`
    :param vm_address: origin address of the region
    :param length: length of the region

    :return: the memory region entry string, and the region name used in it
    """
    permission_flags = self._ld_perm2str(permissions)
    segment_stem = segment_name.strip(".")
    object_stem = os.path.basename(object_path).split(".")[0]
    region_name = f'".rbs_{object_stem}_{segment_stem}_mem"'

    origin = hex(vm_address)
    size = hex(length)
    region_entry = f"    {region_name} ({permission_flags}) : ORIGIN = {origin}, LENGTH = {size}"
    return region_entry, region_name

ld_generate_bss_region(self, vm_address, length)

Generates .bss regions for linker scripts.

Returns:

Type Description
Tuple[str, str]

a .bss memory entry string for the toolchain in question.

Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_bss_region(
    self,
    vm_address: int,
    length: int,
) -> Tuple[str, str]:
    """
    Build the linker script memory region entry for `.bss`.

    The region is always named `".bss_mem"` (including the double quotes) and uses the
    permission string that `self._ld_perm2str` produces for `MemoryPermissions.RW`.

    :param vm_address: origin address of the region
    :param length: length of the region

    :return: the `.bss` memory region entry string, and the region name used in it
    """
    bss_region_name = '".bss_mem"'
    rw_flags = self._ld_perm2str(MemoryPermissions.RW)

    origin = hex(vm_address)
    size = hex(length)
    region_entry = f"    {bss_region_name} ({rw_flags}) : ORIGIN = {origin}, LENGTH = {size}"
    return region_entry, bss_region_name

ld_generate_section(object_path, segment_name, memory_region_name) staticmethod

Generates sections for linker scripts.

Returns:

Type Description
str

a string entry for a "section" for the toolchain in question.

Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def ld_generate_section(
    object_path: str,
    segment_name: str,
    memory_region_name: str,
) -> str:
    """
    Build a linker script section entry placing one segment of one object file.

    The output section is named `.rbs_<object>_<segment>`, where `<object>` is the object
    file's base name up to its first `.` and `<segment>` is `segment_name` with leading
    and trailing `.` characters removed. Its address is `ORIGIN(memory_region_name)`, and
    the input is selected by the object's absolute path.

    :param object_path: path to the object file providing the segment
    :param segment_name: name of the input segment, e.g. `.text`
    :param memory_region_name: name of the memory region the section is assigned to

    :return: the section entry string
    """
    segment_stem = segment_name.strip(".")
    object_stem = os.path.basename(object_path).split(".")[0]
    absolute_object_path = os.path.abspath(object_path)

    section_lines = (
        f"    .rbs_{object_stem}_{segment_stem} ORIGIN({memory_region_name}) : SUBALIGN(0) {{",
        f"        {absolute_object_path}({segment_name})",
        f"    }} > {memory_region_name}",
    )
    return "\n".join(section_lines)

ld_generate_bss_section(memory_region_name) staticmethod

Generates .bss sections for linker scripts.

Returns:

Type Description
str

a .bss section entry string for the toolchain in question.

Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def ld_generate_bss_section(
    memory_region_name: str,
) -> str:
    """
    Build the linker script `.bss` section entry.

    The section collects `.bss` and `.bss.*` from every `*.o` input and is assigned to
    the given memory region.

    :param memory_region_name: name of the memory region the section is assigned to

    :return: the `.bss` section entry string
    """
    section = ".bss"
    entry_lines = (
        f"    {section} : {{",
        f"        *.o({section}, {section}.*)",
        f"    }} > {memory_region_name}",
    )
    return "\n".join(entry_lines)

ld_generate_placeholder_reloc_sections(self)

GCC generates these sections for relocatable binaries even if they are completely unnecessary.

They don't seem to make it into the final executable, so there should be no risk of injecting them inadvertently.

Todo

There is no clear way to get the size, so it is deliberately overestimated.

Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_placeholder_reloc_sections(self) -> Tuple[List[str], List[str]]:
    """
    GCC generates these sections for relocatable binaries even if they are completely
    unnecessary.

    They don't seem to make it into the final executable, so there should be no risk
    of injecting them inadvertently.

    Two placeholders are produced, one via the `got_plt` helpers and one via the
    `rel_dyn` helpers, each with a region and a matching section.

    !!! todo

        There is no clear way to get the size, so it is deliberately overestimated.

    :return: a list of memory region entries, and a list of the matching section entries
    """
    placeholder_base = 0xDEADBEEF
    placeholder_size = 0x1000

    got_plt_region, got_plt_name = self._ld_generate_got_plt_region(
        placeholder_base, placeholder_size
    )
    got_plt_section = self._ld_generate_got_plt_section(got_plt_name)

    rel_dyn_region, rel_dyn_name = self._ld_generate_rel_dyn_region(
        placeholder_base + 0x20000, placeholder_size
    )
    rel_dyn_section = self._ld_generate_rel_dyn_section(rel_dyn_name)

    regions = [got_plt_region, rel_dyn_region]
    sections = [got_plt_section, rel_dyn_section]
    return regions, sections

ld_script_create(self, name, memory_regions, sections, build_dir, symbol_files)

Constructs the linker script for the concrete toolchain class in use.

Uses the provided name, memory region strings, section strings, symbol files, and expected entrypoint (if any) to generate a linker script that results in a valid FEM object when used within link.

Parameters:

Name Type Description Default
name str required
memory_regions List[str] required
sections List[str] required
build_dir str required
symbol_files List[str] required

Returns:

Type Description
str

path to the generated linker script

Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_script_create(
    self,
    name: str,
    memory_regions: List[str],
    sections: List[str],
    build_dir: str,
    symbol_files: List[str],
) -> str:
    """
    Write a GNU linker script into `build_dir` and return its path.

    The script contains, in order: `RBS_AUTOGEN_WARNING`, one `INCLUDE` line per symbol
    file (as an absolute path), a `MEMORY` block holding `memory_regions`, and a
    `SECTIONS` block holding `sections` followed by a `/DISCARD/` entry listing every
    pattern in `self._linker_discard_list`.

    :param name: prefix of the generated script's file name
    :param memory_regions: memory region entry strings, written one per line
    :param sections: section entry strings, written one per line
    :param build_dir: directory in which the script file is created
    :param symbol_files: paths of linker include files to `INCLUDE`

    :return: path to the generated linker script
    """
    # I know that it's annoying we're duplicating all of this source
    # from the LLVM implementation, but ultimately each toolchain
    # is responsible for maintaining its own syntax.
    #
    # mkstemp returns an already-open file descriptor together with the path. Wrap that
    # descriptor instead of discarding it and reopening the path, so that it is closed
    # once the script has been written rather than leaked.
    script_fd, ld_script_path = tempfile.mkstemp(dir=build_dir, prefix=name + "_", suffix=".ld")
    with os.fdopen(script_fd, "w") as f:
        f.write(RBS_AUTOGEN_WARNING)
        for file in symbol_files:
            f.write(f"INCLUDE {str(os.path.abspath(file))}\n")

        f.write("\n\n")

        f.write("MEMORY\n{\n")
        for r in memory_regions:
            f.write(r + "\n")
        f.write("}\n")

        f.write("\n")

        f.write("SECTIONS\n{\n")
        for s in sections:
            f.write(s + "\n")
        f.write("\n")

        f.write("    /DISCARD/ : {\n")
        for d in self._linker_discard_list:
            f.write(f"        *({d})\n")
        f.write("    }\n")

        f.write("}\n")

    return ld_script_path

get_required_alignment(self, segment)

For example, x86 returns 16. This will most often be used when programmatically allocating memory for code/data.

Returns:

Type Description
int

required alignment factor for the toolchain/ISA

Source code in ofrak_patch_maker/toolchain/gnu.py
def get_required_alignment(self, segment: Segment) -> int:
    """
    Return the alignment factor required by this toolchain/ISA.

    This implementation returns 16 when `self._processor.isa` is `InstructionSet.X86`
    and 1 otherwise; the `segment` argument is not consulted.

    :param segment: the segment the alignment is requested for (unused here)

    :return: required alignment factor for the toolchain/ISA
    """
    return 16 if self._processor.isa == InstructionSet.X86 else 1

get_bin_file_symbols(self, executable_path)

For now, this utility only searches for global function and data symbols which are actually contained in a section in the file, as opposed to symbols which are referenced but undefined.

Parameters:

Name Type Description Default
executable_path str

path to the program to be analyzed for symbols

required

Returns:

Type Description
Dict[str, int]

mapping of symbol string to effective address.

Source code in ofrak_patch_maker/toolchain/gnu.py
def get_bin_file_symbols(self, executable_path: str) -> Dict[str, int]:
    """
    Run the toolchain's readobj tool with `--syms` on the executable and parse the result.

    :param executable_path: path to the program to be analyzed for symbols

    :return: whatever `self._parser.parse_symbols` produces for the tool output
        (annotated as a mapping of symbol string to effective address)
    """
    # This happens to be the same as LLVM but it really doesn't belong in Parent code.
    # Note: readobj for gcc is objdump
    symbol_table_dump = self._execute_tool(self._readobj_path, ["--syms"], [executable_path])
    parsed_symbols = self._parser.parse_symbols(symbol_table_dump)
    return parsed_symbols

get_bin_file_segments(self, path)

Parses all segments found in the executable path provided.

Parameters:

Name Type Description Default
path str

path to the program to be analyzed for symbols

required

Returns:

Type Description
Tuple[ofrak_patch_maker.toolchain.model.Segment, ...]

Tuple of Segment objects

Source code in ofrak_patch_maker/toolchain/gnu.py
def get_bin_file_segments(self, path: str) -> Tuple[Segment, ...]:
    """
    Run the toolchain's readobj tool with `--section-headers` on the file and parse the
    result.

    :param path: path to the program to be analyzed

    :raises ToolchainException: if `get_file_format(path)` differs from `self.file_format`

    :return: whatever `self._parser.parse_sections` produces for the tool output
        (annotated as a tuple of Segment objects)
    """
    detected_format = get_file_format(path)
    if detected_format != self.file_format:
        raise ToolchainException(
            "Extracted file format does not match this toolchain instance!"
        )

    section_header_dump = self._execute_tool(
        self._readobj_path, ["--section-headers"], [path]
    )
    return self._parser.parse_sections(section_header_dump)

GNU_AARCH64_LINUX_10_Toolchain (GNU_10_Toolchain)

name property readonly

Returns:

Type Description

name property that matches the value used in toolchain.conf to access paths

get_required_alignment(self, segment)

For example, x86 returns 16. This will most often be used when programmatically allocating memory for code/data.

Returns:

Type Description
int

required alignment factor for the toolchain/ISA

Source code in ofrak_patch_maker/toolchain/gnu.py
def get_required_alignment(self, segment: Segment) -> int:
    """
    Return the alignment factor required by this toolchain.

    This override always returns 4; the `segment` argument is not consulted.

    :param segment: the segment the alignment is requested for (unused here)

    :return: required alignment factor for the toolchain/ISA
    """
    required_alignment = 4
    return required_alignment

ld_generate_placeholder_reloc_sections(self)

GCC generates these sections for relocatable binaries even if they are completely unnecessary.

They don't seem to make it into the final executable, so there should be no risk of injecting them inadvertently.

Todo

There is no clear way to get the size, so it is deliberately overestimated.

Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_placeholder_reloc_sections(self):
    """
    Extend the parent's placeholder relocation regions/sections with one more placeholder
    built by the `got` helpers.

    The extra region is requested at `0xDEADBEEF + 0x30000` with size `0x1000`, and is
    appended after whatever the parent implementation returned.

    :return: the list of memory region entries, and the list of matching section entries
    """
    regions, sections = super().ld_generate_placeholder_reloc_sections()

    got_region, got_name = self._ld_generate_got_region(0xDEADBEEF + 0x30000, 0x1000)
    regions.append(got_region)

    got_section = self._ld_generate_got_section(got_name)
    sections.append(got_section)

    return regions, sections

_get_assembler_target(self, processor) private

Red Balloon Security strongly recommends all users provide their specific hardware target for best results.

Parameters:

Name Type Description Default
processor ProgramAttributes required

Returns:

Type Description

a default assembler target for the provided processor unless one is provided in self._config.

Exceptions:

Type Description
PatchMakerException

if no target provided and program attributes do not correspond to a default value.

Source code in ofrak_patch_maker/toolchain/gnu.py
def _get_assembler_target(self, processor: ProgramAttributes):
    """
    Return the assembler target for the provided processor.

    Resolution order:

    1. `self._config.assembler_target`, when set (consistent with the other GNU
       toolchains' implementations of this method);
    2. the lower-cased value of `processor.sub_isa`, when it is not None;
    3. the lower-cased value of `SubInstructionSet.ARMv8A`.

    :param processor: attributes of the program being targeted

    :raises ValueError: if `processor.isa` is not `InstructionSet.AARCH64`

    :return: the assembler target string
    """
    if processor.isa is not InstructionSet.AARCH64:
        raise ValueError(
            f"The GNU AARCH64 toolchain does not support ISAs which are not AARCH64; "
            f"given ISA {processor.isa.name}"
        )
    # Honor an explicitly configured target before falling back to any default.
    if self._config.assembler_target:
        return self._config.assembler_target
    if processor.sub_isa is not None:
        return processor.sub_isa.value.lower()
    return SubInstructionSet.ARMv8A.value.lower()

GNU_ARM_NONE_EABI_10_2_1_Toolchain (GNU_10_Toolchain)

name property readonly

Returns:

Type Description

name property that matches the value used in toolchain.conf to access paths

_get_assembler_target(self, processor) private

Thumb mode should be defined in the assembler source at the top, using:

.syntax unified
.thumb           ; or .code 16
Source code in ofrak_patch_maker/toolchain/gnu.py
def _get_assembler_target(self, processor: ProgramAttributes):
    """
    Thumb mode should be defined in the assembler source at the top, using:

        .syntax unified
        .thumb           ; or .code 16

    Resolution order for the returned target:

    1. `self._config.assembler_target`, when set;
    2. the lower-cased value of `processor.sub_isa`, when it is truthy;
    3. the lower-cased value of `SubInstructionSet.ARMv7A`.

    :param processor: attributes of the program being targeted

    :raises ValueError: if `processor.isa` is not `InstructionSet.ARM`

    :return: the assembler target string
    """
    if processor.isa is not InstructionSet.ARM:
        raise ValueError(
            f"The GNU ARM toolchain does not support ISAs which are not ARM; "
            f"given ISA {processor.isa.name}"
        )
    if self._config.assembler_target:
        return self._config.assembler_target

    if processor.sub_isa:
        return processor.sub_isa.value.lower()
    # The guard at the top guarantees the ISA is ARM by this point, so the default always
    # applies here; the former "no valid default found" branch could never be reached.
    return SubInstructionSet.ARMv7A.value.lower()

GNU_AVR_5_Toolchain (Abstract_GNU_Toolchain)

name: str property readonly

Returns:

Type Description
str

name property that matches the value used in toolchain.conf to access paths

_get_assembler_target(self, processor) private

Red Balloon Security strongly recommends all users provide their specific hardware target for best results.

Parameters:

Name Type Description Default
processor ProgramAttributes required

Returns:

Type Description
str

a default assembler target for the provided processor unless one is provided in self._config.

Exceptions:

Type Description
PatchMakerException

if no target provided and program attributes do not correspond to a default value.

Source code in ofrak_patch_maker/toolchain/gnu.py
def _get_assembler_target(self, processor: ProgramAttributes) -> str:
    """
    Return the assembler target for the provided processor.

    `self._config.assembler_target` is used when set; otherwise the lower-cased value of
    `InstructionSet.AVR` is returned.

    :param processor: attributes of the program being targeted

    :raises ValueError: if `processor.isa` is not `InstructionSet.AVR`

    :return: the assembler target string
    """
    if processor.isa is not InstructionSet.AVR:
        unsupported_isa_message = (
            f"The GNU AVR toolchain does not support ISAs which are not AVR; "
            f"given ISA {processor.isa.name}"
        )
        raise ValueError(unsupported_isa_message)

    configured_target = self._config.assembler_target
    if not configured_target:
        return InstructionSet.AVR.value.lower()
    return configured_target

get_required_alignment(self, segment)

For example, x86 returns 16. This will most often be used when programmatically allocating memory for code/data.

Returns:

Type Description
int

required alignment factor for the toolchain/ISA

Source code in ofrak_patch_maker/toolchain/gnu.py
def get_required_alignment(self, segment: Segment) -> int:
    """
    Return the alignment factor required by this toolchain.

    This override always returns 2; the `segment` argument is not consulted.

    :param segment: the segment the alignment is requested for (unused here)

    :return: required alignment factor for the toolchain/ISA
    """
    required_alignment = 2
    return required_alignment

GNU_M68K_LINUX_10_Toolchain (GNU_10_Toolchain)

name property readonly

Returns:

Type Description

name property that matches the value used in toolchain.conf to access paths

get_required_alignment(self, segment)

For example, x86 returns 16. This will most often be used when programmatically allocating memory for code/data.

Returns:

Type Description
int

required alignment factor for the toolchain/ISA

Source code in ofrak_patch_maker/toolchain/gnu.py
def get_required_alignment(self, segment: Segment) -> int:
    """
    Return the alignment factor required by this toolchain.

    This override always returns 4; the `segment` argument is not consulted.

    :param segment: the segment the alignment is requested for (unused here)

    :return: required alignment factor for the toolchain/ISA
    """
    required_alignment = 4
    return required_alignment

_get_assembler_target(self, processor) private

Red Balloon Security strongly recommends all users provide their specific hardware target for best results.

Parameters:

Name Type Description Default
processor ProgramAttributes required

Returns:

Type Description

a default assembler target for the provided processor unless one is provided in self._config.

Exceptions:

Type Description
PatchMakerException

if no target provided and program attributes do not correspond to a default value.

Source code in ofrak_patch_maker/toolchain/gnu.py
def _get_assembler_target(self, processor: ProgramAttributes):
    """
    Return the assembler target for the provided processor.

    Resolution order:

    1. `self._config.assembler_target`, when set;
    2. the value of `processor.sub_isa`, when it is not None;
    3. the value of `processor.isa`.

    Unlike some sibling toolchains, the enum values are returned as-is (not lower-cased).

    :param processor: attributes of the program being targeted

    :raises ValueError: if `processor.isa` is not `InstructionSet.M68K`

    :return: the assembler target string
    """
    if processor.isa is not InstructionSet.M68K:
        unsupported_isa_message = (
            f"The GNU M68K toolchain does not support ISAs which are not M68K; "
            f"given ISA {processor.isa.name}"
        )
        raise ValueError(unsupported_isa_message)

    configured_target = self._config.assembler_target
    if configured_target:
        return configured_target

    if processor.sub_isa is not None:
        return processor.sub_isa.value
    return processor.isa.value

GNU_X86_64_LINUX_EABI_10_3_0_Toolchain (GNU_10_Toolchain)

name: str property readonly

Returns:

Type Description
str

name property that matches the value used in toolchain.conf to access paths

_get_assembler_target(self, processor) private

Red Balloon Security strongly recommends all users provide their specific hardware target for best results.

Parameters:

Name Type Description Default
processor ProgramAttributes required

Returns:

Type Description

a default assembler target for the provided processor unless one is provided in self._config.

Exceptions:

Type Description
PatchMakerException

if no target provided and program attributes do not correspond to a default value.

Source code in ofrak_patch_maker/toolchain/gnu.py
def _get_assembler_target(self, processor: ProgramAttributes):
    """
    Return the assembler target for this toolchain.

    `self._config.assembler_target` is used when set; otherwise `"generic64"` is
    returned. The `processor` argument is not consulted by this implementation.

    :param processor: attributes of the program being targeted (unused here)

    :return: the assembler target string
    """
    return self._config.assembler_target or "generic64"

ld_generate_bss_section(memory_region_name) staticmethod

We override this for x64 so we can provide SUBALIGN(1). This is required to correctly estimate how much size we need for bss when splitting up data structures into their own individual bss sections. If we were to let the linker align every structure's section to 8 or 16, it would insert empty space that we had not allocated for the bss memory region. gcc/ld do prefer 8-byte alignment for data if you don't force this, but it is not likely to be hugely faster on recent hardware for most situations (i.e. not locked instructions across a cache line); see https://lemire.me/blog/2012/05/31/data-alignment-for-speed-myth-or-reality/ for details. Pre-2011 x64 chips might be slower with these kinds of accesses, but: (1) we should not bend over backwards for processors we've not evaluated yet; (2) .bss handling is already difficult enough as is; (3) the flexibility granted by this feature likely justifies a relatively small performance impact. We should address this as a problem if future users find that performance is noticeably/severely impacted.

Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def ld_generate_bss_section(
    memory_region_name: str,
) -> str:
    """
    Build the linker script `.bss` section entry, with `SUBALIGN(1)`.

    We override this for x64 so we can provide SUBALIGN(1).
    This is required to correctly estimate how much size we need for bss
    when splitting up data structures into their own individual bss sections.
    If we were to let the linker align every structure's section to 8 or 16, it would
    insert empty space that we had not allocated for the bss memory region.

    gcc/ld do prefer 8 alignment for data if you don't force this, but it is not likely to be
    hugely faster on recent hardware for most situations (ie not locked instructions
    across a cache line):
    https://lemire.me/blog/2012/05/31/data-alignment-for-speed-myth-or-reality/

    Pre-2011 x64 chips might be slower with these kinds of accesses, but:

       - We should not bend over backwards for processors we've not evaluated yet.
       - .bss handling is already difficult enough as is.
       - The flexibility granted by this feature likely justifies a relatively small
         performance impact.

    We should address this as a problem if future users find that performance is
    noticeably/severely impacted.

    :param memory_region_name: name of the memory region the section is assigned to

    :return: the `.bss` section entry string
    """
    section = ".bss"
    entry_lines = (
        f"    {section} : SUBALIGN(1) {{",
        f"        *.o({section}, {section}.*)",
        f"    }} > {memory_region_name}",
    )
    return "\n".join(entry_lines)