Skip to content

gnu.py

ofrak_patch_maker.toolchain.gnu

Abstract_GNU_Toolchain (Toolchain, ABC)

_linker_script_flag: str private property readonly

Returns:

Type Description
str

the linker script flag for this toolchain, usually -T

segment_alignment: int property readonly

For example, x86 returns 16. This will most often be used when programmatically allocating memory for code/data.

Returns:

Type Description
int

required alignment factor for the toolchain/ISA

_get_compiler_target(self, processor) private

Returns a default compiler target for the provided processor unless one is provided in self._config.

Red Balloon Security strongly recommends all users provide their specific hardware target for best results.

Parameters:

Name Type Description Default
processor ArchInfo required

Returns:

Type Description
Optional[str]
Source code in ofrak_patch_maker/toolchain/gnu.py
def _get_compiler_target(self, processor: ArchInfo) -> Optional[str]:
    """
    Return the compiler target stored in this toolchain's configuration.

    This implementation does not consult `processor`; the value is read directly from
    `self._config.compiler_target`.

    :param processor: architecture description (not used by this implementation)

    :return: the value of `self._config.compiler_target`
    """
    toolchain_config = self._config
    return toolchain_config.compiler_target

_get_linker_map_flag(exec_path) private staticmethod

Generates the linker map file flag for a linker invocation given the executable path.

Parameters:

Name Type Description Default
exec_path str

path to executable

required

Returns:

Type Description
Iterable[str]

the -Map flag followed by the path to the map file

Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def _get_linker_map_flag(exec_path: str) -> Iterable[str]:
    """
    Build the linker arguments that request a map file for the given executable.

    :param exec_path: path to executable

    :return: the `-Map` flag followed by the map file path, which is `exec_path` with
        `.map` appended
    """
    map_file_path = f"{exec_path}.map"
    return ("-Map", map_file_path)

add_linker_include_values(self, symbols, path)

Adds linker include entries to a provided file (usually ending in .inc).

For example GNU syntax prescribes PROVIDE(name = 0xdeadbeef);.

Parameters:

Name Type Description Default
symbols Mapping[str, int]

mapping of symbol string to effective address

required
path str

path to the provided linker include file.

required
Source code in ofrak_patch_maker/toolchain/gnu.py
def add_linker_include_values(self, symbols: Mapping[str, int], path: str):
    """
    Append one `PROVIDE(name = 0x...);` line per symbol to the file at `path`.

    Symbols for which `self.linker_include_filter(name)` returns a truthy value are
    left out. The file is opened in append mode, so existing content is preserved.

    :param symbols: mapping of symbol string to effective address
    :param path: path to the linker include file to append to
    """
    with open(path, "a") as inc_file:
        inc_file.writelines(
            f"PROVIDE({symbol_name} = {hex(address)});\n"
            for symbol_name, address in symbols.items()
            # A truthy filter result means the symbol must be excluded.
            if not self.linker_include_filter(symbol_name)
        )

generate_linker_include_file(self, symbols, out_path)

This utility function receives the generated symbols dictionary that results from preprocessing a firmware image and generates a .inc file for use with linker scripts, enabling direct function calls when using the complete cross compilation toolchain.

This functionality must be defined for each toolchain given potential syntactical differences.

Parameters:

Name Type Description Default
symbols Mapping[str, int]

mappings of symbol string to effective address

required
out_path str

the path to the resulting symbol include file (usually .inc)

required

Returns:

Type Description
str

returns out_path

Source code in ofrak_patch_maker/toolchain/gnu.py
def generate_linker_include_file(self, symbols: Mapping[str, int], out_path: str) -> str:
    """
    Create (or overwrite) a linker include file holding the given symbols.

    The file is first written with `RBS_AUTOGEN_WARNING` only; the symbol entries are
    then appended by `add_linker_include_values`.

    :param symbols: mapping of symbol string to effective address
    :param out_path: the path to the resulting symbol include file (usually .inc)

    :return: `out_path`, unchanged
    """
    # Opening in "w" mode truncates any previous content before the banner is written.
    with open(out_path, "w") as inc_file:
        inc_file.write(RBS_AUTOGEN_WARNING)

    # The helper reopens the file in append mode, so the banner stays at the top.
    self.add_linker_include_values(symbols, out_path)

    return out_path

ld_generate_region(self, object_path, segment_name, permissions, vm_address, length)

Generates regions for linker scripts.

Returns:

Type Description
Tuple[str, str]

a string entry for a "memory region" for the toolchain in question.

Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_region(
    self,
    object_path: str,
    segment_name: str,
    permissions: MemoryPermissions,
    vm_address: int,
    length: int,
) -> Tuple[str, str]:
    """
    Build a linker script memory region entry for one segment of one object file.

    The region name has the form `".rbs_<object>_<segment>_mem"` (including the double
    quotes), where `<object>` is the base name of `object_path` up to its first `.` and
    `<segment>` is `segment_name` with leading and trailing `.` characters removed.

    :param object_path: path to the object file the segment belongs to
    :param segment_name: name of the segment, e.g. `.text`
    :param permissions: permissions, converted to a string by `self._ld_perm2str`
    :param vm_address: origin address of the region
    :param length: length of the region

    :return: tuple of (region entry line, quoted region name)
    """
    perms_string = self._ld_perm2str(permissions)
    segment_stem = segment_name.strip(".")
    object_stem = os.path.basename(object_path).partition(".")[0]
    region_name = f'".rbs_{object_stem}_{segment_stem}_mem"'
    origin, size = hex(vm_address), hex(length)
    region_entry = f"    {region_name} ({perms_string}) : ORIGIN = {origin}, LENGTH = {size}"
    return region_entry, region_name

ld_generate_bss_region(self, vm_address, length)

Generates .bss regions for linker scripts.

Returns:

Type Description
Tuple[str, str]

a .bss memory entry string for the toolchain in question.

Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_bss_region(
    self,
    vm_address: int,
    length: int,
) -> Tuple[str, str]:
    """
    Build the linker script memory region entry for `.bss`.

    The region is always named `".bss_mem"` (including the double quotes) and uses the
    permission string that `self._ld_perm2str` produces for `MemoryPermissions.RW`.

    :param vm_address: origin address of the region
    :param length: length of the region

    :return: tuple of (region entry line, quoted region name)
    """
    bss_region_name = '".bss_mem"'
    rw_perms = self._ld_perm2str(MemoryPermissions.RW)
    origin, size = hex(vm_address), hex(length)
    region_entry = f"    {bss_region_name} ({rw_perms}) : ORIGIN = {origin}, LENGTH = {size}"
    return region_entry, bss_region_name

ld_generate_section(object_path, segment_name, memory_region_name) staticmethod

Generates sections for linker scripts.

Returns:

Type Description
str

a string entry for a "section" for the toolchain in question.

Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def ld_generate_section(
    object_path: str,
    segment_name: str,
    memory_region_name: str,
) -> str:
    """
    Build a linker script output section entry for one segment of one object file.

    The output section is named `.rbs_<object>_<segment>`, where `<object>` is the base
    name of `object_path` up to its first `.` and `<segment>` is `segment_name` with
    leading and trailing `.` characters removed. Its single input is `segment_name`
    taken from the absolute path of `object_path`.

    :param object_path: path to the object file providing the segment
    :param segment_name: name of the segment, e.g. `.text`
    :param memory_region_name: name of the memory region the section is placed in

    :return: the three-line section entry, without a trailing newline
    """
    object_stem = os.path.basename(object_path).partition(".")[0]
    segment_stem = segment_name.strip(".")
    output_section = f".rbs_{object_stem}_{segment_stem}"
    input_spec = f"{os.path.abspath(object_path)}({segment_name})"
    entry_lines = [
        f"    {output_section} ORIGIN({memory_region_name}) : SUBALIGN(0) {{",
        f"        {input_spec}",
        f"    }} > {memory_region_name}",
    ]
    return "\n".join(entry_lines)

ld_generate_bss_section(memory_region_name) staticmethod

Generates .bss sections for linker scripts.

Returns:

Type Description
str

a .bss section entry string for the toolchain in question.

Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def ld_generate_bss_section(
    memory_region_name: str,
) -> str:
    """
    Build the linker script `.bss` output section entry.

    The section collects `.bss` and `.bss.*` from every `*.o` input and is placed in
    `memory_region_name`.

    :param memory_region_name: name of the memory region the section is placed in

    :return: the three-line section entry, without a trailing newline
    """
    name = ".bss"
    entry_lines = (
        f"    {name} : {{",
        f"        *.o({name}, {name}.*)",
        f"    }} > {memory_region_name}",
    )
    return "\n".join(entry_lines)

ld_generate_placeholder_reloc_sections(self)

GCC generates these sections for relocatable binaries even if they are completely unnecessary.

They don't seem to make it into the final executable, so there should be no risk of injecting them inadvertently.

Todo

There is no clear way to get the size of these sections, so it is greatly overestimated.

Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_placeholder_reloc_sections(self) -> Tuple[List[str], List[str]]:
    """
    GCC generates these sections for relocatable binaries even if they are completely
    unnecessary.

    They don't seem to make it into the final executable, so there should be no risk
    of injecting them inadvertently.

    !!! todo

        There is no clear way to get the size of these sections, so it is greatly
        overestimated.

    :return: tuple of ([.got.plt region, .rel.dyn region], [.got.plt section,
        .rel.dyn section]) as produced by the corresponding `_ld_generate_*` helpers
    """
    # Placeholder address and size only; see the todo above.
    placeholder_base = 0xDEADBEEF
    placeholder_size = 0x1000

    got_plt_region, got_plt_name = self._ld_generate_got_plt_region(
        placeholder_base, placeholder_size
    )
    got_plt_section = self._ld_generate_got_plt_section(got_plt_name)

    rel_dyn_region, rel_dyn_name = self._ld_generate_rel_dyn_region(
        placeholder_base + 0x20000, placeholder_size
    )
    rel_dyn_section = self._ld_generate_rel_dyn_section(rel_dyn_name)

    regions = [got_plt_region, rel_dyn_region]
    sections = [got_plt_section, rel_dyn_section]
    return regions, sections

ld_script_create(self, name, memory_regions, sections, build_dir, symbol_files)

Constructs the linker script for the concrete toolchain class in use.

Uses the provided name, memory region strings, section strings, symbol files, expected entrypoint (if any) to generate a linker script that results in a valid FEM object when used within link.

Parameters:

Name Type Description Default
name str required
memory_regions List[str] required
sections List[str] required
build_dir str required
symbol_files List[str] required

Returns:

Type Description
str

path to the generated linker script

Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_script_create(
    self,
    name: str,
    memory_regions: List[str],
    sections: List[str],
    build_dir: str,
    symbol_files: List[str],
) -> str:
    """
    Write a GNU linker script into `build_dir` and return its path.

    The script consists of, in order: the `RBS_AUTOGEN_WARNING` banner, one `INCLUDE`
    line per symbol file (as an absolute path), a `MEMORY` block holding
    `memory_regions`, and a `SECTIONS` block holding `sections` followed by a
    `/DISCARD/` entry listing every pattern in `self._linker_discard_list`.

    :param name: prefix for the generated file name (`<name>_<random>.ld`)
    :param memory_regions: pre-formatted memory region entries, one per line
    :param sections: pre-formatted section entries
    :param build_dir: directory in which the script is created
    :param symbol_files: paths of linker include files to `INCLUDE`

    :return: path to the generated linker script
    """
    # I know that it's annoying we're duplicating all of this source
    # from the LLVM implementation, but ultimately each toolchain
    # is responsible for maintaining its own syntax.

    # mkstemp returns an already-open OS-level file descriptor along with the path.
    # Wrap that descriptor instead of discarding it and reopening the path, so it is
    # closed when the `with` block exits rather than leaked on every call.
    script_fd, ld_script_path = tempfile.mkstemp(
        dir=build_dir, prefix=name + "_", suffix=".ld"
    )
    with os.fdopen(script_fd, "w") as f:
        f.write(RBS_AUTOGEN_WARNING)
        for file in symbol_files:
            f.write(f"INCLUDE {str(os.path.abspath(file))}\n")

        f.write("\n\n")

        f.write("MEMORY\n{\n")
        for r in memory_regions:
            f.write(r + "\n")
        f.write("}\n")

        f.write("\n")

        f.write("SECTIONS\n{\n")
        for s in sections:
            f.write(s + "\n")
        f.write("\n")

        f.write("    /DISCARD/ : {\n")
        for d in self._linker_discard_list:
            f.write(f"        *({d})\n")
        f.write("    }\n")

        f.write("}\n")

    return ld_script_path

get_bin_file_symbols(self, executable_path)

For now, this utility only searches for global function and data symbols which are actually contained in a section in the file, as opposed to symbols which are referenced but undefined.

Parameters:

Name Type Description Default
executable_path str

path to the program to be analyzed for symbols

required

Returns:

Type Description
Dict[str, Tuple[int, ofrak_type.symbol_type.LinkableSymbolType]]

mapping of symbol string to tuple of effective address, symbol type.

Source code in ofrak_patch_maker/toolchain/gnu.py
def get_bin_file_symbols(
    self, executable_path: str
) -> Dict[str, Tuple[int, LinkableSymbolType]]:
    """
    Run the toolchain's readobj tool with `--syms` on `executable_path` and return
    the result of `self._parser.parse_symbols` on its output.

    :param executable_path: path to the program to be analyzed for symbols

    :return: mapping of symbol string to tuple of effective address, symbol type
    """
    # This happens to be the same as LLVM but it really doesn't belong in Parent code.
    # Note: readobj for gcc is objdump
    tool_flags = ["--syms"]
    symbol_dump = self._execute_tool(self._readobj_path, tool_flags, [executable_path])

    parsed_symbols = self._parser.parse_symbols(symbol_dump)
    return parsed_symbols

get_bin_file_segments(self, path)

Parses all segments found in the executable path provided.

Parameters:

Name Type Description Default
path str

path to the program to be analyzed for segments

required

Returns:

Type Description
Tuple[ofrak_patch_maker.toolchain.model.Segment, ...]

Tuple of Segment objects

Source code in ofrak_patch_maker/toolchain/gnu.py
def get_bin_file_segments(self, path: str) -> Tuple[Segment, ...]:
    """
    Run the toolchain's readobj tool with `--section-headers` on `path` and return
    the result of `self._parser.parse_sections` on its output.

    :param path: path to the program to be analyzed for segments

    :raises ToolchainException: if `get_file_format(path)` differs from
        `self.file_format`

    :return: Tuple of Segment objects
    """
    # Refuse files whose format this toolchain instance was not configured for.
    format_mismatch = get_file_format(path) != self.file_format
    if format_mismatch:
        raise ToolchainException(
            "Extracted file format does not match this toolchain instance!"
        )

    tool_flags = ["--section-headers"]
    section_dump = self._execute_tool(self._readobj_path, tool_flags, [path])

    parsed_segments = self._parser.parse_sections(section_dump)
    return parsed_segments

get_bin_file_rel_symbols(self, executable_path)

This utility searches for global function and data symbols which are referenced in a section in the file but are undefined.

Parameters:

Name Type Description Default
executable_path str

path to the program to be analyzed for symbols

required

Returns:

Type Description
Dict[str, Tuple[int, ofrak_type.symbol_type.LinkableSymbolType]]

mapping of symbol string to tuple of effective address, symbol type.

Source code in ofrak_patch_maker/toolchain/gnu.py
def get_bin_file_rel_symbols(
    self, executable_path: str
) -> Dict[str, Tuple[int, LinkableSymbolType]]:
    """
    Run the toolchain's readobj tool with `--syms` on `executable_path` and return
    the result of `self._parser.parse_relocations` on its output.

    :param executable_path: path to the program to be analyzed for symbols

    :return: mapping of symbol string to tuple of effective address, symbol type
    """
    # The symbol table dump (not a relocation dump) is what gets handed to the parser.
    tool_flags = ["--syms"]
    symbol_dump = self._execute_tool(self._readobj_path, tool_flags, [executable_path])

    parsed_rel_symbols = self._parser.parse_relocations(symbol_dump)
    return parsed_rel_symbols