gnu.py
ofrak_patch_maker.toolchain.gnu
Abstract_GNU_Toolchain (Toolchain, ABC)
_linker_script_flag: str
private
property
readonly
Returns:
Type | Description |
---|---|
str |
the linker script flag for this toolchain, usually `-T` |
segment_alignment: int
property
readonly
For example, x86 returns 16. This will most often be used when programmatically allocating memory for code/data.
Returns:
Type | Description |
---|---|
int |
required alignment factor for the toolchain/ISA |
_get_compiler_target(self, processor)
private
Returns a default compiler target for the provided processor unless one is provided in `self._config`.
Red Balloon Security strongly recommends all users provide their specific hardware target for best results.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
processor |
ArchInfo |
required |
Returns:
Type | Description |
---|---|
Optional[str] |
Source code in ofrak_patch_maker/toolchain/gnu.py
def _get_compiler_target(self, processor: ArchInfo) -> Optional[str]:
    """
    Look up the compiler target stored in this toolchain's configuration.

    This implementation does not inspect `processor`; it hands back whatever
    `self._config.compiler_target` currently holds.

    :param processor: architecture description (not consulted here)

    :return Optional[str]: the value of `self._config.compiler_target`
    """
    configured_target = self._config.compiler_target
    return configured_target
_get_linker_map_flag(exec_path)
private
staticmethod
Generates the linker map file flag for a linker invocation given the executable path.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
exec_path |
str |
path to executable |
required |
Returns:
Type | Description |
---|---|
Iterable[str] |
path to map file |
Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def _get_linker_map_flag(exec_path: str) -> Iterable[str]:
    """
    Build the linker arguments that request a map file for an executable.

    :param exec_path: path to executable

    :return Iterable[str]: the pair `("-Map", "<exec_path>.map")`
    """
    map_file_path = f"{exec_path}.map"
    return ("-Map", map_file_path)
add_linker_include_values(self, symbols, path)
Adds linker include entries to a provided file (usually ending in `.inc`).
For example, GNU syntax prescribes `PROVIDE(name = 0xdeadbeef);`.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
symbols |
Mapping[str, int] |
mapping of symbol string to effective address |
required |
path |
str |
path to the provided linker include file. |
required |
Source code in ofrak_patch_maker/toolchain/gnu.py
def add_linker_include_values(self, symbols: Mapping[str, int], path: str):
    """
    Append one `PROVIDE(name = 0xaddr);` line per symbol to the file at `path`.

    Symbols for which `self.linker_include_filter(name)` is truthy are left out.
    The file is opened in append mode, so existing content is kept.

    :param symbols: mapping of symbol string to effective address
    :param path: path to the provided linker include file
    """
    with open(path, "a") as include_file:
        # The generator is consumed lazily, so entries are filtered and written one at a time.
        include_file.writelines(
            f"PROVIDE({symbol} = {hex(address)});\n"
            for symbol, address in symbols.items()
            if not self.linker_include_filter(symbol)
        )
generate_linker_include_file(self, symbols, out_path)
This utility function receives the generated symbols dictionary that results
from preprocessing a firmware image and generates a `.inc` file for use
with linker scripts, enabling direct function calls when using the complete
cross compilation toolchain.
This functionality must be defined for each toolchain given potential syntactical differences.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
symbols |
Mapping[str, int] |
mappings of symbol string to effective address |
required |
out_path |
str |
the path to the resulting symbol include file (usually ending in `.inc`) |
required |
Returns:
Type | Description |
---|---|
str |
returns out_path |
Source code in ofrak_patch_maker/toolchain/gnu.py
def generate_linker_include_file(self, symbols: Mapping[str, int], out_path: str) -> str:
    """
    Create (or overwrite) a linker include file and fill it with the given symbols.

    The file first receives `RBS_AUTOGEN_WARNING`; the symbol entries are then appended
    by `self.add_linker_include_values`.

    :param symbols: mappings of symbol string to effective address
    :param out_path: the path to the resulting symbol include file

    :return str: returns out_path
    """
    # Opening with "w" truncates any previous content before the banner is written.
    with open(out_path, "w") as include_file:
        include_file.write(RBS_AUTOGEN_WARNING)

    # The file is closed at this point; the entries are appended by the helper.
    self.add_linker_include_values(symbols, out_path)
    return out_path
ld_generate_region(self, object_path, segment_name, permissions, vm_address, length)
Generates regions for linker scripts.
Returns:
Type | Description |
---|---|
Tuple[str, str] |
a string entry for a "memory region" for the toolchain in question. |
Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_region(
    self,
    object_path: str,
    segment_name: str,
    permissions: MemoryPermissions,
    vm_address: int,
    length: int,
) -> Tuple[str, str]:
    """
    Build a linker-script memory region entry for one segment of one object file.

    The region is named `".rbs_<object>_<segment>_mem"` (quotes included), where `<object>`
    is the object file's base name up to its first dot and `<segment>` is the segment name
    with leading/trailing dots removed.

    :param object_path: path of the object file the segment belongs to
    :param segment_name: name of the segment
    :param permissions: passed to `self._ld_perm2str` to obtain the permission string
    :param vm_address: region origin, rendered in hex
    :param length: region length, rendered in hex

    :return Tuple[str, str]: the region entry string, and the region name
    """
    perms_string = self._ld_perm2str(permissions)
    segment_label = segment_name.strip(".")
    # Everything before the first "." of the file name, e.g. "patch" for "patch.c.o".
    object_label = os.path.basename(object_path).partition(".")[0]
    region_name = f'".rbs_{object_label}_{segment_label}_mem"'
    origin, size = hex(vm_address), hex(length)
    region_entry = f" {region_name} ({perms_string}) : ORIGIN = {origin}, LENGTH = {size}"
    return region_entry, region_name
ld_generate_bss_region(self, vm_address, length)
Generates `.bss` regions for linker scripts.
Returns:
Type | Description |
---|---|
Tuple[str, str] |
a string entry for a `.bss` memory region, followed by the region's name |
Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_bss_region(
    self,
    vm_address: int,
    length: int,
) -> Tuple[str, str]:
    """
    Build the linker-script memory region entry used for `.bss`.

    The region is always named `".bss_mem"` (quotes included) and its permission string
    comes from `self._ld_perm2str(MemoryPermissions.RW)`.

    :param vm_address: region origin, rendered in hex
    :param length: region length, rendered in hex

    :return Tuple[str, str]: the region entry string, and the region name
    """
    region_name = '".bss_mem"'
    rw_perms = self._ld_perm2str(MemoryPermissions.RW)
    origin, size = hex(vm_address), hex(length)
    region_entry = f" {region_name} ({rw_perms}) : ORIGIN = {origin}, LENGTH = {size}"
    return region_entry, region_name
ld_generate_section(object_path, segment_name, memory_region_name)
staticmethod
Generates sections for linker scripts.
Returns:
Type | Description |
---|---|
str |
a string entry for a "section" for the toolchain in question. |
Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def ld_generate_section(
    object_path: str,
    segment_name: str,
    memory_region_name: str,
) -> str:
    """
    Build a linker-script section entry that places one segment of one object file.

    The output section is named `.rbs_<object>_<segment>`, starts at the origin of
    `memory_region_name`, and is assigned to that region. Its only input is
    `segment_name` taken from the object file, referenced by absolute path.

    :param object_path: path of the object file providing the segment
    :param segment_name: name of the segment inside the object file
    :param memory_region_name: name of the memory region to place the section in

    :return str: a string entry for a "section"
    """
    segment_label = segment_name.strip(".")
    # Everything before the first "." of the file name, e.g. "patch" for "patch.c.o".
    object_label = os.path.basename(object_path).partition(".")[0]
    absolute_object_path = os.path.abspath(object_path)

    section_name = f".rbs_{object_label}_{segment_label}"
    entry_lines = (
        f" {section_name} ORIGIN({memory_region_name}) : SUBALIGN(0) {{",
        f" {absolute_object_path}({segment_name})",
        f" }} > {memory_region_name}",
    )
    return "\n".join(entry_lines)
ld_generate_bss_section(memory_region_name)
staticmethod
Generates `.bss` sections for linker scripts.
Returns:
Type | Description |
---|---|
str |
a string entry for a `.bss` section |
Source code in ofrak_patch_maker/toolchain/gnu.py
@staticmethod
def ld_generate_bss_section(
    memory_region_name: str,
) -> str:
    """
    Build the linker-script `.bss` section entry.

    The section collects `.bss` and `.bss.*` from every `*.o` input and is assigned to
    `memory_region_name`.

    :param memory_region_name: name of the memory region to place `.bss` in

    :return str: a string entry for the `.bss` section
    """
    bss = ".bss"
    entry_lines = (
        f" {bss} : {{",
        f" *.o({bss}, {bss}.*)",
        f" }} > {memory_region_name}",
    )
    return "\n".join(entry_lines)
ld_generate_placeholder_reloc_sections(self)
GCC generates these sections for relocatable binaries even if they are completely unnecessary.
They don't seem to make it into the final executable, so there should be no risk of injecting them inadvertently.
Todo
No clear way to get size, so way overestimate.
Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_generate_placeholder_reloc_sections(self) -> Tuple[List[str], List[str]]:
    """
    Produce placeholder regions and sections for `.got.plt` and `.rel.dyn`.

    GCC generates these sections for relocatable binaries even if they are completely
    unnecessary.
    They don't seem to make it into the final executable, so there should be no risk
    of injecting them inadvertently.

    !!! todo

        No clear way to get size, so way overestimate.

    :return Tuple[List[str], List[str]]: the two region entries, and the two section entries
    """
    placeholder_base = 0xDEADBEEF
    placeholder_size = 0x1000

    got_plt_region, got_plt_name = self._ld_generate_got_plt_region(
        placeholder_base, placeholder_size
    )
    got_plt_section = self._ld_generate_got_plt_section(got_plt_name)

    # The second placeholder sits 0x20000 above the first.
    rel_dyn_region, rel_dyn_name = self._ld_generate_rel_dyn_region(
        placeholder_base + 0x20000, placeholder_size
    )
    rel_dyn_section = self._ld_generate_rel_dyn_section(rel_dyn_name)

    regions = [got_plt_region, rel_dyn_region]
    sections = [got_plt_section, rel_dyn_section]
    return regions, sections
ld_script_create(self, name, memory_regions, sections, build_dir, symbol_files)
Constructs the linker script for the concrete toolchain class in use.
Uses the provided name, memory region strings, section strings, symbol files,
expected entrypoint (if any) to generate a linker script that results in a valid
FEM object when used within `link`.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
name |
str |
required | |
memory_regions |
List[str] |
required | |
sections |
List[str] |
required | |
build_dir |
str |
required | |
symbol_files |
List[str] |
required |
Returns:
Type | Description |
---|---|
str |
path to the generated linker script |
Source code in ofrak_patch_maker/toolchain/gnu.py
def ld_script_create(
    self,
    name: str,
    memory_regions: List[str],
    sections: List[str],
    build_dir: str,
    symbol_files: List[str],
) -> str:
    """
    Write a GNU linker script into `build_dir` and return its path.

    The script consists of, in order: the autogenerated-file warning, one `INCLUDE` line
    per symbol file (as an absolute path), a `MEMORY` block holding `memory_regions`, and a
    `SECTIONS` block holding `sections` followed by a `/DISCARD/` entry listing every item
    of `self._linker_discard_list`.

    :param name: prefix for the generated file name
    :param memory_regions: pre-formatted entries for the `MEMORY` block
    :param sections: pre-formatted entries for the `SECTIONS` block
    :param build_dir: directory in which the script is created
    :param symbol_files: paths of linker include files to pull in with `INCLUDE`

    :return str: path to the generated linker script
    """
    # I know that it's annoying we're duplicating all of this source
    # from the LLVM implementation, but ultimately each toolchain
    # is responsible for maintaining its own syntax.
    ld_script_fd, ld_script_path = tempfile.mkstemp(
        dir=build_dir, prefix=name + "_", suffix=".ld"
    )
    # mkstemp returns an already-open OS-level descriptor. Write through it so it is closed
    # when the block exits; reopening the path by name would leak one descriptor per call.
    with os.fdopen(ld_script_fd, "w") as f:
        f.write(RBS_AUTOGEN_WARNING)
        for file in symbol_files:
            f.write(f"INCLUDE {os.path.abspath(file)}\n")

        f.write("\n\n")

        f.write("MEMORY\n{\n")
        for r in memory_regions:
            f.write(r + "\n")
        f.write("}\n")

        f.write("\n")

        f.write("SECTIONS\n{\n")
        for s in sections:
            f.write(s + "\n")
        f.write("\n")

        f.write(" /DISCARD/ : {\n")
        for d in self._linker_discard_list:
            f.write(f" *({d})\n")
        f.write(" }\n")
        f.write("}\n")

    return ld_script_path
get_bin_file_symbols(self, executable_path)
For now, this utility only searches for global function and data symbols which are actually contained in a section in the file, as opposed to symbols which are referenced but undefined.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
executable_path |
str |
path to the program to be analyzed for symbols |
required |
Returns:
Type | Description |
---|---|
Dict[str, Tuple[int, ofrak_type.symbol_type.LinkableSymbolType]] |
mapping of symbol string to tuple of effective address, symbol type. |
Source code in ofrak_patch_maker/toolchain/gnu.py
def get_bin_file_symbols(
    self, executable_path: str
) -> Dict[str, Tuple[int, LinkableSymbolType]]:
    """
    Run the toolchain's readobj tool with `--syms` on a file and parse the symbols.

    The tool output is handed to `self._parser.parse_symbols`, whose result is returned
    unchanged.

    :param executable_path: path to the program to be analyzed for symbols

    :return Dict[str, Tuple[int, LinkableSymbolType]]: mapping of symbol string to tuple of
        effective address, symbol type
    """
    # This happens to be the same as LLVM but it really doesn't belong in Parent code.
    # Note: readobj for gcc is objdump
    tool_flags = ["--syms"]
    tool_inputs = [executable_path]
    symbol_dump = self._execute_tool(self._readobj_path, tool_flags, tool_inputs)
    return self._parser.parse_symbols(symbol_dump)
get_bin_file_segments(self, path)
Parses all segments found in the executable path provided.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
str |
path to the program to be analyzed for symbols |
required |
Returns:
Type | Description |
---|---|
Tuple[ofrak_patch_maker.toolchain.model.Segment, ...] |
Tuple of Segment objects |
Source code in ofrak_patch_maker/toolchain/gnu.py
def get_bin_file_segments(self, path: str) -> Tuple[Segment, ...]:
    """
    Run the toolchain's readobj tool with `--section-headers` on a file and parse the result.

    :param path: path to the program to be analyzed

    :raises ToolchainException: if `get_file_format(path)` differs from `self.file_format`

    :return Tuple[Segment, ...]: whatever `self._parser.parse_sections` yields for the
        tool output
    """
    detected_format = get_file_format(path)
    if detected_format != self.file_format:
        raise ToolchainException(
            "Extracted file format does not match this toolchain instance!"
        )

    tool_flags = ["--section-headers"]
    section_dump = self._execute_tool(self._readobj_path, tool_flags, [path])
    return self._parser.parse_sections(section_dump)
get_bin_file_rel_symbols(self, executable_path)
This utility searches for global function and data symbols which are referenced in a section in the file but are undefined.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
executable_path |
str |
path to the program to be analyzed for symbols |
required |
Returns:
Type | Description |
---|---|
Dict[str, Tuple[int, ofrak_type.symbol_type.LinkableSymbolType]] |
mapping of symbol string to tuple of effective address, symbol type. |
Source code in ofrak_patch_maker/toolchain/gnu.py
def get_bin_file_rel_symbols(
    self, executable_path: str
) -> Dict[str, Tuple[int, LinkableSymbolType]]:
    """
    Run the toolchain's readobj tool with `--syms` on a file and parse it for relocations.

    The tool output is handed to `self._parser.parse_relocations`, whose result is
    returned unchanged.

    :param executable_path: path to the program to be analyzed for symbols

    :return Dict[str, Tuple[int, LinkableSymbolType]]: mapping of symbol string to tuple of
        effective address, symbol type
    """
    tool_flags = ["--syms"]
    tool_inputs = [executable_path]
    symbol_dump = self._execute_tool(self._readobj_path, tool_flags, tool_inputs)
    return self._parser.parse_relocations(symbol_dump)