diff --git a/.vscode/settings.json b/.vscode/settings.json index 5c879d2..deb128c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -30,7 +30,8 @@ "strlen", "strncpy", "struct", - "structs" + "structs", + "xreg" ], "markdownlint.config": { "MD024":false diff --git a/README.md b/README.md index 8226907..3513469 100644 --- a/README.md +++ b/README.md @@ -46,12 +46,18 @@ and how parameters are passed. In this book we will use the ARM LINUX conventions. This means: -* You will need to run a ARM Linux VM on the Macintosh - even on +* You *may* need to run an ARM Linux VM on the Macintosh - even on ARM-based Macs. Why? Apple uses a different calling convention. + The convention used in this book should work on all ARM Linux machines while the Apple calling convention is specific to Apple - Silicon-based machine. In the future, we hope to add a chapter - offering an explanation of the differences. + Silicon-based machines. + + This necessity did not sit well with some on reddit. We listened. + + We now have a chapter devoted to bringing Linux and Apple code + together to the degree possible. [This chapter](./more/apple_silicon/) + also provides a suite of macros that provide this help. * You will need to run WSL (Windows Subsystem for Linux) on ARM-based Windows machines. These do exist! diff --git a/more/apple_silicon/README.md b/more/apple_silicon/README.md new file mode 100644 index 0000000..c5311f5 --- /dev/null +++ b/more/apple_silicon/README.md @@ -0,0 +1,252 @@ +# Apple Silicon + +This book is written to the Linux calling convention as stated early on. +Unfortunately, this means that even if you own an Apple Silicon machine, +which is AARCH64, you'd still need a Linux virtual machine. This didn't +sit well with some on reddit and rightfully so. We undertook to +develop a way of writing assembly code once and having it work on both +Mac OS and Linux to the degree possible.
+ +We are pleased to present this chapter along with a set of assembly +language macros that, if used, help a great deal. + +There are some things we cannot adapt, such as variadic functions (e.g. +`printf()`) but we explain how code can be written to be compatible with +both environments at the expense of some duplicated code. + +## Assembly language macros + +An early innovation in assemblers was the introduction of a macro +capability. Given what could be considered a certain amount of tedium in +coding in asm, macros provide a simple form of *meta programming* where +a series of statements can be encapsulated by a single macro. Think of a +macro as an early form of a C++ templated function (kinda but not really). + +Here's an example of an assembly language macro: + +```text +.macro LD_ADDR xreg, label + adrp \xreg, \label@PAGE + add \xreg, \xreg, \label@PAGEOFF +.endm +``` + +Here's how it might be used: + +```text + LD_ADDR x0, fmt +``` + +This gets expanded to: + +```text + adrp x0, fmt@PAGE + add x0, x0, fmt@PAGEOFF +``` + +## Loading the address of data + +Assuming: + +```text + .data +fmt: .asciz "Hello!" +``` + +When we write: + +`ldr x0, =fmt` + +we are hoping to put the address of the label `fmt` into `x0`. But how +would this be possible since we've seen that addresses are (often) six +bytes long and our instructions are always 4 bytes long? As we describe +elsewhere, the above `ldr` instance is actually turned into instructions +to load an address relative to the address of the current instruction. +As long as the data we want is relatively close to the `ldr`, this works +out to a difference in addresses that is small (and so, can be fit into +a 4 byte instruction). + +Apple does not allow instructions of the form: + +`ldr x0, =fmt` + +Instead they take a more general approach of splitting addresses of data +into two parts: + +1. The *page* on which the label lives - think of this as generating the +upper bits of the address. + +2.
The *offset* on the page where the label actually resides - think of +this as the lower bits of the address. + +Hence: + +```text + adrp x0, fmt@PAGE + add x0, x0, fmt@PAGEOFF +``` + +The first instruction puts the high bits of the label's address in `x0`. +Then, the second instruction literally adds the low bits of the label's +address into `x0` forming a complete address. + +In this way, labels can be further away from the current instruction +than with the Linux way. + +## How does this help bridge Apple and Linux? + +[Here](./macros.S) is an assembly language file containing the macros +we're developing to bring Linux and Apple Silicon assembly language +closer together. + +Notice it has: + +```text +.macro LD_ADDR xreg, label + adrp \xreg, \label@PAGE + add \xreg, \xreg, \label@PAGEOFF +.endm +``` + +but also: + +```text +.macro LD_ADDR xreg, label + ldr \xreg, =\label +.endm +``` + +Which of these is used is determined by whether or not you are +assembling on an Apple machine or a Linux machine using features +provided by the standard C pre-processor. I.e.: + +```text +# if defined(__APPLE__) +// apple stuff +# else +// not apple stuff +# endif +``` + +## How to force the C pre-processor to run on assembly language + +`clang` on Mac OS will run assembly language files through the +C pre-processor. `clang` on Linux will not by default but can if you +specify `-x assembler-with-cpp`. + +gcc on Mac OS can be based on clang so on Mac OS it inherits `clang`'s +behavior. gcc on Linux does not run assembly language files through +the C pre-processor *if the asm file ends in .s but WILL if the file +ends in .S*. It took the author a long time to find this... + +## Differences between Apple and Linux + +### Loading label addresses + +This was described above. If you use `LD_ADDR` the macros will adapt for +you. + +### Function labels + +Apple prepends an underscore, Linux does not. Instead of: + +`bl printf` + +do: + +`CRT printf` + +and the macro will adapt.
+ +### main + +Like other function labels, Apple wants `_main` while Linux wants +`main`. + +Simply use: + +`MAIN` + +and the macro will adapt. + +### Globals + +Instead of writing: + +`.global main` + +use + +`GLABEL main` + +and the macros will adapt. + +## Variadic functions + +Functions like `printf()` are variadic. This means the function can take +any number of parameters. The first argument contains some information +that tells the function how many parameters were actually given. + +For example: + +`printf("%d is a number.\n", 42);` + +There is but one `%` place holder in this text. This tells `printf()` +that in addition to the string there is but one more parameter to be +expected. + +Apple and Linux handle variadic functions differently. + +Linux will use the scratch registers first up to `x7`. *Then* it will +use the stack. + +Apple will put the first parameter in `x0` and then shifts +immediately to putting all other parameters onto the stack. + +Here is how we overcame this difference: + +```text + // setting up a two value printf as usual + LD_ADDR x0, fmt // loads the address of fmt + LD_ADDR x1, ptr // loads **ptr + ldr x1, [x1] // dereferences **ptr to make *ptr + ldr x2, [x1] // dereferences *ptr to get value +# if defined(__APPLE__) + // if apple, push the second and third argument to stack + stp x1, x2, [sp, -16]! + CRT printf + add sp, sp, 16 +# else + // if not apple, the registers are already set up + CRT printf +# endif +``` + +## Other differences + +### Frame pointer + +Apple requires that `x29` be kept as a valid stack frame pointer. The +frame pointer should always start out as equal to the stack pointer. +However, within the function, the stack pointer is free to change. The +frame pointer must remain fixed so that debuggers always know how to +find the initial stack *frame*.
+
+To be Apple compatible, in addition to backing up `x30` also back up
+`x29` and then:
+
+`mov x29, sp`
+
+## START_PROC and END_PROC
+
+Again, for debugging purposes, you can insert frame checks into your
+code. These work the same on both Apple Silicon and Linux. If you want
+these, put `START_PROC` after the label introducing a function. Then,
+put `END_PROC` after the last statement of the function.
+
+## A useful link
+
+[Here](https://gcc.gnu.org/onlinedocs/gcc/Invoking-GCC.html) is an
+understandable version of gcc documentation.
diff --git a/more/apple_silicon/macros.S b/more/apple_silicon/macros.S
new file mode 100644
index 0000000..d506b73
--- /dev/null
+++ b/more/apple_silicon/macros.S
@@ -0,0 +1,58 @@
+// Macros to permit the "same" assembly language to build on ARM64
+// Linux systems as well as Apple Silicon systems.
+//
+// Perry Kivolowitz
+// A Gentle Introduction to Assembly Language
+//
+// LD_ADDR xreg, label    put the address of label into xreg
+// GLABEL label           make label global (Apple: _label)
+// MAIN                   define the main label (Apple: _main)
+// CRT label              bl to label (Apple: _label)
+// START_PROC / END_PROC  emit .cfi_startproc / .cfi_endproc
+
+#if defined(__APPLE__)
+
+.macro LD_ADDR xreg, label
+    adrp    \xreg, \label@PAGE
+    add     \xreg, \xreg, \label@PAGEOFF
+.endm
+
+.macro GLABEL label
+    .global _\label
+.endm
+
+.macro MAIN
+_main:
+.endm
+
+.macro CRT label
+    bl  _\label
+.endm
+
+#else
+
+.macro GLABEL label
+    .global \label
+.endm
+
+.macro MAIN
+main:
+.endm
+
+.macro CRT label
+    bl  \label
+.endm
+
+.macro LD_ADDR xreg, label
+    ldr     \xreg, =\label
+.endm
+
+#endif
+
+.macro START_PROC
+    .cfi_startproc
+.endm
+
+.macro END_PROC
+    .cfi_endproc
+.endm
diff --git a/more/apple_silicon/stub.S b/more/apple_silicon/stub.S
new file mode 100644
index 0000000..e239e81
--- /dev/null
+++ b/more/apple_silicon/stub.S
@@ -0,0 +1,62 @@
+#include "macros.S"
+
+// stub.S - builds unchanged on AArch64 Linux and on Apple Silicon.
+//
+// C equivalent:
+//     long *ptr;
+//     int main(void) {
+//         ptr = malloc(sizeof(long));
+//         if (ptr == NULL) return 1;
+//         *ptr = 0xFF;
+//         printf("%p %ld\n", ptr, *ptr);
+//         return 0;
+//     }
+
+    .text
+    .p2align 2
+    GLABEL main
+
+MAIN
+    START_PROC
+    // Save x29 and x30 as one adjacent pair and point x29 at it, so
+    // that the saved return address sits at [x29, 8] - the layout a
+    // debugger walking the frame pointer chain expects.
+    stp     x29, x30, [sp, -16]!
+    mov     x29, sp
+
+    mov     x0, 8               // allocating a long
+    CRT     malloc              // call malloc()
+    cbz     x0, 1f              // malloc() returned NULL - bail out
+    mov     x1, 0xFF            // load payload
+    str     x1, [x0]            // store payload
+    LD_ADDR x1, ptr
+    str     x0, [x1]            // ptr = block returned by malloc()
+
+    // setting up a two value printf as usual
+    LD_ADDR x0, fmt             // loads the address of fmt
+    LD_ADDR x1, ptr             // loads **ptr
+    ldr     x1, [x1]            // dereferences **ptr to make *ptr
+    ldr     x2, [x1]            // dereferences *ptr to get value
+#if defined(__APPLE__)
+    // if apple, push the second and third argument to stack
+    stp     x1, x2, [sp, -16]!
+    CRT     printf
+    add     sp, sp, 16
+#else
+    // if not apple, the registers are already set up
+    CRT     printf
+#endif
+
+    mov     x0, xzr             // return 0
+    b       2f
+1:  mov     x0, 1               // return 1: allocation failed
+2:  ldp     x29, x30, [sp], 16
+    ret
+    END_PROC
+
+    .data
+    .p2align 3
+ptr:    .space  8
+fmt:    .asciz  "%p %ld\n"
+
+    .end
diff --git a/section_1/funcs/f01.s b/section_1/funcs/f01.s
new file mode 100644
index 0000000..101a309
--- /dev/null
+++ b/section_1/funcs/f01.s
@@ -0,0 +1,24 @@
+    .arch armv8-a
+    .file "f01.c"
+    .text
+    .align 2
+    .global func
+    .type func, %function
+func:
+.LFB0:
+    .cfi_startproc
+    sub sp, sp, #16
+    .cfi_def_cfa_offset 16
+    str x0, [sp, 8]
+    str x1, [sp]
+    ldr x1, [sp, 8]
+    ldr x0, [sp]
+    add x0, x1, x0
+    add sp, sp, 16
+    .cfi_def_cfa_offset 0
+    ret
+    .cfi_endproc
+.LFE0:
+    .size func, .-func
+    .ident "GCC: (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0"
+    .section .note.GNU-stack,"",@progbits
diff --git a/section_1/funcs/nine_args.c b/section_1/funcs/nine_args.c
index bf15793..46a9ac3 100644
--- a/section_1/funcs/nine_args.c
+++ b/section_1/funcs/nine_args.c
@@ -1,5 +1,10 @@
 #include <stdio.h>
 
+void LessSillyFunction(long p1, long p2, long p3, long p4, long p5, long p6,
+                       long p7, long p8) {
+  printf("This example hurts my brain: %ld\n", p8);
+}
+
 void SillyFunction(long p1, long p2, long p3, long p4, long p5, long p6,
                    long p7, long p8, long p9) {
   printf("This example hurts my brain: %ld %ld\n", p8, p9);
@@ -7,4 +12,5 @@ void SillyFunction(long p1, long p2, long p3, long p4, long p5, long p6,
 
 int main() {
   SillyFunction(1, 2, 3, 4, 5, 6, 7, 8, 9);
+//  LessSillyFunction(1, 2, 3, 4, 5, 6, 7, 8);
 }
diff --git a/section_1/funcs/nine_args.s b/section_1/funcs/nine_args.s
new file mode 100644
index 
0000000..febf600
--- /dev/null
+++ b/section_1/funcs/nine_args.s
@@ -0,0 +1,103 @@
+    .section __TEXT,__text,regular,pure_instructions
+    .build_version macos, 13, 0 sdk_version 13, 1
+    .globl _LessSillyFunction ; -- Begin function LessSillyFunction
+    .p2align 2
+_LessSillyFunction: ; @LessSillyFunction
+    .cfi_startproc
+; %bb.0:
+    sub sp, sp, #96 ; reserve 96 bytes of stack
+    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+    add x29, sp, #80 ; x29 = address of the saved x29/x30 pair
+    .cfi_def_cfa w29, 16
+    .cfi_offset w30, -8
+    .cfi_offset w29, -16
+    stur x0, [x29, #-8] ; spill p1..p8 (x0..x7) into the frame
+    stur x1, [x29, #-16]
+    stur x2, [x29, #-24]
+    stur x3, [x29, #-32]
+    str x4, [sp, #40]
+    str x5, [sp, #32]
+    str x6, [sp, #24]
+    str x7, [sp, #16]
+    ldr x8, [sp, #16] ; x8 = p8 (the slot x7 was spilled to)
+    mov x9, sp
+    str x8, [x9] ; p8 is stored at [sp] for printf, not left in x1
+    adrp x0, l_.str@PAGE ; x0 = address of the format string
+    add x0, x0, l_.str@PAGEOFF
+    bl _printf
+    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+    add sp, sp, #96
+    ret
+    .cfi_endproc
+    ; -- End function
+    .globl _SillyFunction ; -- Begin function SillyFunction
+    .p2align 2
+_SillyFunction: ; @SillyFunction
+    .cfi_startproc
+; %bb.0:
+    sub sp, sp, #112 ; reserve 112 bytes of stack
+    stp x29, x30, [sp, #96] ; 16-byte Folded Spill
+    add x29, sp, #96 ; x29 = address of the saved x29/x30 pair
+    .cfi_def_cfa w29, 16
+    .cfi_offset w30, -8
+    .cfi_offset w29, -16
+    ldr x8, [x29, #16] ; x8 = p9, read from just above this frame (sp on entry)
+    stur x0, [x29, #-8] ; spill p1..p8 (x0..x7) into the frame
+    stur x1, [x29, #-16]
+    stur x2, [x29, #-24]
+    stur x3, [x29, #-32]
+    stur x4, [x29, #-40]
+    str x5, [sp, #48]
+    str x6, [sp, #40]
+    str x7, [sp, #32]
+    str x8, [sp, #24] ; spill p9 next to them
+    ldr x10, [sp, #32] ; x10 = p8
+    ldr x8, [sp, #24] ; x8 = p9
+    mov x9, sp
+    str x10, [x9] ; p8 stored at [sp] for printf
+    str x8, [x9, #8] ; p9 stored at [sp, #8] for printf
+    adrp x0, l_.str.1@PAGE ; x0 = address of the format string
+    add x0, x0, l_.str.1@PAGEOFF
+    bl _printf
+    ldp x29, x30, [sp, #96] ; 16-byte Folded Reload
+    add sp, sp, #112
+    ret
+    .cfi_endproc
+    ; -- End function
+    .globl _main ; -- Begin function main
+    .p2align 2
+_main: ; @main
+    .cfi_startproc
+; %bb.0:
+    sub sp, sp, #32 ; reserve 32 bytes of stack
+    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+    add x29, sp, #16 ; x29 = address of the saved x29/x30 pair
+    .cfi_def_cfa w29, 16
+    .cfi_offset w30, -8
+    .cfi_offset w29, -16
+    mov x9, sp
+    mov x8, #9
+    str x8, [x9] ; ninth argument (9) is stored at [sp]
+    mov x0, #1 ; arguments one to eight go in x0..x7
+    mov x1, #2
+    mov x2, #3
+    mov x3, #4
+    mov x4, #5
+    mov x5, #6
+    mov x6, #7
+    mov x7, #8
+    bl _SillyFunction
+    mov w0, #0 ; main returns 0
+    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+    add sp, sp, #32
+    ret
+    .cfi_endproc
+    ; -- End function
+    .section __TEXT,__cstring,cstring_literals
+l_.str: ; @.str
+    .asciz "This example hurts my brain: %ld\n"
+
+l_.str.1: ; @.str.1
+    .asciz "This example hurts my brain: %ld %ld\n"
+
+.subsections_via_symbols