diff --git a/section_2/float/README.md b/section_2/float/README.md new file mode 100644 index 0000000..7985c09 --- /dev/null +++ b/section_2/float/README.md @@ -0,0 +1,89 @@ +# Section 2 / Floating Point + +This chapter deals exclusively with the handling of floating point +operations on the AARCH64 platform. + +## What are Floating Point Numbers? + +Let's first begin with an understanding of what floating point numbers +are. [That can be found here](./what.md). + +The TL;DR is that floating point numbers are approximations with double +precision being better approximations than single precision. + +Floating point numbers have a sign bit (for signed floating points), an +exponent which controls the range of representable numbers, and a +mantissa that controls precision. + +## Floating Point Registers + +There are 31 general registers (the X and W registers). In addition there +are 32 floating point registers which are reused for single, double and +vector (SIMD - Single Instruction Multiple Data) instructions. + +A bit more detail [is provided here](./working.md). + +## Rounding and Truncation + +Truncation is part of casting `float` and `double` to `int` and `long`. + +Rounding is important too. + +[Coverage on rounding and truncation is found here](./rounding.md). + +## Loading Floating Point Numbers into Registers + +This is a little confusing because some values can be loaded from +arguments in the `fmov` instruction. For example, 1.0 can be `fmov`ed. +Trying to do the same for 1.1 will fail. Remember that AARCH64 +instructions are always 32 bits wide and that floating point numbers are +at least that size. + +[This chapter](./literals.md) covers the loading of floating numbers +into registers. A sample program is linked below. + +## Nuances of `fmov` + +As indicated above, you can `fmov` a floating point literal into a +register. Except when you can't. Well, mostly you can't. + +Additionally, there are some rules about using `fmov` between registers. 
+ +[This chapter](../../not_written_yet.md) covers the nuances of using +`fmov`. + +## Half Precision Floating Point Numbers + +Often used in Computer Graphics, half precision floats fit within 16 +bits, the size of a `short`. + +The TL;DR here is: Avoid them. + +[This chapter explains why](./half.md). + +## SIMD + +There are two types of SIMD instruction sets available in the AARCH64 +ISA but the makers of processors are not obligated to implement them on +any particular chip. + +The first kind is NEON. +[This is described here.](./../../not_written_yet.md) + +The second kind is the Scalable Vector Extension (SVE), which we do not +have near-term plans to cover. + +## Demo Programs in this Chapter + +In case you want to get right to the code, here are the demos presented +in this chapter. + +If you receive the assembly language files with a lower case extension, +you will need to make the `.s` extension into `.S`. + +| Link | Contents | Converged | +| ---- | -------- | --------- | +| [Link](./float_dump.cpp) | Deconstructs floating point values | NA | +| [Link](./asm_rounding.S) | Demonstrates some rounding in asm | Yes | +| [Link](./rounding.cpp) | Demonstrates some rounding in C++ | NA | +| [Link](./literals.S) | Demonstrates dealing with floating point literals | Yes | diff --git a/section_2/float/apple-linux-convergence.S b/section_2/float/apple-linux-convergence.S new file mode 100644 index 0000000..aae5135 --- /dev/null +++ b/section_2/float/apple-linux-convergence.S @@ -0,0 +1,116 @@ +/* Macros to permit the "same" assembly language to build on ARM64 + Linux systems as well as Apple Silicon systems. 
+
+   See the fuller documentation at:
+   https://github.com/pkivolowitz/asm_book/blob/main/macros/README.md
+
+   Perry Kivolowitz
+   A Gentle Introduction to Assembly Language
+*/
+
+.macro  GLD_PTR     xreg, label     // Load through a global label; NOTE(review): Apple path reads the GOT slot, Linux path reads at the label - confirm these agree
+#if defined(__APPLE__)
+        adrp        \xreg, _\label@GOTPAGE
+        ldr         \xreg, [\xreg, _\label@GOTPAGEOFF]
+#else
+        ldr         \xreg, =\label
+        ldr         \xreg, [\xreg]
+#endif
+.endm
+
+.macro  GLD_ADDR    xreg, label     // Get a global address
+#if defined(__APPLE__)
+        adrp        \xreg, _\label@GOTPAGE
+        add         \xreg, \xreg, _\label@GOTPAGEOFF
+#else
+        ldr         \xreg, =\label
+#endif
+.endm
+
+.macro  LLD_ADDR    xreg, label     // Get a label address without using the GOT
+#if defined(__APPLE__)
+        adrp        \xreg, \label@PAGE
+        add         \xreg, \xreg, \label@PAGEOFF
+#else
+        ldr         \xreg, =\label
+#endif
+.endm
+
+.macro  LLD_DBL     xreg, dreg, label   // Load dreg from memory at label; xreg is left holding the label address
+#if defined(__APPLE__)
+        adrp        \xreg, \label@PAGE
+        add         \xreg, \xreg, \label@PAGEOFF
+        ldur        \dreg, [\xreg]
+//      fmov        \dreg, \xreg
+#else
+        ldr         \xreg, =\label
+        ldur        \dreg, [\xreg]
+#endif
+.endm
+
+.macro  LLD_FLT     xreg, sreg, label   // Load sreg from memory at label; xreg is left holding the label address
+#if defined(__APPLE__)
+        adrp        \xreg, \label@PAGE
+        add         \xreg, \xreg, \label@PAGEOFF
+        ldur        \sreg, [\xreg]
+#else
+        ldr         \xreg, =\label
+        ldur        \sreg, [\xreg]
+#endif
+.endm
+
+.macro  GLABEL      label           // Declare a global symbol (leading underscore on Apple)
+#if defined(__APPLE__)
+        .global     _\label
+#else
+        .global     \label
+#endif
+.endm
+
+.macro  MAIN                        // Emit the entry label: _main on Apple, main elsewhere
+#if defined(__APPLE__)
+_main:
+#else
+main:
+#endif
+.endm
+
+.macro  CRT         label           // bl to an external function (leading underscore on Apple)
+#if defined(__APPLE__)
+        bl          _\label
+#else
+        bl          \label
+#endif
+.endm
+
+.macro  START_PROC                  // after starting label
+        .cfi_startproc
+.endm
+
+.macro  END_PROC                    // after the return
+        .cfi_endproc
+.endm
+
+.macro  PUSH_P      a, b            // Push two registers (pre-decrement sp by 16)
+        stp         \a, \b, [sp, -16]!
+.endm
+
+.macro  PUSH_R      a               // Push one register; still moves sp by 16
+        str         \a, [sp, -16]!
+.endm
+
+.macro  POP_P       a, b            // Pop two registers (post-increment sp by 16)
+        ldp         \a, \b, [sp], 16
+.endm
+
+.macro  POP_R       a               // Pop one register; sp moves by 16 to mirror PUSH_R
+        ldr         \a, [sp], 16
+.endm
+
+.macro  MIN         src_a, src_b, dest  // dest = signed minimum; caller sets flags first with: cmp src_a, src_b
+        csel        \dest, \src_a, \src_b, LT
+.endm
+
+.macro  MAX         src_a, src_b, dest  // dest = signed maximum; caller sets flags first with: cmp src_a, src_b
+        csel        \dest, \src_a, \src_b, GT
+.endm
diff --git a/section_2/float/test.s b/section_2/float/test.s
new file mode 100644
index 0000000..90b0fee
--- /dev/null
+++ b/section_2/float/test.s
@@ -0,0 +1,99 @@
+        .section        __TEXT,__text,regular,pure_instructions
+        .build_version macos, 13, 0     sdk_version 13, 1
+        .globl  _main                           ; -- Begin function main
+        .p2align        2
+_main:                                  ; @main
+        .cfi_startproc
+; %bb.0:
+        sub     sp, sp, #80
+        stp     x22, x21, [sp, #32]             ; 16-byte Folded Spill
+        stp     x20, x19, [sp, #48]             ; 16-byte Folded Spill
+        stp     x29, x30, [sp, #64]             ; 16-byte Folded Spill
+        add     x29, sp, #64
+        .cfi_def_cfa w29, 16
+        .cfi_offset w30, -8
+        .cfi_offset w29, -16
+        .cfi_offset w19, -24
+        .cfi_offset w20, -32
+        .cfi_offset w21, -40
+        .cfi_offset w22, -48
+Lloh0:
+        adrp    x20, _d@PAGE
+Lloh1:
+        add     x20, x20, _d@PAGEOFF
+        ldr     d0, [x20]
+Lloh2:
+        adrp    x21, _f@PAGE
+Lloh3:
+        add     x21, x21, _f@PAGEOFF
+        ldr     s1, [x21]
+        fcvt    d1, s1
+        str     d1, [sp, #16]
+        str     d0, [sp, #8]
+        str     xzr, [sp]
+Lloh4:
+        adrp    x19, l_.str@PAGE
+Lloh5:
+        add     x19, x19, l_.str@PAGEOFF
+        mov     x0, x19
+        bl      _printf
+        ldr     d0, [x20, #8]
+        ldr     s1, [x21, #4]
+        fcvt    d1, s1
+        str     d1, [sp, #16]
+        str     d0, [sp, #8]
+        mov     w8, #1
+        str     x8, [sp]
+        mov     x0, x19
+        bl      _printf
+        ldr     d0, [x20, #16]
+        ldr     s1, [x21, #8]
+        fcvt    d1, s1
+        str     d1, [sp, #16]
+        str     d0, [sp, #8]
+        mov     w8, #2
+        str     x8, [sp]
+        mov     x0, x19
+        bl      _printf
+        ldr     d0, [x20, #24]
+        ldr     s1, [x21, #12]
+        fcvt    d1, s1
+        str     d1, [sp, #16]
+        str     d0, [sp, #8]
+        mov     w8, #3
+        str     x8, [sp]
+        mov     x0, x19
+        bl      _printf
+        mov     w0, #0
+        ldp     x29, x30, [sp, #64]             ; 16-byte Folded Reload
+        ldp     x20, x19, [sp, #48]             ; 16-byte Folded Reload
+        ldp     x22, x21, [sp, #32]             ; 16-byte Folded Reload
+        add     sp, sp, #80
+        ret
+        .loh AdrpAdd    Lloh4, Lloh5
+        .loh AdrpAdd    Lloh2, Lloh3
+        .loh AdrpAdd    Lloh0, Lloh1
+        .cfi_endproc
+                                        ; -- End function
+        .section        __DATA,__data
+        .globl  _d                              ; @d
+        .p2align        3
+_d:
+        .quad   0x3ff8e38da3c21188              ; double 1.555555
+        .quad   0x40055554fbdad752              ; double 2.6666660000000002
+        .quad   0x400e38e325d4a5df              ; double 3.7777769999999999
+        .quad   0x40138e38a7e73a36              ; double 4.8888879999999997
+
+        .globl  _f                              ; @f
+        .p2align        2
+_f:
+        .long   0x3f8e38e3                      ; float 1.11111104
+        .long   0x400e38e3                      ; float 2.22222209
+        .long   0x40555554                      ; float 3.33333302
+        .long   0x408e38e3                      ; float 4.44444418
+
+        .section        __TEXT,__cstring,cstring_literals
+l_.str:                                 ; @.str
+        .asciz  "index %ld double %f float %f\n"
+
+.subsections_via_symbols