the sample assembly programs here have been converged

2026-06-21 01:46:46 +08:00 · 2023-01-20 17:59:13 -06:00 · 2023-01-20 17:59:13 -06:00 · 8b00b33c75
commit 8b00b33c75
parent cc1a4a77e3
3 changed files with 304 additions and 0 deletions
--- a/section_2/float/README.md
+++ b/section_2/float/README.md
@ -0,0 +1,89 @@
+# Section 2 / Floating Point
+
+This chapter deals exclusively with the handling of floating point
+operations on the AARCH64 platform.
+
+## What are Floating Point Numbers?
+
+Let's first begin with an understanding of what floating point numbers
+are. [That can be found here](./what.md).
+
+The TL;DR is that floating point numbers are approximations, with double
+precision being better approximations than single precision.
+
+Floating point numbers have a sign bit (for signed floating points), an
+exponent which controls the range of representable numbers, and a
+mantissa that controls precision.
+
+## Floating Point Registers
+
+There are 31 general registers (the X and W registers). In addition,
+there are 32 floating point registers (V0 through V31) which are reused
+for single, double and vector (SIMD - Single Instruction Multiple Data)
+instructions.
+
+A bit more detail [is provided here](./working.md).
+
+## Rounding and Truncation
+
+Truncation is part of casting `float` and `double` to `int` and `long`.
+
+Rounding is important too.
+
+[Coverage on rounding and truncation is found here](./rounding.md).
+
+## Loading Floating Point Numbers into Registers
+
+This is a little confusing because some values can be loaded as
+immediate operands of the `fmov` instruction. For example, 1.0 can be
+`fmov`ed. Trying to do the same for 1.1 will fail. Remember that AARCH64
+instructions are always 32 bits wide and that floating point numbers are
+at least that size, so an arbitrary literal cannot fit inside the
+instruction itself.
+
+[This chapter](./literals.md) covers the loading of floating numbers
+into registers. A sample program is linked below.
+
+## Nuances of `fmov`
+
+As indicated above, you can `fmov` a floating point literal into a
+register. Except when you can't. Well, mostly you can't.
+
+Additionally, there are some rules about using `fmov` between registers.
+
+[This chapter](../../not_written_yet.md) covers the nuances of using
+`fmov`.
+
+## Half Precision Floating Point Numbers
+
+Often used in Computer Graphics, half precision floats fit within 16
+bits, the size of a `short`.
+
+The TL;DR here is: Avoid them.
+
+[This chapter explains why](./half.md).
+
+## SIMD
+
+There are two types of SIMD instruction sets available in the AARCH64
+ISA but the makers of processors are not obligated to implement them on
+any particular chip.
+
+The first kind is NEON.
+[This is described here.](./../../not_written_yet.md)
+
+The second kind is the Scalable Vector Extension (SVE), which we do not
+have near-term plans to cover.
+
+## Demo Programs in this Chapter
+
+In case you want to get right to the code, here are the demos presented
+in this chapter.
+
+If you receive the assembly language files with a lower case extension,
+you will need to make the `.s` extension into `.S`.
+
+| Link | Contents | Converged |
+| ---- | -------- | --------- |
+| [Link](./float_dump.cpp) | Deconstructs floating point values | NA |
+| [Link](./asm_rounding.S) | Demonstrates some rounding in asm | Yes |
+| [Link](./rounding.cpp) | Demonstrates some rounding in C++ | NA |
+| [Link](./literals.S) | Demonstrates dealing with floating point literals | Yes |
--- a/section_2/float/apple-linux-convergence.S
+++ b/section_2/float/apple-linux-convergence.S
@ -0,0 +1,116 @@
+/*  Macros to permit the "same" assembly language to build on ARM64
+    Linux systems as well as Apple Silicon systems.
+
+    See the fuller documentation at:
+    https://github.com/pkivolowitz/asm_book/blob/main/macros/README.md
+
+    Perry Kivolowitz
+    A Gentle Introduction to Assembly Language
+*/
+
+/*  GLD_PTR xreg, label
+
+    Load the 64-bit value stored AT the global symbol named by label
+    into xreg (for example, a pointer variable exported by a library).
+
+    xreg   X register that receives the value; it is also used as the
+           scratch register while the address is being formed.
+    label  symbol name WITHOUT the leading underscore; the Apple branch
+           adds the underscore itself.
+
+    Apple: the GOT slot yields the address of the symbol, which is then
+           dereferenced.
+    Linux: the literal pool yields the address of the symbol, which is
+           then dereferenced.
+
+    Previously the Apple branch stopped after reading the GOT slot and
+    so returned the ADDRESS of the symbol while the Linux branch
+    returned its CONTENTS. Both branches now return the contents.
+    Use GLD_ADDR when the address itself is wanted.
+*/
+.macro  GLD_PTR     xreg, label
+#if defined(__APPLE__)
+        adrp        \xreg, _\label@GOTPAGE
+        ldr         \xreg, [\xreg, _\label@GOTPAGEOFF]  // xreg = address of label
+        ldr         \xreg, [\xreg]                      // xreg = value stored at label
+#else
+        ldr         \xreg, =\label                      // xreg = address of label
+        ldr         \xreg, [\xreg]                      // xreg = value stored at label
+#endif
+.endm
+
+/*  GLD_ADDR xreg, label
+
+    Put the ADDRESS of the global symbol named by label into xreg.
+    Nothing stored at the symbol is read.
+
+    xreg   X register that receives the address.
+    label  symbol name WITHOUT the leading underscore; the Apple branch
+           adds the underscore itself.
+
+    Apple: the address of a global lives in its GOT slot, so it must be
+           LOADED from that slot. The earlier form used
+           add ..., @GOTPAGEOFF, which at best produces the address of
+           the GOT slot rather than the address of the symbol.
+    Linux: the address is taken from the literal pool.
+*/
+.macro  GLD_ADDR    xreg, label
+#if defined(__APPLE__)
+        adrp        \xreg, _\label@GOTPAGE
+        ldr         \xreg, [\xreg, _\label@GOTPAGEOFF]  // xreg = address of label
+#else
+        ldr         \xreg, =\label                      // xreg = address of label
+#endif
+.endm
+
+/*  LLD_ADDR xreg, label
+
+    Put the address of label into xreg without going through the GOT.
+    The label is used exactly as written (no underscore is added).
+
+    Linux: address comes from the literal pool.
+    Apple: address is formed from the page and page offset of label.
+*/
+.macro  LLD_ADDR    xreg, label
+#ifndef __APPLE__
+        ldr         \xreg, =\label
+#else
+        adrp        \xreg, \label@PAGE
+        add         \xreg, \xreg, \label@PAGEOFF
+#endif
+.endm
+
+/*  LLD_DBL xreg, dreg, label
+
+    Load the value found at label into dreg.
+
+    xreg   X register used to hold the address of label; on exit it
+           still contains that address.
+    dreg   destination register (a D register is expected, given the
+           macro name).
+    label  label used exactly as written (no underscore is added).
+
+    Only the address calculation differs between platforms; the load
+    itself is shared.
+*/
+.macro  LLD_DBL     xreg, dreg, label
+#ifndef __APPLE__
+        ldr         \xreg, =\label
+#else
+        adrp        \xreg, \label@PAGE
+        add         \xreg, \xreg, \label@PAGEOFF
+#endif
+        ldur        \dreg, [\xreg]
+.endm
+
+/*  LLD_FLT xreg, sreg, label
+
+    Load the value found at label into sreg.
+
+    xreg   X register used to hold the address of label; on exit it
+           still contains that address.
+    sreg   destination register (an S register is expected, given the
+           macro name).
+    label  label used exactly as written (no underscore is added).
+
+    Only the address calculation differs between platforms; the load
+    itself is shared.
+*/
+.macro  LLD_FLT     xreg, sreg, label
+#ifndef __APPLE__
+        ldr         \xreg, =\label
+#else
+        adrp        \xreg, \label@PAGE
+        add         \xreg, \xreg, \label@PAGEOFF
+#endif
+        ldur        \sreg, [\xreg]
+.endm
+
+/*  GLABEL label
+
+    Export label as a global symbol. On Apple systems the exported
+    name carries a leading underscore; elsewhere it is used as given.
+*/
+.macro  GLABEL      label
+#ifndef __APPLE__
+        .global     \label
+#else
+        .global     _\label
+#endif
+.endm
+
+/*  MAIN
+
+    Emit the entry point label: _main on Apple systems, main
+    elsewhere. Takes no arguments and emits no instructions.
+*/
+.macro  MAIN
+#ifndef __APPLE__
+main:
+#else
+_main:
+#endif
+.endm
+
+/*  CRT label
+
+    Branch-with-link to the routine named by label. On Apple systems
+    the target name is given a leading underscore; elsewhere it is
+    used as written. As with any bl, x30 is overwritten with the
+    return address.
+*/
+.macro  CRT         label
+#ifndef __APPLE__
+        bl          \label
+#else
+        bl          _\label
+#endif
+.endm
+
+/*  START_PROC
+
+    Opens a CFI region. Place it immediately after the label that
+    starts a procedure and pair it with END_PROC.
+*/
+.macro  START_PROC
+        .cfi_startproc
+.endm
+
+/*  END_PROC
+
+    Closes the CFI region opened by START_PROC. Place it after the
+    return instruction of the procedure.
+*/
+.macro  END_PROC
+        .cfi_endproc
+.endm
+
+/*  PUSH_P a, b
+
+    Push a pair of registers: sp is lowered by 16 first, then a is
+    stored at [sp] and b just above it.
+*/
+.macro  PUSH_P      a, b
+        stp         \a, \b, [sp, #-16]!
+.endm
+
+/*  PUSH_R a
+
+    Push a single register. sp is still lowered by a full 16 bytes
+    (not just the size of the register) before a is stored at [sp],
+    which keeps sp a multiple of 16.
+*/
+.macro  PUSH_R      a
+        str         \a, [sp, #-16]!
+.endm
+
+/*  POP_P a, b
+
+    Pop a pair of registers: a and b are loaded starting at [sp],
+    then sp is raised by 16. This is the inverse of PUSH_P.
+*/
+.macro  POP_P       a, b
+        ldp         \a, \b, [sp], #16
+.endm
+
+/*  POP_R a
+
+    Pop a single register: a is loaded from [sp], then sp is raised
+    by 16. This is the inverse of PUSH_R.
+*/
+.macro  POP_R       a
+        ldr         \a, [sp], #16
+.endm
+
+/*  MIN src_a, src_b, dest
+
+    dest = the smaller of src_a and src_b, compared as SIGNED integers.
+
+    src_a, src_b  general purpose registers of the same width.
+    dest          general purpose register that receives the result;
+                  it may be the same register as either source.
+
+    The macro performs its own comparison, so it no longer depends on
+    whatever flags happened to be set earlier. The condition flags
+    (NZCV) are overwritten.
+
+    The earlier form had no cmp and selected on GT, which after
+    cmp src_a, src_b picks the LARGER value.
+*/
+.macro  MIN     src_a, src_b, dest
+        cmp     \src_a, \src_b
+        csel    \dest, \src_a, \src_b, LT       // src_a if src_a < src_b, else src_b
+.endm
+
+/*  MAX src_a, src_b, dest
+
+    dest = the larger of src_a and src_b, compared as SIGNED integers.
+
+    src_a, src_b  general purpose registers of the same width.
+    dest          general purpose register that receives the result;
+                  it may be the same register as either source.
+
+    The macro performs its own comparison, so it no longer depends on
+    whatever flags happened to be set earlier. The condition flags
+    (NZCV) are overwritten.
+
+    The earlier form had no cmp and selected on LT, which after
+    cmp src_a, src_b picks the SMALLER value.
+*/
+.macro  MAX     src_a, src_b, dest
+        cmp     \src_a, \src_b
+        csel    \dest, \src_a, \src_b, GT       // src_a if src_a > src_b, else src_b
+.endm
--- a/section_2/float/test.s
+++ b/section_2/float/test.s
@ -0,0 +1,99 @@
+	.section	__TEXT,__text,regular,pure_instructions
+	.build_version macos, 13, 0	sdk_version 13, 1
+	.globl	_main                           ; -- Begin function main
+	.p2align	2
+_main:                                  ; @main
+	.cfi_startproc
+; %bb.0:
+	sub	sp, sp, #80                     ; reserve an 80-byte frame
+	stp	x22, x21, [sp, #32]             ; 16-byte Folded Spill
+	stp	x20, x19, [sp, #48]             ; 16-byte Folded Spill
+	stp	x29, x30, [sp, #64]             ; 16-byte Folded Spill
+	add	x29, sp, #64                    ; x29 points at the saved x29/x30 pair
+	.cfi_def_cfa w29, 16
+	.cfi_offset w30, -8
+	.cfi_offset w29, -16
+	.cfi_offset w19, -24
+	.cfi_offset w20, -32
+	.cfi_offset w21, -40
+	.cfi_offset w22, -48
+Lloh0:
+	adrp	x20, _d@PAGE
+Lloh1:
+	add	x20, x20, _d@PAGEOFF            ; x20 = address of _d, kept across all four calls
+	ldr	d0, [x20]                       ; d0 = first double in _d
+Lloh2:
+	adrp	x21, _f@PAGE
+Lloh3:
+	add	x21, x21, _f@PAGEOFF            ; x21 = address of _f, kept across all four calls
+	ldr	s1, [x21]                       ; s1 = first float in _f
+	fcvt	d1, s1                          ; widen the float to a double before printing
+	str	d1, [sp, #16]                   ; stack slot for the format's second %f
+	str	d0, [sp, #8]                    ; stack slot for the format's first %f
+	str	xzr, [sp]                       ; stack slot for %ld: index 0
+Lloh4:
+	adrp	x19, l_.str@PAGE
+Lloh5:
+	add	x19, x19, l_.str@PAGEOFF        ; x19 = address of the format string, reused below
+	mov	x0, x19                         ; x0 = format string
+	bl	_printf
+	ldr	d0, [x20, #8]                   ; d0 = second double in _d
+	ldr	s1, [x21, #4]                   ; s1 = second float in _f
+	fcvt	d1, s1
+	str	d1, [sp, #16]
+	str	d0, [sp, #8]
+	mov	w8, #1                          ; index 1
+	str	x8, [sp]
+	mov	x0, x19
+	bl	_printf
+	ldr	d0, [x20, #16]                  ; d0 = third double in _d
+	ldr	s1, [x21, #8]                   ; s1 = third float in _f
+	fcvt	d1, s1
+	str	d1, [sp, #16]
+	str	d0, [sp, #8]
+	mov	w8, #2                          ; index 2
+	str	x8, [sp]
+	mov	x0, x19
+	bl	_printf
+	ldr	d0, [x20, #24]                  ; d0 = fourth double in _d
+	ldr	s1, [x21, #12]                  ; s1 = fourth float in _f
+	fcvt	d1, s1
+	str	d1, [sp, #16]
+	str	d0, [sp, #8]
+	mov	w8, #3                          ; index 3
+	str	x8, [sp]
+	mov	x0, x19
+	bl	_printf
+	mov	w0, #0                          ; return 0
+	ldp	x29, x30, [sp, #64]             ; 16-byte Folded Reload
+	ldp	x20, x19, [sp, #48]             ; 16-byte Folded Reload
+	ldp	x22, x21, [sp, #32]             ; 16-byte Folded Reload
+	add	sp, sp, #80                     ; release the frame
+	ret
+	.loh AdrpAdd	Lloh4, Lloh5
+	.loh AdrpAdd	Lloh2, Lloh3
+	.loh AdrpAdd	Lloh0, Lloh1
+	.cfi_endproc
+                                        ; -- End function
+	.section	__DATA,__data
+	.globl	_d                              ; @d
+	.p2align	3                               ; 2^3 = 8-byte alignment for the doubles
+_d:
+	.quad	0x3ff8e38da3c21188              ; double 1.555555
+	.quad	0x40055554fbdad752              ; double 2.6666660000000002
+	.quad	0x400e38e325d4a5df              ; double 3.7777769999999999
+	.quad	0x40138e38a7e73a36              ; double 4.8888879999999997
+
+	.globl	_f                              ; @f
+	.p2align	2                               ; 2^2 = 4-byte alignment for the floats
+_f:
+	.long	0x3f8e38e3                      ; float 1.11111104
+	.long	0x400e38e3                      ; float 2.22222209
+	.long	0x40555554                      ; float 3.33333302
+	.long	0x408e38e3                      ; float 4.44444418
+
+	.section	__TEXT,__cstring,cstring_literals
+l_.str:                                 ; @.str
+	.asciz	"index %ld double %f float %f\n"   ; printf format passed in x0 by _main
+
+.subsections_via_symbols