the sample assembly programs here have been converged

This commit is contained in:
Perry Kivolowitz 2023-01-20 17:59:13 -06:00
parent cc1a4a77e3
commit 8b00b33c75
3 changed files with 304 additions and 0 deletions

89
section_2/float/README.md Normal file
View file

@ -0,0 +1,89 @@
# Section 2 / Floating Point
This chapter deals exclusively with the handling of floating point
operations on the AARCH64 platform.
## What are Floating Point Numbers?
Let's first begin with an understanding of what floating point numbers
are. [That can be found here](./what.md).
The TL;DR is that floating point numbers are approximations with double
precision being better approximations than single precision.
Floating point numbers have a sign bit (for signed floating points), an
exponent which controls the range of representable numbers, and a
mantissa that controls precision.
## Floating Point Registers
There are 31 general purpose registers (the X and W registers). Separately,
there are 32 floating point registers which are shared by single precision,
double precision and vector (SIMD - Single Instruction Multiple Data)
instructions.
A bit more detail [is provided here](./working.md).
## Rounding and Truncation
Truncation is part of casting `float` and `double` to `int` and `long`.
Rounding is important too.
[Coverage on rounding and truncation is found here](./rounding.md).
## Loading Floating Point Numbers into Registers
This is a little confusing because some values can be encoded directly as
an immediate operand of the `fmov` instruction. For example, 1.0 can be
`fmov`ed. Trying to do the same for 1.1 will fail. Remember that AARCH64
instructions are always 32 bits wide and that single and double precision
floating point numbers are at least that size, so an arbitrary value cannot
fit inside the instruction itself.
[This chapter](./literals.md) covers the loading of floating numbers
into registers. A sample program is linked below.
## Nuances of `fmov`
As indicated above, you can `fmov` a floating point literal into a
register. Except when you can't. Well, mostly you can't.
Additionally, there are some rules about using `fmov` between registers.
[This chapter](../../not_written_yet.md) covers the nuances of using
`fmov`.
## Half Precision Floating Point Numbers
Often used in Computer Graphics, half precision floats fit within 16
bits, the size of a `short`.
The TL;DR here is: Avoid them.
[This chapter explains why](./half.md).
## SIMD
There are two types of SIMD instruction sets available in the AARCH64
ISA but the makers of processors are not obligated to implement them on
any particular chip.
The first kind is NEON.
[This is described here.](./../../not_written_yet.md)
The second kind is the Scalable Vector Extension (SVE), which we do not
have near-term plans to cover.
## Demo Programs in this Chapter
In case you want to get right to the code, here are the demos presented
in this chapter.
If you receive the assembly language files with a lower case extension,
you will need to change the `.s` extension to `.S` so that the C
preprocessor is run over them before they are assembled.
| Link | Contents | Converged |
| ---- | -------- | --------- |
| [Link](./float_dump.cpp) | Deconstructs floating point values | NA |
| [Link](./asm_rounding.S) | Demonstrates some rounding in asm | Yes |
| [Link](./rounding.cpp) | Demonstrates some rounding in C++ | NA |
| [Link](./literals.S) | Demonstrates dealing with floating point literals | Yes |

View file

@ -0,0 +1,116 @@
/* Macros to permit the "same" assembly language to build on ARM64
Linux systems as well as Apple Silicon systems.
See the fuller documentation at:
https://github.com/pkivolowitz/asm_book/blob/main/macros/README.md
Perry Kivolowitz
A Gentle Introduction to Assembly Language
*/
// GLD_PTR xreg, label
//   Loads a 64-bit value into xreg by way of a global symbol.
//   Apple:     adrp to the GOT page of _label, then one ldr of the GOT slot.
//   Otherwise: literal-pool load of the address of label, followed by a
//              load of the 64-bit value stored at that address.
//   NOTE(review): the Apple path performs a single load (the GOT entry)
//   while the other path loads the address and then dereferences it.
//   Confirm that both are meant to leave the same thing in xreg.
.macro GLD_PTR xreg, label
#ifdef __APPLE__
        adrp    \xreg, _\label@GOTPAGE
        ldr     \xreg, [\xreg, _\label@GOTPAGEOFF]
#else
        ldr     \xreg, =\label
        ldr     \xreg, [\xreg]
#endif
.endm
// GLD_ADDR xreg, label
//   Get a global address into xreg.
//   Otherwise-branch: the address of label is fetched from the literal pool.
//   Apple branch:     adrp to the GOT page of _label, then add of the GOT
//                     page offset.
//   NOTE(review): the Apple branch uses add (not ldr) with a GOTPAGEOFF
//   operand; confirm the toolchain resolves this to the symbol address.
.macro GLD_ADDR xreg, label
#ifndef __APPLE__
        ldr     \xreg, =\label
#else
        adrp    \xreg, _\label@GOTPAGE
        add     \xreg, \xreg, _\label@GOTPAGEOFF
#endif
.endm
// LLD_ADDR xreg, label
//   Puts the address of label in xreg. The label is used exactly as
//   written (no leading underscore is added on either platform).
.macro LLD_ADDR xreg, label
#ifndef __APPLE__
        ldr     \xreg, =\label                  // address from the literal pool
#else
        adrp    \xreg, \label@PAGE              // page that holds label
        add     \xreg, \xreg, \label@PAGEOFF    // plus the offset within the page
#endif
.endm
// LLD_DBL xreg, dreg, label
//   Loads the value stored at label into dreg.
//   xreg is overwritten: it is left holding the address of label.
//   Only the address computation differs by platform; the final load is
//   shared by both.
.macro LLD_DBL xreg, dreg, label
#ifdef __APPLE__
        adrp    \xreg, \label@PAGE
        add     \xreg, \xreg, \label@PAGEOFF
#else
        ldr     \xreg, =\label
#endif
        ldur    \dreg, [\xreg]
.endm
// LLD_FLT xreg, sreg, label
//   Loads the value stored at label into sreg.
//   xreg is overwritten: it is left holding the address of label.
//   Only the address computation differs by platform; the final load is
//   shared by both.
.macro LLD_FLT xreg, sreg, label
#ifdef __APPLE__
        adrp    \xreg, \label@PAGE
        add     \xreg, \xreg, \label@PAGEOFF
#else
        ldr     \xreg, =\label
#endif
        ldur    \sreg, [\xreg]
.endm
// GLABEL label
//   Marks label as a global symbol. On Apple the symbol name gets a
//   leading underscore; elsewhere it is used as written.
.macro GLABEL label
#ifndef __APPLE__
        .global \label
#else
        .global _\label
#endif
.endm
// MAIN
//   Defines the program entry label at the point of use:
//   _main on Apple, main everywhere else.
.macro MAIN
#ifndef __APPLE__
main:
#else
_main:
#endif
.endm
// CRT label
//   Branch-with-link to label. On Apple the target name gets a leading
//   underscore; elsewhere it is used as written.
//   Being a bl, this overwrites the link register (x30).
.macro CRT label
#ifndef __APPLE__
        bl      \label
#else
        bl      _\label
#endif
.endm
// START_PROC
//   Emits .cfi_startproc, opening a call-frame-information region.
//   Place it directly after the label that starts a function and close
//   the region with END_PROC.
.macro START_PROC // after starting label
.cfi_startproc
.endm
// END_PROC
//   Emits .cfi_endproc, closing the call-frame-information region opened
//   by START_PROC. Place it after the final ret of the function.
.macro END_PROC // after the return
.cfi_endproc
.endm
// PUSH_P a, b
//   Pushes a register pair: sp is first lowered by 16 bytes, then a is
//   stored at the new sp and b right after it.
.macro PUSH_P a, b
        stp     \a, \b, [sp, #-16]!
.endm
// PUSH_R a
//   Pushes a single register. sp is still lowered by a full 16 bytes,
//   with a stored at the new sp.
.macro PUSH_R a
        str     \a, [sp, #-16]!
.endm
// POP_P a, b
//   Pops a register pair: a and b are loaded from sp, after which sp is
//   raised by 16 bytes.
.macro POP_P a, b
        ldp     \a, \b, [sp], #16
.endm
// POP_R a
//   Pops a single register: a is loaded from sp, after which sp is raised
//   by a full 16 bytes.
.macro POP_R a
        ldr     \a, [sp], #16
.endm
// MIN src_a, src_b, dest
//   dest = the smaller of src_a and src_b, compared as signed integers.
//   All three operands are general purpose registers of the same width.
//   The comparison is made here, so the result does not depend on
//   whichever earlier instruction last set the flags.
//   When the two sources are equal, src_b is selected.
//   Clobbers: the condition flags (NZCV).
.macro MIN src_a, src_b, dest
        cmp     \src_a, \src_b
        csel    \dest, \src_a, \src_b, LT       // src_a < src_b ? src_a : src_b
.endm
// MAX src_a, src_b, dest
//   dest = the larger of src_a and src_b, compared as signed integers.
//   All three operands are general purpose registers of the same width.
//   The comparison is made here, so the result does not depend on
//   whichever earlier instruction last set the flags.
//   When the two sources are equal, src_b is selected.
//   Clobbers: the condition flags (NZCV).
.macro MAX src_a, src_b, dest
        cmp     \src_a, \src_b
        csel    \dest, \src_a, \src_b, GT       // src_a > src_b ? src_a : src_b
.endm

99
section_2/float/test.s Normal file
View file

@ -0,0 +1,99 @@
; _main: calls _printf four times with the format string at l_.str.
; For index i = 0..3 each call is given i, the i-th 8-byte entry of _d and
; the i-th 4-byte entry of _f widened to double. Returns 0 in w0.
;
; Register roles (kept across the calls):
;   x20 = address of _d      x21 = address of _f      x19 = address of l_.str
; x22 is spilled and reloaded but not otherwise referenced in this function.
;
; Before each call the three values to be formatted are written to sp,
; sp+8 and sp+16, and only the format string is passed in x0. This matches
; the Apple arm64 convention of passing variadic arguments on the stack.
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 13, 0 sdk_version 13, 1
.globl _main ; -- Begin function main
.p2align 2
_main: ; @main
.cfi_startproc
; %bb.0:
; Prologue: 80-byte frame; bytes 0..23 hold the printf values, bytes 32..79
; hold the saved registers.
sub sp, sp, #80
stp x22, x21, [sp, #32] ; 16-byte Folded Spill
stp x20, x19, [sp, #48] ; 16-byte Folded Spill
stp x29, x30, [sp, #64] ; 16-byte Folded Spill
add x29, sp, #64 ; frame pointer -> saved x29/x30 pair
.cfi_def_cfa w29, 16
.cfi_offset w30, -8
.cfi_offset w29, -16
.cfi_offset w19, -24
.cfi_offset w20, -32
.cfi_offset w21, -40
.cfi_offset w22, -48
; ---- index 0 ----
Lloh0:
adrp x20, _d@PAGE
Lloh1:
add x20, x20, _d@PAGEOFF ; x20 = &_d
ldr d0, [x20] ; d0 = first entry of _d
Lloh2:
adrp x21, _f@PAGE
Lloh3:
add x21, x21, _f@PAGEOFF ; x21 = &_f
ldr s1, [x21] ; s1 = first entry of _f
fcvt d1, s1 ; widen the float to double
str d1, [sp, #16] ; third value: widened float
str d0, [sp, #8] ; second value: double
str xzr, [sp] ; first value: index 0
Lloh4:
adrp x19, l_.str@PAGE
Lloh5:
add x19, x19, l_.str@PAGEOFF ; x19 = &l_.str
mov x0, x19 ; x0 = format string
bl _printf
; ---- index 1 ----
ldr d0, [x20, #8]
ldr s1, [x21, #4]
fcvt d1, s1
str d1, [sp, #16]
str d0, [sp, #8]
mov w8, #1
str x8, [sp]
mov x0, x19
bl _printf
; ---- index 2 ----
ldr d0, [x20, #16]
ldr s1, [x21, #8]
fcvt d1, s1
str d1, [sp, #16]
str d0, [sp, #8]
mov w8, #2
str x8, [sp]
mov x0, x19
bl _printf
; ---- index 3 ----
ldr d0, [x20, #24]
ldr s1, [x21, #12]
fcvt d1, s1
str d1, [sp, #16]
str d0, [sp, #8]
mov w8, #3
str x8, [sp]
mov x0, x19
bl _printf
; Epilogue: return value 0, restore the saved registers, release the frame.
mov w0, #0
ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
add sp, sp, #80
ret
; Linker optimization hints naming the adrp/add pairs labelled above.
.loh AdrpAdd Lloh4, Lloh5
.loh AdrpAdd Lloh2, Lloh3
.loh AdrpAdd Lloh0, Lloh1
.cfi_endproc
; -- End function
; Data read by _main.
; _d: four 8-byte entries, aligned to 8 bytes (.p2align 3). _main reads
;     them at byte offsets 0, 8, 16 and 24.
.section __DATA,__data
.globl _d ; @d
.p2align 3
_d:
.quad 0x3ff8e38da3c21188 ; double 1.555555
.quad 0x40055554fbdad752 ; double 2.6666660000000002
.quad 0x400e38e325d4a5df ; double 3.7777769999999999
.quad 0x40138e38a7e73a36 ; double 4.8888879999999997
; _f: four 4-byte entries, aligned to 4 bytes (.p2align 2). _main reads
;     them at byte offsets 0, 4, 8 and 12.
.globl _f ; @f
.p2align 2
_f:
.long 0x3f8e38e3 ; float 1.11111104
.long 0x400e38e3 ; float 2.22222209
.long 0x40555554 ; float 3.33333302
.long 0x408e38e3 ; float 4.44444418
; l_.str: NUL-terminated format string handed to _printf by _main.
.section __TEXT,__cstring,cstring_literals
l_.str: ; @.str
.asciz "index %ld double %f float %f\n"
.subsections_via_symbols