mirror of
https://github.com/pkivolowitz/asm_book.git
synced 2026-06-21 01:46:46 +08:00
the sample assembly programs here have been converged
This commit is contained in:
parent
cc1a4a77e3
commit
8b00b33c75
3 changed files with 304 additions and 0 deletions
89
section_2/float/README.md
Normal file
89
section_2/float/README.md
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
# Section 2 / Floating Point
|
||||
|
||||
This chapter deals exclusively with the handling of floating point
|
||||
operations on the AARCH64 platform.
|
||||
|
||||
## What are Floating Point Numbers?
|
||||
|
||||
Let's first begin with an understanding of what floating point numbers
|
||||
are. [That can be found here](./what.md).
|
||||
|
||||
The TL;DR is that floating point numbers are approximations with double
|
||||
precision being better approximations that single precision.
|
||||
|
||||
Floating point numbers have a sign bit (for signed floating points), an
|
||||
exponent which controls the range of representable numbers, and a
|
||||
mantissa that controls precision.
|
||||
|
||||
## Floating Point Registers
|
||||
|
||||
There are 31 general registers (the X and W registers). Similarly there
|
||||
are 31 floating point registers which are reused for single, double and
|
||||
vector (SIMD - Single Instruction Multiple Data) instructions.
|
||||
|
||||
A bit more detail [is provided here](./working.md).
|
||||
|
||||
## Rounding and Truncation
|
||||
|
||||
Truncation is part of casting `float` and `double` to `int` and `long`.
|
||||
|
||||
Rounding is important too.
|
||||
|
||||
[Coverage on rounding and truncation is found here](./rounding.md).
|
||||
|
||||
## Loading Floating Point Numbers into Registers
|
||||
|
||||
This is a little confusing because some values can be loaded from
|
||||
arguments in the `fmov` instruction. For example, 1.0 can be `fmov`ed.
|
||||
Trying to do the same for 1.1 will fail. Remember that AARCH64
|
||||
instructions are always 32 bits wide and that floating point numbers are
|
||||
at least that size.
|
||||
|
||||
[This chapter](./literals.md) covers the loading of floating numbers
|
||||
into registers. A sample program is linked below.
|
||||
|
||||
## Nuances of `fmov`
|
||||
|
||||
As indicated above, you can `fmov` a floating point literal into a
|
||||
register. Except when you can't. Well, mostly you can't.
|
||||
|
||||
Additionally, there are some rules about using `fmov` between registers.
|
||||
|
||||
[This chapter](../../not_written_yet.md) covers the nuances of using
|
||||
`fmov`.
|
||||
|
||||
## Half Precision Floating Point Numbers
|
||||
|
||||
Often used in Computer Graphics, half precision floats fit within 16
|
||||
bits, the size of a `short`.
|
||||
|
||||
The TL;DR here is: Avoid them.
|
||||
|
||||
[This chapter explains why](./half.md).
|
||||
|
||||
## SIMD
|
||||
|
||||
There are two types of SIMD instruction sets available in the AARCH64
|
||||
ISA but the makers of processors are not obligated to implement them on
|
||||
any particular chip.
|
||||
|
||||
The first kind is NEON.
|
||||
[This is described here.](./../../not_written_yet.md)
|
||||
|
||||
The second kind of Scalable Vector Extension (SVE) for which we do not
|
||||
have near-term plans to cover.
|
||||
|
||||
## Demo Programs in this Chapter
|
||||
|
||||
In case you want to get right to the code, here are the demos presented
|
||||
in this chapter.
|
||||
|
||||
If you receive the assembly language files with a lower case extension,
|
||||
you will need to make the `.s` extension into `.S`.
|
||||
|
||||
| Link | Contents | Converged |
|
||||
| ---- | -------- | --------- |
|
||||
| [Link](./float_dump.cpp) | Deconstructs floating point values | NA |
|
||||
| [Link](./asm_rounding.S) | Demonstrates some rounding in asm | Yes |
|
||||
| [Link](./rounding.cpp) | Demonstrates some rounding in C++ | NA |
|
||||
| [Link](./literals.S) | Demonstrates dealing with floating point literals | Yes |
|
||||
116
section_2/float/apple-linux-convergence.S
Normal file
116
section_2/float/apple-linux-convergence.S
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
/* Macros to permit the "same" assembly language to build on ARM64
|
||||
Linux systems as well as Apple Silicon systems.
|
||||
|
||||
See the fuller documentation at:
|
||||
https://github.com/pkivolowitz/asm_book/blob/main/macros/README.md
|
||||
|
||||
Perry Kivolowitz
|
||||
A Gentle Introduction to Assembly Language
|
||||
*/
|
||||
|
||||
// GLD_PTR xreg, label
//   Loads through a global label, leaving the result in xreg.
//   Apple: adrp to the page holding the GOT entry of _label, then one
//          load from that entry.
//   Other: literal-pool load of the address of label, then one load
//          from that address.
//   NOTE(review): the Apple path performs one load and the other path
//   performs two; confirm both are meant to leave the same value in xreg.
.macro GLD_PTR xreg, label
#ifdef __APPLE__
        adrp    \xreg, _\label@GOTPAGE
        ldr     \xreg, [\xreg, _\label@GOTPAGEOFF]
#else
        ldr     \xreg, =\label
        ldr     \xreg, [\xreg]
#endif
.endm
|
||||
|
||||
// GLD_ADDR xreg, label
//   Leaves the address of a global label in xreg.
//   Apple: the address lives in the GOT entry of _label. adrp finds the
//          page of that entry and the ldr reads the address out of it.
//          (@GOTPAGEOFF is the offset of the GOT entry, not of the
//          symbol, so it has to be used with a load rather than an add.)
//   Other: literal-pool load of the address.
.macro GLD_ADDR xreg, label         // Get a global address
#if defined(__APPLE__)
        adrp    \xreg, _\label@GOTPAGE
        ldr     \xreg, [\xreg, _\label@GOTPAGEOFF]
#else
        ldr     \xreg, =\label
#endif
.endm
|
||||
|
||||
// LLD_ADDR xreg, label
//   Leaves the address of a local label in xreg.
//   Apple: page address via adrp plus the offset within the page.
//   Other: literal-pool load of the address.
.macro LLD_ADDR xreg, label
#ifdef __APPLE__
        adrp    \xreg, \label@PAGE
        add     \xreg, \xreg, \label@PAGEOFF
#else
        ldr     \xreg, =\label
#endif
.endm
|
||||
|
||||
// LLD_DBL xreg, dreg, label
//   Loads the value stored at a local label into dreg.
//   xreg is overwritten: it ends up holding the address of the label.
.macro LLD_DBL xreg, dreg, label
#ifdef __APPLE__
        adrp    \xreg, \label@PAGE
        add     \xreg, \xreg, \label@PAGEOFF
        ldur    \dreg, [\xreg]
#else
        ldr     \xreg, =\label
        ldur    \dreg, [\xreg]
#endif
.endm
|
||||
|
||||
// LLD_FLT xreg, sreg, label
//   Loads the value stored at a local label into sreg.
//   xreg is overwritten: it ends up holding the address of the label.
.macro LLD_FLT xreg, sreg, label
#ifdef __APPLE__
        adrp    \xreg, \label@PAGE
        add     \xreg, \xreg, \label@PAGEOFF
        ldur    \sreg, [\xreg]
#else
        ldr     \xreg, =\label
        ldur    \sreg, [\xreg]
#endif
.endm
|
||||
|
||||
// GLABEL label
//   Marks a label as global. The Apple branch adds a leading
//   underscore to the name; the other branch uses it unchanged.
.macro GLABEL label
#ifdef __APPLE__
        .global _\label
#else
        .global \label
#endif
.endm
|
||||
|
||||
// MAIN
//   Emits the label for the program entry function: _main in the
//   Apple branch, main otherwise.
.macro MAIN
#ifdef __APPLE__
_main:
#else
main:
#endif
.endm
|
||||
|
||||
// CRT label
//   Branch-with-link to the named function. The Apple branch adds a
//   leading underscore to the name; the other branch uses it unchanged.
.macro CRT label
#ifdef __APPLE__
        bl      _\label
#else
        bl      \label
#endif
.endm
|
||||
|
||||
// START_PROC
//   Opens a CFI region. Place it directly after the label that
//   starts the function.
.macro START_PROC
        .cfi_startproc
.endm
|
||||
|
||||
// END_PROC
//   Closes the CFI region opened by .cfi_startproc. Place it after
//   the return instruction of the function.
.macro END_PROC
        .cfi_endproc
.endm
|
||||
|
||||
// PUSH_P a, b
//   Pushes a register pair: sp is first lowered by 16, then a and b
//   are stored at the new sp (pre-indexed store).
.macro PUSH_P a, b
        stp     \a, \b, [sp, #-16]!
.endm
|
||||
|
||||
// PUSH_R a
//   Pushes one register: sp is first lowered by 16, then a is stored
//   at the new sp (pre-indexed store). The stack moves by 16 even
//   though only one register is written.
.macro PUSH_R a
        str     \a, [sp, #-16]!
.endm
|
||||
|
||||
// POP_P a, b
//   Pops a register pair: a and b are loaded from sp, then sp is
//   raised by 16 (post-indexed load).
.macro POP_P a, b
        ldp     \a, \b, [sp], #16
.endm
|
||||
|
||||
// POP_R a
//   Pops one register: a is loaded from sp, then sp is raised by 16
//   (post-indexed load).
.macro POP_R a
        ldr     \a, [sp], #16
.endm
|
||||
|
||||
// MIN src_a, src_b, dest
//   dest = the smaller of src_a and src_b, compared as signed values.
//   The comparison is done here so the result does not depend on
//   whatever flags the caller left behind.
//   Clobbers: the condition flags.
.macro MIN src_a, src_b, dest
        cmp     \src_a, \src_b
        csel    \dest, \src_a, \src_b, LT   // src_a < src_b ? src_a : src_b
.endm
|
||||
|
||||
// MAX src_a, src_b, dest
//   dest = the larger of src_a and src_b, compared as signed values.
//   The comparison is done here so the result does not depend on
//   whatever flags the caller left behind.
//   Clobbers: the condition flags.
.macro MAX src_a, src_b, dest
        cmp     \src_a, \src_b
        csel    \dest, \src_a, \src_b, GT   // src_a > src_b ? src_a : src_b
.endm
|
||||
99
section_2/float/test.s
Normal file
99
section_2/float/test.s
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
.section __TEXT,__text,regular,pure_instructions
|
||||
.build_version macos, 13, 0 sdk_version 13, 1
|
||||
.globl _main ; -- Begin function main
|
||||
.p2align 2
|
||||
_main: ; @main
|
||||
.cfi_startproc
|
||||
; %bb.0:
|
||||
sub sp, sp, #80
|
||||
stp x22, x21, [sp, #32] ; 16-byte Folded Spill
|
||||
stp x20, x19, [sp, #48] ; 16-byte Folded Spill
|
||||
stp x29, x30, [sp, #64] ; 16-byte Folded Spill
|
||||
add x29, sp, #64
|
||||
.cfi_def_cfa w29, 16
|
||||
.cfi_offset w30, -8
|
||||
.cfi_offset w29, -16
|
||||
.cfi_offset w19, -24
|
||||
.cfi_offset w20, -32
|
||||
.cfi_offset w21, -40
|
||||
.cfi_offset w22, -48
|
||||
Lloh0:
|
||||
adrp x20, _d@PAGE
|
||||
Lloh1:
|
||||
add x20, x20, _d@PAGEOFF
|
||||
ldr d0, [x20]
|
||||
Lloh2:
|
||||
adrp x21, _f@PAGE
|
||||
Lloh3:
|
||||
add x21, x21, _f@PAGEOFF
|
||||
ldr s1, [x21]
|
||||
fcvt d1, s1
|
||||
str d1, [sp, #16]
|
||||
str d0, [sp, #8]
|
||||
str xzr, [sp]
|
||||
Lloh4:
|
||||
adrp x19, l_.str@PAGE
|
||||
Lloh5:
|
||||
add x19, x19, l_.str@PAGEOFF
|
||||
mov x0, x19
|
||||
bl _printf
|
||||
ldr d0, [x20, #8]
|
||||
ldr s1, [x21, #4]
|
||||
fcvt d1, s1
|
||||
str d1, [sp, #16]
|
||||
str d0, [sp, #8]
|
||||
mov w8, #1
|
||||
str x8, [sp]
|
||||
mov x0, x19
|
||||
bl _printf
|
||||
ldr d0, [x20, #16]
|
||||
ldr s1, [x21, #8]
|
||||
fcvt d1, s1
|
||||
str d1, [sp, #16]
|
||||
str d0, [sp, #8]
|
||||
mov w8, #2
|
||||
str x8, [sp]
|
||||
mov x0, x19
|
||||
bl _printf
|
||||
ldr d0, [x20, #24]
|
||||
ldr s1, [x21, #12]
|
||||
fcvt d1, s1
|
||||
str d1, [sp, #16]
|
||||
str d0, [sp, #8]
|
||||
mov w8, #3
|
||||
str x8, [sp]
|
||||
mov x0, x19
|
||||
bl _printf
|
||||
mov w0, #0
|
||||
ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
|
||||
ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
|
||||
ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
|
||||
add sp, sp, #80
|
||||
ret
|
||||
.loh AdrpAdd Lloh4, Lloh5
|
||||
.loh AdrpAdd Lloh2, Lloh3
|
||||
.loh AdrpAdd Lloh0, Lloh1
|
||||
.cfi_endproc
|
||||
; -- End function
|
||||
.section __DATA,__data
|
||||
.globl _d ; @d
|
||||
.p2align 3
|
||||
_d:
|
||||
.quad 0x3ff8e38da3c21188 ; double 1.555555
|
||||
.quad 0x40055554fbdad752 ; double 2.6666660000000002
|
||||
.quad 0x400e38e325d4a5df ; double 3.7777769999999999
|
||||
.quad 0x40138e38a7e73a36 ; double 4.8888879999999997
|
||||
|
||||
.globl _f ; @f
|
||||
.p2align 2
|
||||
_f:
|
||||
.long 0x3f8e38e3 ; float 1.11111104
|
||||
.long 0x400e38e3 ; float 2.22222209
|
||||
.long 0x40555554 ; float 3.33333302
|
||||
.long 0x408e38e3 ; float 4.44444418
|
||||
|
||||
.section __TEXT,__cstring,cstring_literals
|
||||
l_.str: ; @.str
|
||||
.asciz "index %ld double %f float %f\n"
|
||||
|
||||
.subsections_via_symbols
|
||||
Loading…
Reference in a new issue