mirror of
https://github.com/pkivolowitz/asm_book.git
synced 2026-06-21 02:26:59 +08:00
big refactoring and sprucing up of programs and text
This commit is contained in:
parent
8b00b33c75
commit
5fc9422143
8 changed files with 295 additions and 210 deletions
|
|
@ -1,4 +1,6 @@
|
||||||
.global main
|
#include "apple-linux-convergence.S"
|
||||||
|
|
||||||
|
GLABEL main
|
||||||
.text
|
.text
|
||||||
.align 2
|
.align 2
|
||||||
|
|
||||||
|
|
@ -7,77 +9,95 @@ dmore .req d21
|
||||||
ndless .req d22
|
ndless .req d22
|
||||||
ndmore .req d23
|
ndmore .req d23
|
||||||
|
|
||||||
Emit: str x30, [sp, -16]!
|
Emit:
|
||||||
bl printf
|
START_PROC
|
||||||
ldr x30, [sp], 16
|
PUSH_P x29, x30
|
||||||
|
mov x29, sp
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
PUSH_P x1, x2
|
||||||
|
CRT printf
|
||||||
|
add sp, sp, 16
|
||||||
|
#else
|
||||||
|
CRT printf
|
||||||
|
#endif
|
||||||
|
POP_P x29, x30
|
||||||
ret
|
ret
|
||||||
|
END_PROC
|
||||||
|
|
||||||
main: str x30, [sp, -16]!
|
MAIN
|
||||||
stp dless, dmore, [sp, -16]!
|
START_PROC
|
||||||
stp ndless, ndmore, [sp, -16]!
|
PUSH_P x29, x30
|
||||||
|
stp dless, dmore, [sp, -16]!
|
||||||
|
stp ndless, ndmore, [sp, -16]!
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
ldr x0, =vless
|
LLD_ADDR x0, leg
|
||||||
ldr dless, [x0]
|
CRT printf
|
||||||
ldr dmore, [x0, 8]
|
|
||||||
ldr ndless, [x0, 16]
|
|
||||||
ldr ndmore, [x0, 24]
|
|
||||||
|
|
||||||
//-fcvtps----------------------
|
LLD_ADDR x0, vless
|
||||||
fcvtps x1, dless
|
ldr dless, [x0]
|
||||||
fcvtps x2, dmore
|
ldr dmore, [x0, 8]
|
||||||
ldr x0, =fmt1
|
ldr ndless, [x0, 16]
|
||||||
bl Emit
|
ldr ndmore, [x0, 24]
|
||||||
|
|
||||||
fcvtps x1, ndless
|
//-fcvtps- Floating-point Convert to Signed integer, rounding toward Plus infinity
|
||||||
fcvtps x2, ndmore
|
fcvtps x1, dless
|
||||||
ldr x0, =fmt1
|
fcvtps x2, dmore
|
||||||
bl Emit
|
LLD_ADDR x0, fmt1
|
||||||
//-fcvtms-----------------------
|
bl Emit
|
||||||
fcvtms x1, dless
|
|
||||||
fcvtms x2, dmore
|
|
||||||
ldr x0, =fmt2
|
|
||||||
bl Emit
|
|
||||||
|
|
||||||
fcvtms x1, ndless
|
fcvtps x1, ndless
|
||||||
fcvtms x2, ndmore
|
fcvtps x2, ndmore
|
||||||
ldr x0, =fmt2
|
LLD_ADDR x0, fmt1
|
||||||
bl Emit
|
bl Emit
|
||||||
//-fcvtzs-----------------------
|
//-fcvtns- Floating-point Convert to Signed integer, rounding to nearest with ties to even (scalar).
|
||||||
fcvtzs x1, dless
|
fcvtns x1, dless
|
||||||
fcvtzs x2, dmore
|
fcvtns x2, dmore
|
||||||
ldr x0, =fmt4
|
LLD_ADDR x0, fmt2
|
||||||
bl Emit
|
bl Emit
|
||||||
|
|
||||||
fcvtzs x1, ndless
|
fcvtns x1, ndless
|
||||||
fcvtzs x2, ndmore
|
fcvtns x2, ndmore
|
||||||
ldr x0, =fmt4
|
LLD_ADDR x0, fmt2
|
||||||
bl Emit
|
bl Emit
|
||||||
//-fcvtas-----------------------
|
//-fcvtzs- Floating-point Convert to Signed integer, rounding toward Zero (scalar).
|
||||||
fcvtas x1, dless
|
fcvtzs x1, dless
|
||||||
fcvtas x2, dmore
|
fcvtzs x2, dmore
|
||||||
ldr x0, =fmt3
|
LLD_ADDR x0, fmt4
|
||||||
bl Emit
|
bl Emit
|
||||||
|
|
||||||
fcvtas x1, ndless
|
fcvtzs x1, ndless
|
||||||
fcvtas x2, ndmore
|
fcvtzs x2, ndmore
|
||||||
ldr x0, =fmt3
|
LLD_ADDR x0, fmt4
|
||||||
bl Emit
|
bl Emit
|
||||||
|
//-fcvtas- Floating-point Convert to Signed integer, rounding to nearest with ties to Away (scalar).
|
||||||
|
fcvtas x1, dless
|
||||||
|
fcvtas x2, dmore
|
||||||
|
LLD_ADDR x0, fmt3
|
||||||
|
bl Emit
|
||||||
|
|
||||||
|
fcvtas x1, ndless
|
||||||
|
fcvtas x2, ndmore
|
||||||
|
LLD_ADDR x0, fmt3
|
||||||
|
bl Emit
|
||||||
//------------------------------
|
//------------------------------
|
||||||
|
|
||||||
ldp ndless, ndmore, [sp], 16
|
ldp ndless, ndmore, [sp], 16
|
||||||
ldp dless, dmore, [sp], 16
|
ldp dless, dmore, [sp], 16
|
||||||
ldr x30, [sp], 16
|
POP_P x29, x30
|
||||||
mov w0, wzr
|
mov w0, wzr
|
||||||
ret
|
ret
|
||||||
|
END_PROC
|
||||||
.section .rodata
|
|
||||||
|
.data
|
||||||
vless: .double 5.49
|
vless: .double 5.49
|
||||||
vmore: .double 5.51
|
vmore: .double 5.51
|
||||||
nvless: .double -5.49
|
nvless: .double -5.49
|
||||||
nvmore: .double -5.51
|
nvmore: .double -5.51
|
||||||
fmt1: .asciz "fcvtps less: %d more: %d\n"
|
fmt1: .asciz "fcvtps less: %d more: %d\n"
|
||||||
fmt2: .asciz "fcvtms less: %d more: %d\n"
|
fmt2: .asciz "fcvtns less: %d more: %d\n"
|
||||||
fmt3: .asciz "fcvtta less: %d more: %d\n"
|
fmt3: .asciz "fcvtas less: %d more: %d\n" // label for the fcvtas results passed to Emit
|
||||||
fmt4: .asciz "fcvtzs less: %d more: %d\n"
|
fmt4: .asciz "fcvtzs less: %d more: %d\n"
|
||||||
|
leg: .asciz "less values are +/- 5.49. more values are +/- 5.51.\n"
|
||||||
|
|
||||||
.end
|
.end
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
/* Perry Kivolowitz
|
/* Perry Kivolowitz
|
||||||
Professor and Chair of Computer Science
|
A Gentle Introduction to Assembly Language
|
||||||
Carthage College
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
@ -12,24 +11,28 @@
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
const int BIASD = 1023;
|
const int BIASD = 1023; // biasing value for double exponents
|
||||||
const int BIASF = 127;
|
const int BIASF = 127; // biasing value for floats
|
||||||
|
|
||||||
const int FRAC_SIZD = 52;
|
// The mantissa controls precision.
|
||||||
const int FRAC_SIZF = 23;
|
|
||||||
|
|
||||||
const int EXPO_SIZD = 11;
|
const int FRAC_SIZD = 52; // number of bits in double's mantissa
|
||||||
const int EXPO_SIZF = 8;
|
const int FRAC_SIZF = 23; // number of bits in float's mantissa
|
||||||
|
|
||||||
|
// The exponent controls range.
|
||||||
|
|
||||||
|
const int EXPO_SIZD = 11; // number of bits in a double's exponent
|
||||||
|
const int EXPO_SIZF = 8; // number of bits in a float's exponent
|
||||||
|
|
||||||
const int SIGN_SIZE = 1;
|
const int SIGN_SIZE = 1;
|
||||||
|
|
||||||
struct SP {
|
struct SP { // construction of a float
|
||||||
unsigned int frac : FRAC_SIZF;
|
unsigned int frac : FRAC_SIZF;
|
||||||
unsigned int expo : EXPO_SIZF;
|
unsigned int expo : EXPO_SIZF;
|
||||||
unsigned int sign : SIGN_SIZE;
|
unsigned int sign : SIGN_SIZE;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct DP {
|
struct DP { // construction of a double
|
||||||
unsigned long frac : FRAC_SIZD;
|
unsigned long frac : FRAC_SIZD;
|
||||||
unsigned long expo : EXPO_SIZD;
|
unsigned long expo : EXPO_SIZD;
|
||||||
unsigned long sign : SIGN_SIZE;
|
unsigned long sign : SIGN_SIZE;
|
||||||
|
|
@ -61,7 +64,9 @@ template<class T>
|
||||||
string MakeEquation(T & u, int bias) {
|
string MakeEquation(T & u, int bias) {
|
||||||
stringstream ss;
|
stringstream ss;
|
||||||
bool is_double = (bias == BIASD);
|
bool is_double = (bias == BIASD);
|
||||||
ss << (u.sign ? "-" : "") << dec << setprecision(11) << 1.0 + DeBinary(is_double, u.frac) << " x 2^" << (u.expo - bias);
|
ss << (u.sign ? "-" : "") << dec << setprecision(11);
|
||||||
|
ss << 1.0 + DeBinary(is_double, u.frac);
|
||||||
|
ss << " x 2^" << (u.expo - bias);
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -69,22 +74,26 @@ int main(int argc, char ** argv) {
|
||||||
Double d;
|
Double d;
|
||||||
Single f;
|
Single f;
|
||||||
|
|
||||||
const int fore_space = 20;
|
const int fore_space = 18;
|
||||||
const int field_space = 25;
|
const int field_space = 20;
|
||||||
|
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
cerr << "Must supply a floating point value as a command line argument.\n";
|
cerr << "Requires a floating point value on command line .\n";
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
d.d = atof(argv[1]);
|
d.d = atof(argv[1]);
|
||||||
f.f = float(d.d);
|
f.f = float(d.d);
|
||||||
|
|
||||||
cout << left << setw(fore_space) << "Component" << left << setw(25) << "Double";
|
cout << left << setw(fore_space) << "Component" << left;
|
||||||
cout << left << setw(field_space) << "Float" << "Comment" << endl;
|
cout << setw(field_space);
|
||||||
|
cout << "Double" << left << setw(field_space) << "Float";
|
||||||
|
cout << "Comment" << endl;
|
||||||
|
|
||||||
cout << left << setw(fore_space) << "Value:" << setw(25) << setprecision(10) << d.d;
|
cout << left << setw(fore_space) << "Value:" << setw(field_space);
|
||||||
|
cout << setprecision(10) << d.d;
|
||||||
cout << setw(field_space) << setprecision(10) << f.f;
|
cout << setw(field_space) << setprecision(10) << f.f;
|
||||||
cout << "Delta(F - D): " << setw(16) << setprecision(10) << f.f - d.d << endl;
|
cout << "Delta(F - D): " << setw(16) << setprecision(10);
|
||||||
|
cout << f.f - d.d << endl;
|
||||||
|
|
||||||
cout << left << setw(fore_space) << "Sign:";
|
cout << left << setw(fore_space) << "Sign:";
|
||||||
cout << setw(field_space) << (bool)d.D.sign;
|
cout << setw(field_space) << (bool)d.D.sign;
|
||||||
|
|
@ -107,28 +116,38 @@ int main(int argc, char ** argv) {
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
|
||||||
cout << setw(fore_space) << "Halves:";
|
cout << setw(fore_space) << "Halves:";
|
||||||
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 1)) & 1);
|
cout << setw(field_space) << hex;
|
||||||
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 1)) & 1);
|
cout << ((d.D.frac >> (FRAC_SIZD - 1)) & 1);
|
||||||
|
cout << setw(field_space) << hex;
|
||||||
|
cout << ((f.F.frac >> (FRAC_SIZF - 1)) & 1);
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
|
||||||
cout << setw(fore_space) << "Quarters:";
|
cout << setw(fore_space) << "Quarters:";
|
||||||
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 2)) & 1);
|
cout << setw(field_space) << hex;
|
||||||
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 2)) & 1);
|
cout << ((d.D.frac >> (FRAC_SIZD - 2)) & 1);
|
||||||
|
cout << setw(field_space) << hex;
|
||||||
|
cout << ((f.F.frac >> (FRAC_SIZF - 2)) & 1);
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
|
||||||
cout << setw(fore_space) << "Eighths:";
|
cout << setw(fore_space) << "Eighths:";
|
||||||
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 3)) & 1);
|
cout << setw(field_space) << hex;
|
||||||
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 3)) & 1);
|
cout << ((d.D.frac >> (FRAC_SIZD - 3)) & 1);
|
||||||
|
cout << setw(field_space) << hex;
|
||||||
|
cout << ((f.F.frac >> (FRAC_SIZF - 3)) & 1);
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
|
||||||
cout << setw(fore_space) << "Sixteenths:";
|
cout << setw(fore_space) << "Sixteenths:";
|
||||||
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 4)) & 1);
|
cout << setw(field_space) << hex;
|
||||||
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 4)) & 1);
|
cout << ((d.D.frac >> (FRAC_SIZD - 4)) & 1);
|
||||||
|
cout << setw(field_space) << hex;
|
||||||
|
cout << ((f.F.frac >> (FRAC_SIZF - 4)) & 1);
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
|
||||||
cout << setw(fore_space) << "Thirty seconds:";
|
cout << setw(fore_space) << "Thirty seconds:";
|
||||||
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 5)) & 1);
|
cout << setw(field_space) << hex;
|
||||||
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 5)) & 1);
|
cout << ((d.D.frac >> (FRAC_SIZD - 5)) & 1);
|
||||||
|
cout << setw(field_space) << hex;
|
||||||
|
cout << ((f.F.frac >> (FRAC_SIZF - 5)) & 1);
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
|
||||||
cout << setw(fore_space) << "Full fraction:";
|
cout << setw(fore_space) << "Full fraction:";
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,8 @@ themselves, using floating point literals is extremely constrained.
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
```asm
|
```asm
|
||||||
fmov d0, 1 // 1
|
fmov d0, 1 // 1
|
||||||
fmov d0, 1.1 // 2
|
fmov d0, 1.1 // 2
|
||||||
```
|
```
|
||||||
|
|
||||||
`Line 1` will pass muster but `Line 2` will cause an error.
|
`Line 1` will pass muster but `Line 2` will cause an error.
|
||||||
|
|
@ -150,13 +150,13 @@ what that magic is.
|
||||||
Build the program with the `-g` option to enable debugging using GDB.
|
Build the program with the `-g` option to enable debugging using GDB.
|
||||||
|
|
||||||
```text
|
```text
|
||||||
$ gcc -g t.s
|
% gcc -g t.s
|
||||||
```
|
```
|
||||||
|
|
||||||
Then launch GDB on the executable:
|
Then launch GDB on the executable:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
$ gdb a.out
|
% gdb a.out
|
||||||
```
|
```
|
||||||
|
|
||||||
Set a breakpoint on line 6.
|
Set a breakpoint on line 6.
|
||||||
|
|
|
||||||
|
|
@ -1,38 +1,64 @@
|
||||||
.global main
|
#include "apple-linux-convergence.S"
|
||||||
|
|
||||||
|
GLABEL main
|
||||||
.text
|
.text
|
||||||
.align 2
|
.p2align 2
|
||||||
|
|
||||||
counter .req x20
|
counter .req x20
|
||||||
dptr .req x21
|
dptr .req x21
|
||||||
fptr .req x22
|
fptr .req x22
|
||||||
.equ max, 4
|
.equ max, 4
|
||||||
|
|
||||||
main: stp counter, x30, [sp, -16]!
|
MAIN
|
||||||
stp dptr, fptr, [sp, -16]!
|
START_PROC
|
||||||
ldr dptr, =d
|
PUSH_P counter, x30
|
||||||
ldr fptr, =f
|
PUSH_P dptr, fptr
|
||||||
mov counter, xzr
|
PUSH_R x29
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
1: cmp counter, max
|
LLD_ADDR dptr, d
|
||||||
beq 2f
|
LLD_ADDR fptr, f
|
||||||
|
mov counter, xzr
|
||||||
|
|
||||||
ldr d0, [dptr, counter, lsl 3]
|
1: cmp counter, max
|
||||||
ldr s1, [fptr, counter, lsl 2]
|
beq 2f
|
||||||
fcvt d1, s1
|
ldr d0, [dptr, counter, lsl 3]
|
||||||
ldr x0, =fmt
|
ldr s1, [fptr, counter, lsl 2]
|
||||||
add counter, counter, 1
|
fcvt d1, s1
|
||||||
mov x1, counter
|
LLD_ADDR x0, fmt
|
||||||
bl printf
|
add counter, counter, 1
|
||||||
b 1b
|
mov x1, counter
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
/*
|
||||||
|
Give us some stack space. Then read the printf template
|
||||||
|
string right to left. Variadics on the Mac are difficult
|
||||||
|
to get right. Remember that printf never prints floats.
|
||||||
|
Only doubles. Internally, floats are converted to double.
|
||||||
|
See the fcvt instruction above.
|
||||||
|
*/
|
||||||
|
sub sp, sp, 32
|
||||||
|
str d1, [sp, 16]
|
||||||
|
str d0, [sp, 8]
|
||||||
|
str x1, [sp]
|
||||||
|
CRT printf
|
||||||
|
add sp, sp, 32
|
||||||
|
#else
|
||||||
|
CRT printf
|
||||||
|
#endif
|
||||||
|
b 1b
|
||||||
|
|
||||||
2: ldp dptr, fptr, [sp], 16
|
2: POP_R x29
|
||||||
ldp counter, x30, [sp], 16
|
POP_P dptr, fptr
|
||||||
mov w0, wzr
|
POP_P counter, x30
|
||||||
|
mov w0, wzr
|
||||||
|
END_PROC
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.data
|
.data
|
||||||
fmt: .asciz "%d %f %f\n"
|
fmt: .asciz "index %ld double %f float %f\n"
|
||||||
d: .double 1.111111, 2.222222, 3.333333, 4.444444
|
.p2align 3
|
||||||
f: .float 1.111111, 2.222222, 3.333333, 4.444444
|
d: .double 1.555555, 2.666666, 3.777777, 4.888888
|
||||||
|
.p2align 2
|
||||||
|
f: .float 1.111111, 2.222222, 3.333333, 4.444444
|
||||||
|
|
||||||
.end
|
.end
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,11 @@ For this chapter, I will use:
|
||||||
* Rounding means picking some fractional value and if the float's
|
* Rounding means picking some fractional value and if the float's
|
||||||
fraction is higher, you go one way and if lower, you go the other.
|
fraction is higher, you go one way and if lower, you go the other.
|
||||||
|
|
||||||
* Truncation means you don't look too closely at the fractional value.
|
* Truncation means you don't care about the fractional value. You just
|
||||||
Instead, you just eliminate the fractional part and slam the whole
|
eliminate the fractional part and slam the whole number ... one way or
|
||||||
number ... one way or the other.
|
the other.
|
||||||
|
|
||||||
|
"One way or the other" is defined next.
|
||||||
|
|
||||||
## Truncation Towards Zero
|
## Truncation Towards Zero
|
||||||
|
|
||||||
|
|
@ -30,8 +32,8 @@ Diving a little deeper, there is a choice to be made as to whether or
|
||||||
not `integer_variable` is signed or unsigned. And, whether or not
|
not `integer_variable` is signed or unsigned. And, whether or not
|
||||||
`integer_variable` is a 32 bit or 64 bit value.
|
`integer_variable` is a 32 bit or 64 bit value.
|
||||||
|
|
||||||
The instruction is `fcvtz` - convert towards zero. Then, the choice
|
The instruction is `fcvtz` - convert towards zero. Then, the choice as
|
||||||
as to whether to produce a signed or unsigned result is defined by the
|
to whether to produce a signed or unsigned result is defined by the
|
||||||
final letterL `u` or `s`.
|
final letter: `u` or `s`.
|
||||||
|
|
||||||
| Mnemonic | Meaning |
|
| Mnemonic | Meaning |
|
||||||
|
|
@ -41,7 +43,7 @@ final letterL `u` or `s`.
|
||||||
|
|
||||||
As an example of how the ARM documentation is confusing - this
|
As an example of how the ARM documentation is confusing - this
|
||||||
instruction which completely discards the fractional value is said by
|
instruction which completely discards the fractional value is said by
|
||||||
the ARM documentation as doing rounding.
|
the ARM documentation to be rounding, not truncating.
|
||||||
|
|
||||||
The the choice of source register defined whether you are converting
|
The choice of source register defines whether you are converting
|
||||||
a double or single precision floating point value.
|
a double or single precision floating point value.
|
||||||
|
|
@ -60,14 +62,18 @@ Examples where `d` is a `double` and `f` is a `float`:
|
||||||
|
|
||||||
| C++ | Instruction |
|
| C++ | Instruction |
|
||||||
| --- | ----------- |
|
| --- | ----------- |
|
||||||
| `int32_t(d)` | `fcvtzs w0, d0` |
|
| `int32_t(d)` | `fcvtzs w0, d0` |
|
||||||
| `uint32_t(d)` | `fcvtzu w0, d0` |
|
| `uint32_t(d)` | `fcvtzu w0, d0` |
|
||||||
| `int64_t(d)` | `fcvtzs x0, d0` |
|
| `int64_t(d)` | `fcvtzs x0, d0` |
|
||||||
| `uint64_t(d)` | `fcvtzu x0, d0` |
|
| `uint64_t(d)` | `fcvtzu x0, d0` |
|
||||||
|
|
||||||
[Here](./asm_rounding.s) is a program which demonstrates various
|
[Here](./asm_rounding.S) is a program which demonstrates various
|
||||||
ways of converting doubles to integers.
|
ways of converting doubles to integers.
|
||||||
|
|
||||||
|
Note: This source code has been updated using the author's
|
||||||
|
Apple / Linux Convergence macros and can be built on both Apple Mac OS
|
||||||
|
and Linux ARM systems.
|
||||||
|
|
||||||
Let's look at:
|
Let's look at:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
|
|
@ -105,23 +111,23 @@ Notice all the values were truncated to the whole number that is
|
||||||
Truncation away from zero is not as easy. In fact, it cannot be
|
Truncation away from zero is not as easy. In fact, it cannot be
|
||||||
performed with a single instruction.
|
performed with a single instruction.
|
||||||
|
|
||||||
In C and C++:
|
In C (and C++):
|
||||||
|
|
||||||
```c
|
```c
|
||||||
iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
|
iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
|
||||||
```
|
```
|
||||||
|
|
||||||
If the `fv` is already equal to a whole number, the
|
If the `fv` is already equal to a whole number, the integer value will
|
||||||
integer value will be that whole number. Other wise the `iv` is
|
be that whole number. Otherwise, `iv` is the whole number further
|
||||||
the whole number further *away from zero*.
|
*away from zero*.
|
||||||
|
|
||||||
In C++, a more sophisticated version would require `<cmath>` and
|
In C++, a more sophisticated version would require `<cmath>` and could
|
||||||
could look like:
|
look like:
|
||||||
|
|
||||||
```c++
|
```c++
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int MyTruncate(T x) {
|
int MyTruncate(T x) {
|
||||||
return int((x < 0) ? floor(x) : ceil(x));
|
return int((x < 0) ? floor(x) : ceil(x));
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -136,15 +142,15 @@ given above.
|
||||||
|
|
||||||
```asm
|
```asm
|
||||||
RoundAwayFromZero:
|
RoundAwayFromZero:
|
||||||
fcmp d0, 0
|
fcmp d0, 0
|
||||||
ble 1f
|
ble 1f
|
||||||
// Value is positive, truncate towards positive infinity (ceil)
|
// Value is positive, truncate towards positive infinity (ceil)
|
||||||
frintp d0, d0
|
frintp d0, d0
|
||||||
b 2f
|
b 2f
|
||||||
1: // Value is negative, truncate towards negative infinity (floor)
|
1: // Value is negative, truncate towards negative infinity (floor)
|
||||||
frintm d0, d0
|
frintm d0, d0
|
||||||
2: fcvtzs x0, d0
|
2: fcvtzs x0, d0
|
||||||
ret
|
ret
|
||||||
```
|
```
|
||||||
|
|
||||||
`frintp` and `frintm` will honor the source register already being
|
`frintp` and `frintm` will honor the source register already being
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,19 @@
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#define MAX 4
|
||||||
|
double d[4] = { 1.555555, 2.666666, 3.777777, 4.888888 };
|
||||||
|
float f[4] = { 1.111111, 2.222222, 3.333333, 4.444444 };
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
for (long counter = 0; counter < MAX; counter++) {
|
||||||
|
printf("index %ld double %f float %f\n", counter, d[counter], f[counter]);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
extern "C" uint32_t T1(double d) {
|
extern "C" uint32_t T1(double d) {
|
||||||
return uint32_t(d);
|
return uint32_t(d);
|
||||||
}
|
}
|
||||||
|
|
@ -19,3 +33,4 @@ extern "C" int32_t T4(float f) {
|
||||||
extern "C" uint64_t T5(double d) {
|
extern "C" uint64_t T5(double d) {
|
||||||
return uint64_t(d);
|
return uint64_t(d);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
# Section 2 / What Are Floating Point Numbers?
|
# Section 2 / What Are Floating Point Numbers?
|
||||||
|
|
||||||
Before we introduce floating point instructions in the AARCH64 ISA, it is
|
Before we introduce floating point instructions in the AARCH64 ISA, it
|
||||||
worth going over exactly what a floating point value is. Integers are easy.
|
is worth going over exactly what a floating point value is. Integers are
|
||||||
They're just powers of two summed together with a single bit at one end
|
easy. They're just powers of two summed together with a single bit at
|
||||||
determining the sign (if the integer is signed).
|
one end determining the sign (if the integer is signed).
|
||||||
|
|
||||||
But what are floating numbers?
|
But what are floating numbers?
|
||||||
|
|
||||||
|
|
@ -45,41 +45,42 @@ Full fraction: 0 0
|
||||||
Equation: 1 x 2^0 1 x 2^0
|
Equation: 1 x 2^0 1 x 2^0
|
||||||
```
|
```
|
||||||
|
|
||||||
On the line marked "Value" you can see the values represented as double precision
|
On the line marked "Value" you can see the values represented as double
|
||||||
and as single precision. Under "Comment" you can see that there
|
precision and as single precision. Under "Comment" you can see that
|
||||||
is no difference between the double and the single precision numbers. Remember
|
there is no difference between the double and the single precision
|
||||||
the key thing about floating point numbers: they are approximations. Sometimes,
|
numbers. Remember the key thing about floating point numbers: they are
|
||||||
as in the case of whole numbers like 1, the approximation is exact. When there
|
approximations. Sometimes, as in the case of whole numbers like 1, the
|
||||||
is a difference, the difference will be small and printed in the Comment
|
approximation is exact. When there is a difference, the difference will
|
||||||
column.
|
be small and printed in the Comment column.
|
||||||
|
|
||||||
The Sign field is 0. This indicates that the whole floating point value is positive.
|
The Sign field is 0. This indicates that the whole floating point value
|
||||||
There are no other sign values including in the exponent. However, exponents can
|
is positive. There are no other sign bits, not even in the exponent.
|
||||||
be negative... this is explained next.
|
However, exponents can be negative... this is explained next.
|
||||||
|
|
||||||
First, notice that the double precision exponent is 11 bits wide while the single
|
First, notice that the double precision exponent is 11 bits wide while
|
||||||
precision exponent is only 8 bits wide. Next, notice the values... 1023 and 127
|
the single precision exponent is only 8 bits wide. Next, notice the
|
||||||
respectively. The value of 1 is 1 raised to the power of 0 base 2. So why 1023
|
values... 1023 and 127 respectively. The value of 1 is 1 raised to the
|
||||||
or 127?
|
power of 0 base 2. So why 1023 or 127?
|
||||||
|
|
||||||
There is no sign bit for the exponent yet the exponent must support negative numbers.
|
There is no sign bit for the exponent yet the exponent must support
|
||||||
It does this by incorporating an offset of 1023 and 127 respectively (both representing
|
negative numbers. It does this by incorporating an offset of 1023 and
|
||||||
0). Anything above 1023 and 127 are positive exponents. Anything below these values
|
127 respectively (both representing 0). Anything above 1023 and 127 are
|
||||||
are negative exponents.
|
positive exponents. Anything below these values are negative exponents.
|
||||||
|
|
||||||
The De-biased line are the values of the exponent with their bias removed.
|
The De-biased line shows the values of the exponent with their bias
|
||||||
Notice they work out to 0. So, the value of 1 is represented by 1 raised to the power of 0.
|
removed. Notice they work out to 0. So, the value of 1 is represented by
|
||||||
|
1 times 2 raised to the power of 0.
|
||||||
|
|
||||||
The Fraction has a value of zero. Where's the 1 that we've been talking about get stored?
|
The Fraction has a value of zero. Where does the 1 we've been talking
|
||||||
It isn't. A value of 1 is always assumed to be the only value in front of the decimal place
|
about get stored? It isn't. A value of 1 is always assumed to be the
|
||||||
in a `float` or `double`. Every floating point value is 1 plus a fraction all raised to
|
only value in front of the decimal place in a `float` or `double`. Every
|
||||||
some power of 2.
|
floating point value is 1 plus a fraction all raised to some power of 2.
|
||||||
|
|
||||||
We thought we'd highlight a few of the bits in the fractional part of a floating point
|
We thought we'd highlight a few of the bits in the fractional part of a
|
||||||
number. These can be illuminating when the value being shown is in the range of
|
floating point number. These can be illuminating when the value being
|
||||||
-2 < x < 2. Notice the the values of -2 and 2 are outside this range. In other words,
|
shown is in the range of -2 < x < 2. Notice that the values of -2 and 2
|
||||||
showing the first few bits of the fraction are illuminating when the exponent works
|
are outside this range. In other words, showing the first few bits of
|
||||||
out to 0.
|
the fraction is illuminating when the exponent works out to 0.
|
||||||
|
|
||||||
* Halves - There are no halves in the value of 1.
|
* Halves - There are no halves in the value of 1.
|
||||||
|
|
||||||
|
|
@ -91,11 +92,12 @@ out to 0.
|
||||||
|
|
||||||
* Thirty Seconds - There are no thirty seconds in the value of 1.
|
* Thirty Seconds - There are no thirty seconds in the value of 1.
|
||||||
|
|
||||||
Of course, there are more fractional values to `float` and `doubles` but listing them all
|
Of course, there are more fractional values to `float` and `doubles` but
|
||||||
wouldn't be a fun tasks and we're all about fun. :)
|
listing them all wouldn't be a fun task and we're all about fun. :)
|
||||||
|
|
||||||
Finally, the Equation line rebuilds the floating point value in its actual "scientific"
|
Finally, the Equation line rebuilds the floating point value in its
|
||||||
notation. The value of 1 is a 1 raised to the zeroth power of 2.
|
actual "scientific" notation. The value of 1 is 1 times the zeroth
|
||||||
|
power of 2.
|
||||||
|
|
||||||
How about a value of 1.5?
|
How about a value of 1.5?
|
||||||
|
|
||||||
|
|
@ -115,7 +117,7 @@ Full fraction: 0.5 0.5
|
||||||
Equation: 1.5 x 2^0 1.5 x 2^0
|
Equation: 1.5 x 2^0 1.5 x 2^0
|
||||||
```
|
```
|
||||||
|
|
||||||
The only difference is that there is a bit turned on in the fraction.
|
The only difference is that there is a bit turned on in the fraction.
|
||||||
It is the most significant bit... there is a half in one and a half.
|
It is the most significant bit... there is a half in one and a half.
|
||||||
|
|
||||||
How about 1.875?
|
How about 1.875?
|
||||||
|
|
@ -138,11 +140,10 @@ Equation: 1.875 x 2^0 1.875 x 2^0
|
||||||
|
|
||||||
How about 8.5?
|
How about 8.5?
|
||||||
|
|
||||||
This is the first time we are looking at
|
This is the first time we are looking at a value which increases the
|
||||||
a value which increases the (de-biased) exponent to non-zero.
|
(de-biased) exponent to non-zero. Things get a little more complicated.
|
||||||
Things get a little more complicated. Now, there isn't an
|
Now, there isn't an obvious mapping of the fraction bits to the final
|
||||||
obvious mapping of the fraction bits to the final number they
|
number they represent. This is the impact of the non-zero exponent.
|
||||||
represent. This is the impact of the non-zero exponent.
|
|
||||||
|
|
||||||
```text
|
```text
|
||||||
Component Double Float Comment
|
Component Double Float Comment
|
||||||
|
|
@ -160,17 +161,16 @@ Full fraction: 0.0625 0.0625
|
||||||
Equation: 1.0625 x 2^3 1.0625 x 2^3
|
Equation: 1.0625 x 2^3 1.0625 x 2^3
|
||||||
```
|
```
|
||||||
|
|
||||||
Even though there is a half in eight and a half, the Halves bit
|
Even though there is a half in eight and a half, the Halves bit is 0.
|
||||||
is 0. What is 8? Eight is a 2 raised to the power of 3. In
|
What is 8? Eight is 2 raised to the power of 3. In other words, the
|
||||||
other words, the bit for the half in 8.5 is shifted to the
|
bit for the half in 8.5 is shifted to the right by three bits. Confirm
|
||||||
right by three bits. Confirm this by looking at the
|
this by looking at the Sixteenths. *There's our bit!*
|
||||||
Sixteenths. *There's our bit!*
|
|
||||||
|
|
||||||
Turn your attention to the Equation. 1.0625 multiplied by 8
|
Turn your attention to the Equation. 1.0625 multiplied by 8 is 8.5. Cool
|
||||||
is 8.5. Cool huh?
|
huh?
|
||||||
|
|
||||||
How about something harder? Like 8.51 - just a teensy bit
|
How about something harder? Like 8.51 - just a teensy bit different from
|
||||||
different from the previous example.
|
the previous example.
|
||||||
|
|
||||||
```text
|
```text
|
||||||
Component Double Float Comment
|
Component Double Float Comment
|
||||||
|
|
@ -189,19 +189,19 @@ Equation: 1.06375 x 2^3 1.0637500286 x 2^3
|
||||||
```
|
```
|
||||||
|
|
||||||
For the first time we're seeing that 8.51 cannot be perfectly
|
For the first time we're seeing that 8.51 cannot be perfectly
|
||||||
represented by `float`. `double` gets it right. The difference
|
represented by `float`. `double` comes much closer. The difference between
|
||||||
between the `double` and `float` is the very small number shown
|
the `double` and `float` is the very small number shown on the first
|
||||||
on the first line of output.
|
line of output.
|
||||||
|
|
||||||
## When a Number is Not a Number and How About Infinity?
|
## When a Number is Not a Number and How About Infinity?
|
||||||
|
|
||||||
`NaN` is an actual value. It means `not a number`.
|
`NaN` is an actual value. It means `not a number`.
|
||||||
|
|
||||||
[Here](./floatster.cpp) is the source code to another program we
|
[Here](./floatster.cpp) is the source code to another program we have
|
||||||
have written that explores both `NaN` and `Inf`.
|
written that explores both `NaN` and `Inf`.
|
||||||
|
|
||||||
Let's examine `NaN` which is produced when you do naughty things
|
Let's examine `NaN` which is produced when you do naughty things like
|
||||||
like take the square root of a negative number.
|
take the square root of a negative number.
|
||||||
|
|
||||||
```text
|
```text
|
||||||
Enter a number (-100 causes divide by 0, -200 causes sqrt(-1): -200
|
Enter a number (-100 causes divide by 0, -200 causes sqrt(-1): -200
|
||||||
|
|
@ -213,8 +213,8 @@ NaN: 1
|
||||||
Inf: 0
|
Inf: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
`Nan` is true (for `float`) when its exponent is 0xFF and fraction
|
A `float` is `NaN` when its exponent is 0xFF and its fraction is
|
||||||
is not zero.
|
not zero.
|
||||||
|
|
||||||
You'll never get a `float` that is 2 raised to the power of 128 because
|
You'll never get a `float` that is 2 raised to the power of 128 because
|
||||||
that value is reserved for `NaN` and `Inf`.
|
that value is reserved for `NaN` and `Inf`.
|
||||||
|
|
@ -232,13 +232,13 @@ Inf: 1
|
||||||
```
|
```
|
||||||
|
|
||||||
Once again, notice the out-of-bounds value for the exponent: 0xFF.
|
Once again, notice the out-of-bounds value for the exponent: 0xFF.
|
||||||
Secondly, the fraction is fully zero. The sign bit specifies negative
|
Secondly, the fraction is fully zero. The sign bit specifies negative or
|
||||||
or positive infinity.
|
positive infinity.
|
||||||
|
|
||||||
## Testing for Naughty Values
|
## Testing for Naughty Values
|
||||||
|
|
||||||
Thankfully, there exists two functions that will do the inspection
|
Thankfully, there exist two functions that will do the inspection for
|
||||||
for you, looking for `Nan` and `Inf`.
|
you, looking for `NaN` and `Inf`.
|
||||||
|
|
||||||
* `isnan(floating point value)` and
|
* `isnan(floating point value)` and
|
||||||
|
|
||||||
|
|
@ -246,6 +246,6 @@ for you, looking for `Nan` and `Inf`.
|
||||||
|
|
||||||
Both of these functions work with `double` and `float`.
|
Both of these functions work with `double` and `float`.
|
||||||
|
|
||||||
Once a variable goes `NaN` or `Inf`, all subsequent operations
|
Once a variable goes `NaN` or `Inf`, all subsequent operations will
|
||||||
will remain `NaN` or `Inf` until the variable is reset to a
|
remain `NaN` or `Inf` until the variable is reset to a valid number.
|
||||||
valid number. That is, 1 + `Inf` is `Inf`, for example.
|
That is, 1 + `Inf` is `Inf`, for example.
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ We say aliases because, like the integer registers, how you reference a
|
||||||
floating point register determines how it is interpreted.
|
floating point register determines how it is interpreted.
|
||||||
|
|
||||||
For example, in the following image, note the overlap of two single
|
For example, in the following image, note the overlap of two single
|
||||||
precision floats within a single double precision floating point
|
precision floats within a single double precision floating point
|
||||||
register.
|
register.
|
||||||
|
|
||||||

|

|
||||||
|
|
@ -35,7 +35,6 @@ It is worth noting early and often that you should not mix dealing
|
||||||
with different precisions assuming that because of the overlaps in
|
with different precisions assuming that because of the overlaps in
|
||||||
space, you'll get a meaningful result.
|
space, you'll get a meaningful result.
|
||||||
|
|
||||||
The above image does not show the corresponding layout of *half
|
The above image does not show the corresponding layout of [half
|
||||||
precision* floating point registers. `H0` sits in the least
|
precision](./half.md) floating point registers. `H0` sits in the least
|
||||||
significant bits of `S0` and so on.
|
significant bits of `S0` and so on.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue