big refactoring and sprucing up of programs and text

This commit is contained in:
Perry Kivolowitz 2023-01-20 17:59:43 -06:00
parent 8b00b33c75
commit 5fc9422143
8 changed files with 295 additions and 210 deletions

View file

@ -1,4 +1,6 @@
.global main
#include "apple-linux-convergence.S"
GLABEL main
.text
.align 2
@ -7,77 +9,95 @@ dmore .req d21
ndless .req d22
ndmore .req d23
Emit: str x30, [sp, -16]!
bl printf
ldr x30, [sp], 16
Emit:
START_PROC
PUSH_P x29, x30
mov x29, sp
#if defined(__APPLE__)
PUSH_P x1, x2
CRT printf
add sp, sp, 16
#else
CRT printf
#endif
POP_P x29, x30
ret
END_PROC
main: str x30, [sp, -16]!
MAIN
START_PROC
PUSH_P x29, x30
stp dless, dmore, [sp, -16]!
stp ndless, ndmore, [sp, -16]!
mov x29, sp
ldr x0, =vless
LLD_ADDR x0, leg
CRT printf
LLD_ADDR x0, vless
ldr dless, [x0]
ldr dmore, [x0, 8]
ldr ndless, [x0, 16]
ldr ndmore, [x0, 24]
//-fcvtps----------------------
//-fcvtps- Floating-point Convert to Signed integer, rounding toward Plus infinity
fcvtps x1, dless
fcvtps x2, dmore
ldr x0, =fmt1
LLD_ADDR x0, fmt1
bl Emit
fcvtps x1, ndless
fcvtps x2, ndmore
ldr x0, =fmt1
LLD_ADDR x0, fmt1
bl Emit
//-fcvtms-----------------------
fcvtms x1, dless
fcvtms x2, dmore
ldr x0, =fmt2
//-fcvtns- Floating-point Convert to Signed integer, rounding to nearest with ties to even (scalar).
fcvtns x1, dless
fcvtns x2, dmore
LLD_ADDR x0, fmt2
bl Emit
fcvtms x1, ndless
fcvtms x2, ndmore
ldr x0, =fmt2
fcvtns x1, ndless
fcvtns x2, ndmore
LLD_ADDR x0, fmt2
bl Emit
//-fcvtzs-----------------------
//-fcvtzs- Floating-point Convert to Signed integer, rounding toward Zero (scalar).
fcvtzs x1, dless
fcvtzs x2, dmore
ldr x0, =fmt4
LLD_ADDR x0, fmt4
bl Emit
fcvtzs x1, ndless
fcvtzs x2, ndmore
ldr x0, =fmt4
LLD_ADDR x0, fmt4
bl Emit
//-fcvtas-----------------------
//-fcvtas- Floating-point Convert to Signed integer, rounding to nearest with ties to Away (scalar).
fcvtas x1, dless
fcvtas x2, dmore
ldr x0, =fmt3
LLD_ADDR x0, fmt3
bl Emit
fcvtas x1, ndless
fcvtas x2, ndmore
ldr x0, =fmt3
LLD_ADDR x0, fmt3
bl Emit
//------------------------------
ldp ndless, ndmore, [sp], 16
ldp dless, dmore, [sp], 16
ldr x30, [sp], 16
POP_P x29, x30
mov w0, wzr
ret
END_PROC
.section .rodata
.data
vless: .double 5.49
vmore: .double 5.51
nvless: .double -5.49
nvmore: .double -5.51
fmt1: .asciz "fcvtps less: %d more: %d\n"
fmt2: .asciz "fcvtms less: %d more: %d\n"
fmt2: .asciz "fcvtns less: %d more: %d\n"
fmt3: .asciz "fcvtta less: %d more: %d\n"
fmt4: .asciz "fcvtzs less: %d more: %d\n"
leg: .asciz "less values are +/- 5.49. more values are +/- 5.51.\n"
.end

View file

@ -1,6 +1,5 @@
/* Perry Kivolowitz
Professor and Chair of Computer Science
Carthage College
A Gentle Introduction to Assembly Language
*/
#include <iostream>
@ -12,24 +11,28 @@
using namespace std;
const int BIASD = 1023;
const int BIASF = 127;
const int BIASD = 1023; // biasing value for double exponents
const int BIASF = 127; // biasing value for floats
const int FRAC_SIZD = 52;
const int FRAC_SIZF = 23;
// The mantissa controls precision.
const int EXPO_SIZD = 11;
const int EXPO_SIZF = 8;
const int FRAC_SIZD = 52; // number of bits in double's mantissa
const int FRAC_SIZF = 23; // number of bits in float's mantissa
// The exponent controls range.
const int EXPO_SIZD = 11; // number of bits in a double's exponent
const int EXPO_SIZF = 8; // number of bits in a float's exponent
const int SIGN_SIZE = 1;
struct SP {
struct SP { // construction of a float
unsigned int frac : FRAC_SIZF;
unsigned int expo : EXPO_SIZF;
unsigned int sign : SIGN_SIZE;
};
struct DP {
struct DP { // construction of a double
unsigned long frac : FRAC_SIZD;
unsigned long expo : EXPO_SIZD;
unsigned long sign : SIGN_SIZE;
@ -61,7 +64,9 @@ template<class T>
string MakeEquation(T & u, int bias) {
stringstream ss;
bool is_double = (bias == BIASD);
ss << (u.sign ? "-" : "") << dec << setprecision(11) << 1.0 + DeBinary(is_double, u.frac) << " x 2^" << (u.expo - bias);
ss << (u.sign ? "-" : "") << dec << setprecision(11);
ss << 1.0 + DeBinary(is_double, u.frac);
ss << " x 2^" << (u.expo - bias);
return ss.str();
}
@ -69,22 +74,26 @@ int main(int argc, char ** argv) {
Double d;
Single f;
const int fore_space = 20;
const int field_space = 25;
const int fore_space = 18;
const int field_space = 20;
if (argc < 2) {
cerr << "Must supply a floating point value as a command line argument.\n";
cerr << "Requires a floating point value on command line .\n";
return 1;
}
d.d = atof(argv[1]);
f.f = float(d.d);
cout << left << setw(fore_space) << "Component" << left << setw(25) << "Double";
cout << left << setw(field_space) << "Float" << "Comment" << endl;
cout << left << setw(fore_space) << "Component" << left;
cout << setw(field_space);
cout << "Double" << left << setw(field_space) << "Float";
cout << "Comment" << endl;
cout << left << setw(fore_space) << "Value:" << setw(25) << setprecision(10) << d.d;
cout << left << setw(fore_space) << "Value:" << setw(field_space);
cout << setprecision(10) << d.d;
cout << setw(field_space) << setprecision(10) << f.f;
cout << "Delta(F - D): " << setw(16) << setprecision(10) << f.f - d.d << endl;
cout << "Delta(F - D): " << setw(16) << setprecision(10);
cout << f.f - d.d << endl;
cout << left << setw(fore_space) << "Sign:";
cout << setw(field_space) << (bool)d.D.sign;
@ -107,28 +116,38 @@ int main(int argc, char ** argv) {
cout << endl;
cout << setw(fore_space) << "Halves:";
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 1)) & 1);
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 1)) & 1);
cout << setw(field_space) << hex;
cout << ((d.D.frac >> (FRAC_SIZD - 1)) & 1);
cout << setw(field_space) << hex;
cout << ((f.F.frac >> (FRAC_SIZF - 1)) & 1);
cout << endl;
cout << setw(fore_space) << "Quarters:";
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 2)) & 1);
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 2)) & 1);
cout << setw(field_space) << hex;
cout << ((d.D.frac >> (FRAC_SIZD - 2)) & 1);
cout << setw(field_space) << hex;
cout << ((f.F.frac >> (FRAC_SIZF - 2)) & 1);
cout << endl;
cout << setw(fore_space) << "Eighths:";
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 3)) & 1);
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 3)) & 1);
cout << setw(field_space) << hex;
cout << ((d.D.frac >> (FRAC_SIZD - 3)) & 1);
cout << setw(field_space) << hex;
cout << ((f.F.frac >> (FRAC_SIZF - 3)) & 1);
cout << endl;
cout << setw(fore_space) << "Sixteenths:";
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 4)) & 1);
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 4)) & 1);
cout << setw(field_space) << hex;
cout << ((d.D.frac >> (FRAC_SIZD - 4)) & 1);
cout << setw(field_space) << hex;
cout << ((f.F.frac >> (FRAC_SIZF - 4)) & 1);
cout << endl;
cout << setw(fore_space) << "Thirty seconds:";
cout << setw(field_space) << hex << ((d.D.frac >> (FRAC_SIZD - 5)) & 1);
cout << setw(field_space) << hex << ((f.F.frac >> (FRAC_SIZF - 5)) & 1);
cout << setw(field_space) << hex;
cout << ((d.D.frac >> (FRAC_SIZD - 5)) & 1);
cout << setw(field_space) << hex;
cout << ((f.F.frac >> (FRAC_SIZF - 5)) & 1);
cout << endl;
cout << setw(fore_space) << "Full fraction:";

View file

@ -150,13 +150,13 @@ what that magic is.
Build the program with the `-g` option to enable debugging using GDB.
```text
$ gcc -g t.s
% gcc -g t.s
```
Then launch GDB on the executable:
```text
$ gdb a.out
% gdb a.out
```
Set a breakpoint on line 6.

View file

@ -1,38 +1,64 @@
.global main
#include "apple-linux-convergence.S"
GLABEL main
.text
.align 2
.p2align 2
counter .req x20
dptr .req x21
fptr .req x22
.equ max, 4
main: stp counter, x30, [sp, -16]!
stp dptr, fptr, [sp, -16]!
ldr dptr, =d
ldr fptr, =f
MAIN
START_PROC
PUSH_P counter, x30
PUSH_P dptr, fptr
PUSH_R x29
mov x29, sp
LLD_ADDR dptr, d
LLD_ADDR fptr, f
mov counter, xzr
1: cmp counter, max
beq 2f
ldr d0, [dptr, counter, lsl 3]
ldr s1, [fptr, counter, lsl 2]
fcvt d1, s1
ldr x0, =fmt
LLD_ADDR x0, fmt
add counter, counter, 1
mov x1, counter
bl printf
#if defined(__APPLE__)
/*
Give us some stack space. Then read the printf template
string right to left. Variadics on the Mac are difficult
to get right. Remember that printf never prints floats.
Only doubles. Internally, floats are converted to double.
See the fcvt instruction above.
*/
sub sp, sp, 32
str d1, [sp, 16]
str d0, [sp, 8]
str x1, [sp]
CRT printf
add sp, sp, 32
#else
CRT printf
#endif
b 1b
2: ldp dptr, fptr, [sp], 16
ldp counter, x30, [sp], 16
2: POP_R x29
POP_P dptr, fptr
POP_P counter, x30
mov w0, wzr
END_PROC
ret
.data
fmt: .asciz "%d %f %f\n"
d: .double 1.111111, 2.222222, 3.333333, 4.444444
fmt: .asciz "index %ld double %f float %f\n"
.p2align 3
d: .double 1.555555, 2.666666, 3.777777, 4.888888
.p2align 2
f: .float 1.111111, 2.222222, 3.333333, 4.444444
.end

View file

@ -13,9 +13,11 @@ For this chapter, I will use:
* Rounding means picking some fractional value and if the float's
fraction is higher, you go one way and if lower, you go the other.
* Truncation means you don't look too closely at the fractional value.
Instead, you just eliminate the fractional part and slam the whole
number ... one way or the other.
* Truncation means you don't care about the fractional value. You just
eliminate the fractional part and slam the whole number ... one way or
the other.
"One way or the other" is defined next.
## Truncation Towards Zero
@ -30,8 +32,8 @@ Diving a little deeper, there is a choice to be made as to whether or
not `integer_variable` is signed or unsigned. And, whether or not
`integer_variable` is a 32 bit or 64 bit value.
The instruction is `fcvtz` - convert towards zero. Then, the choice
as to whether to produce a signed or unsigned result is defined by the
The instruction is `fcvtz` - convert towards zero. Then, the choice as
to whether to produce a signed or unsigned result is defined by the
final letter: `u` or `s`.
| Mnemonic | Meaning |
@ -41,7 +43,7 @@ final letterL `u` or `s`.
As an example of how the ARM documentation is confusing - this
instruction which completely discards the fractional value is said by
the ARM documentation as doing rounding.
the ARM documentation to be doing rounding, not truncating.
The choice of source register defines whether you are converting
a double or single precision floating point value.
@ -65,9 +67,13 @@ Examples where `d` is a `double` and `f` is a `float`:
| `int64_t(d)` | `fcvtzs x0, d0` |
| `uint64_t(d)` | `fcvtzu x0, d0` |
[Here](./asm_rounding.s) is a program which demonstrates various
[Here](./asm_rounding.S) is a program which demonstrates various
ways of converting doubles to integers.
Note: This source code has been updated using the author's
Apple / Linux Convergence macros and can be built on both Apple Mac OS
and Linux ARM systems.
Let's look at:
```text
@ -105,18 +111,18 @@ Notice all the values were truncated to the whole number that is
Truncation away from zero is not as easy. In fact, it cannot be
performed with a single instruction.
In C and C++:
In C (and C++):
```c
iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
```
If the `fv` is already equal to a whole number, the
integer value will be that whole number. Other wise the `iv` is
the whole number further *away from zero*.
If the `fv` is already equal to a whole number, the integer value will
be that whole number. Otherwise the `iv` is the whole number further
*away from zero*.
In C++, a more sophisticated version would require `<cmath>` and
could look like:
In C++, a more sophisticated version would require `<cmath>` and could
look like:
```c++
template <typename T>

View file

@ -1,5 +1,19 @@
#include <cinttypes>
#include <stdio.h>
#define MAX 4
double d[4] = { 1.555555, 2.666666, 3.777777, 4.888888 };
float f[4] = { 1.111111, 2.222222, 3.333333, 4.444444 };
int main() {
for (long counter = 0; counter < MAX; counter++) {
printf("index %ld double %f float %f\n", counter, d[counter], f[counter]);
}
return 0;
}
/*
extern "C" uint32_t T1(double d) {
return uint32_t(d);
}
@ -19,3 +33,4 @@ extern "C" int32_t T4(float f) {
extern "C" uint64_t T5(double d) {
return uint64_t(d);
}
*/

View file

@ -1,9 +1,9 @@
# Section 2 / What Are Floating Point Numbers?
Before we introduce floating point instructions in the AARCH64 ISA, it is
worth going over exactly what a floating point value is. Integers are easy.
They're just powers of two summed together with a single bit at one end
determining the sign (if the integer is signed).
Before we introduce floating point instructions in the AARCH64 ISA, it
is worth going over exactly what a floating point value is. Integers are
easy. They're just powers of two summed together with a single bit at
one end determining the sign (if the integer is signed).
But what are floating numbers?
@ -45,41 +45,42 @@ Full fraction: 0 0
Equation: 1 x 2^0 1 x 2^0
```
On the line marked "Value" you can see the values represented as double precision
and as single precision. Under "Comment" you can see that there
is no difference between the double and the single precision numbers. Remember
the key thing about floating point numbers: they are approximations. Sometimes,
as in the case of whole numbers like 1, the approximation is exact. When there
is a difference, the difference will be small and printed in the Comment
column.
On the line marked "Value" you can see the values represented as double
precision and as single precision. Under "Comment" you can see that
there is no difference between the double and the single precision
numbers. Remember the key thing about floating point numbers: they are
approximations. Sometimes, as in the case of whole numbers like 1, the
approximation is exact. When there is a difference, the difference will
be small and printed in the Comment column.
The Sign field is 0. This indicates that the whole floating point value is positive.
There are no other sign values including in the exponent. However, exponents can
be negative... this is explained next.
The Sign field is 0. This indicates that the whole floating point value
is positive. There are no other sign values including in the exponent.
However, exponents can be negative... this is explained next.
First, notice that the double precision exponent is 11 bits wide while the single
precision exponent is only 8 bits wide. Next, notice the values... 1023 and 127
respectively. The value of 1 is 1 raised to the power of 0 base 2. So why 1023
or 127?
First, notice that the double precision exponent is 11 bits wide while
the single precision exponent is only 8 bits wide. Next, notice the
values... 1023 and 127 respectively. The value of 1 is 1 multiplied by 2
raised to the power of 0. So why 1023 or 127?
There is no sign bit for the exponent yet the exponent must support negative numbers.
It does this by incorporating an offset of 1023 and 127 respectively (both representing
0). Anything above 1023 and 127 are positive exponents. Anything below these values
are negative exponents.
There is no sign bit for the exponent yet the exponent must support
negative numbers. It does this by incorporating an offset of 1023 and
127 respectively (both representing 0). Anything above 1023 and 127 are
positive exponents. Anything below these values are negative exponents.
The De-biased line are the values of the exponent with their bias removed.
Notice they work out to 0. So, the value of 1 is represented by 1 raised to the power of 0.
The De-biased line shows the values of the exponent with their bias
removed. Notice they work out to 0. So, the value of 1 is represented by
1 multiplied by 2 raised to the power of 0.
The Fraction has a value of zero. Where's the 1 that we've been talking about get stored?
It isn't. A value of 1 is always assumed to be the only value in front of the decimal place
in a `float` or `double`. Every floating point value is 1 plus a fraction all raised to
some power of 2.
The Fraction has a value of zero. Where does the 1 that we've been
talking about get stored? It isn't. A value of 1 is always assumed to be
the only value in front of the decimal place in a `float` or `double`.
Every floating point value is 1 plus a fraction, all multiplied by some
power of 2.
We thought we'd highlight a few of the bits in the fractional part of a floating point
number. These can be illuminating when the value being shown is in the range of
-2 < x < 2. Notice the the values of -2 and 2 are outside this range. In other words,
showing the first few bits of the fraction are illuminating when the exponent works
out to 0.
We thought we'd highlight a few of the bits in the fractional part of a
floating point number. These can be illuminating when the value being
shown is in the range of -2 < x < 2. Notice that the values of -2 and 2
are outside this range. In other words, showing the first few bits of
the fraction is illuminating when the exponent works out to 0.
* Halves - There are no halves in the value of 1.
@ -91,11 +92,12 @@ out to 0.
* Thirty Seconds - There are no thirty seconds in the value of 1.
Of course, there are more fractional values to `float` and `doubles` but listing them all
wouldn't be a fun tasks and we're all about fun. :)
Of course, there are more fractional values to `float` and `double` but
listing them all wouldn't be a fun task and we're all about fun. :)
Finally, the Equation line rebuilds the floating point value in its actual "scientific"
notation. The value of 1 is a 1 raised to the zeroth power of 2.
Finally, the Equation line rebuilds the floating point value in its
actual "scientific" notation. The value of 1 is 1 multiplied by 2 raised
to the zeroth power.
How about a value of 1.5?
@ -138,11 +140,10 @@ Equation: 1.875 x 2^0 1.875 x 2^0
How about 8.5?
This is the first time we are looking at
a value which increases the (de-biased) exponent to non-zero.
Things get a little more complicated. Now, there isn't an
obvious mapping of the fraction bits to the final number they
represent. This is the impact of the non-zero exponent.
This is the first time we are looking at a value which increases the
(de-biased) exponent to non-zero. Things get a little more complicated.
Now, there isn't an obvious mapping of the fraction bits to the final
number they represent. This is the impact of the non-zero exponent.
```text
Component Double Float Comment
@ -160,17 +161,16 @@ Full fraction: 0.0625 0.0625
Equation: 1.0625 x 2^3 1.0625 x 2^3
```
Even though there is a half in eight and a half, the Halves bit
is 0. What is 8? Eight is a 2 raised to the power of 3. In
other words, the bit for the half in 8.5 is shifted to the
right by three bits. Confirm this by looking at the
Sixteenths. *There's our bit!*
Even though there is a half in eight and a half, the Halves bit is 0.
What is 8? Eight is a 2 raised to the power of 3. In other words, the
bit for the half in 8.5 is shifted to the right by three bits. Confirm
this by looking at the Sixteenths. *There's our bit!*
Turn your attention to the Equation. 1.0625 multiplied by 8
is 8.5. Cool huh?
Turn your attention to the Equation. 1.0625 multiplied by 8 is 8.5. Cool
huh?
How about something harder? Like 8.51 - just a teensy bit
different from the previous example.
How about something harder? Like 8.51 - just a teensy bit different from
the previous example.
```text
Component Double Float Comment
@ -189,19 +189,19 @@ Equation: 1.06375 x 2^3 1.0637500286 x 2^3
```
For the first time we're seeing that 8.51 cannot be perfectly
represented by `float`. `double` gets it right. The difference
between the `double` and `float` is the very small number shown
on the first line of output.
represented by `float`. `double` cannot represent it exactly either, but
it comes close enough to look exact at the precision printed. The
difference between the `double` and `float` is the very small number
shown on the first line of output.
## When a Number is Not a Number and How About Infinity?
`NaN` is an actual value. It means `not a number`.
[Here](./floatster.cpp) is the source code to another program we
have written that explores both `NaN` and `Inf`.
[Here](./floatster.cpp) is the source code to another program we have
written that explores both `NaN` and `Inf`.
Let's examine `NaN` which is produced when you do naughty things
like take the square root of a negative number.
Let's examine `NaN` which is produced when you do naughty things like
take the square root of a negative number.
```text
Enter a number (-100 causes divide by 0, -200 causes sqrt(-1): -200
@ -213,8 +213,8 @@ NaN: 1
Inf: 0
```
`Nan` is true (for `float`) when its exponent is 0xFF and fraction
is not zero.
`NaN` is true (for `float`) when its exponent is 0xFF and fraction is
not zero.
You'll never get a `float` that is 2 raised to the power of 128 because
that value is reserved for `NaN` and `Inf`.
@ -232,13 +232,13 @@ Inf: 1
```
Once again, notice the out-of-bounds value for the exponent: 0xFF.
Secondly, the fraction is fully zero. The sign bit specifies negative
or positive infinity.
Secondly, the fraction is fully zero. The sign bit specifies negative or
positive infinity.
## Testing for Naughty Values
Thankfully, there exists two functions that will do the inspection
for you, looking for `Nan` and `Inf`.
Thankfully, there exist two functions that will do the inspection for
you, looking for `NaN` and `Inf`.
* `isnan(floating point value)` and
@ -246,6 +246,6 @@ for you, looking for `Nan` and `Inf`.
Both of these functions work with `double` and `float`.
Once a variable goes `NaN` or `Inf`, all subsequent operations
will remain `NaN` or `Inf` until the variable is reset to a
valid number. That is, 1 + `Inf` is `Inf`, for example.
Once a variable goes `NaN` or `Inf`, all subsequent operations will
remain `NaN` or `Inf` until the variable is reset to a valid number.
That is, 1 + `Inf` is `Inf`, for example.

View file

@ -35,7 +35,6 @@ It is worth noting early and often that you should not mix dealing
with different precisions assuming that because of the overlaps in
space, you'll get a meaningful result.
The above image does not show the corresponding layout of *half
precision* floating point registers. `H0` sits in the least
significant bits of `S0` and so on.
The above image does not show the corresponding layout of [half
precision](./half.md) floating point registers. `H0` sits in the least
significant bits of `S0` and so on.