mirror of
https://github.com/pkivolowitz/asm_book.git
synced 2026-06-21 03:26:46 +08:00
a tough section - source code
This commit is contained in:
parent
a4e498510c
commit
7b4aa0ef13
3 changed files with 334 additions and 0 deletions
83
section_1/float/asm_rounding.s
Normal file
83
section_1/float/asm_rounding.s
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
.global main
|
||||
.text
|
||||
.align 2
|
||||
|
||||
// Register aliases for the four sample values.
//
// These values must survive every call to printf made through Emit.
// Under the AArch64 procedure call standard only d8-d15 are preserved
// by a callee; d16-d31 are scratch and may be overwritten by any call.
// Keeping the values in d8-d11 therefore makes them safe across calls,
// and the save / restore of these registers performed by main becomes
// the preservation main owes its own caller.
dless   .req    d8              //  5.49
dmore   .req    d9              //  5.51
ndless  .req    d10             // -5.49
ndmore  .req    d11             // -5.51
|
||||
|
||||
// Emit: call printf while preserving the link register.
//
// Whatever the caller placed in the argument registers (main passes a
// format string in x0 and two integers in x1 and x2) is handed to
// printf untouched. Only x30 is saved, in a 16 byte stack slot so that
// sp keeps its 16 byte alignment.
Emit:   sub     sp, sp, 16
        str     x30, [sp]
        bl      printf
        ldr     x30, [sp]
        add     sp, sp, 16
        ret
|
||||
|
||||
// main: convert four sample doubles to integers with four different
// fcvt* instructions and print each pair of results.
//
// The sample values are read from the table starting at vless and are
// kept in the registers named by the dless / dmore / ndless / ndmore
// aliases. Each section converts one positive or negative pair into
// x1 and x2 and prints it through Emit with that section's format.
// Returns 0 in w0.
main:   str     x30, [sp, -16]!
        stp     dless, dmore, [sp, -16]!
        stp     ndless, ndmore, [sp, -16]!

        // The four doubles are laid out back to back starting at vless,
        // so two paired loads fetch all of them.
        ldr     x0, =vless
        ldp     dless, dmore, [x0]
        ldp     ndless, ndmore, [x0, 16]

//-fcvtps---- toward plus infinity, signed result ----------------------
        ldr     x0, =fmt1
        fcvtps  x1, dless
        fcvtps  x2, dmore
        bl      Emit

        ldr     x0, =fmt1
        fcvtps  x1, ndless
        fcvtps  x2, ndmore
        bl      Emit
//-fcvtms---- toward minus infinity, signed result ---------------------
        ldr     x0, =fmt2
        fcvtms  x1, dless
        fcvtms  x2, dmore
        bl      Emit

        ldr     x0, =fmt2
        fcvtms  x1, ndless
        fcvtms  x2, ndmore
        bl      Emit
//-fcvtzs---- toward zero, signed result -------------------------------
        ldr     x0, =fmt4
        fcvtzs  x1, dless
        fcvtzs  x2, dmore
        bl      Emit

        ldr     x0, =fmt4
        fcvtzs  x1, ndless
        fcvtzs  x2, ndmore
        bl      Emit
//-fcvtas---- to nearest, ties away from zero, signed result -----------
        ldr     x0, =fmt3
        fcvtas  x1, dless
        fcvtas  x2, dmore
        bl      Emit

        ldr     x0, =fmt3
        fcvtas  x1, ndless
        fcvtas  x2, ndmore
        bl      Emit
//----------------------------------------------------------------------

        // Undo the three pushes in reverse order.
        ldp     ndless, ndmore, [sp], 16
        ldp     dless, dmore, [sp], 16
        ldr     x30, [sp], 16
        mov     w0, wzr
        ret
|
||||
|
||||
.section .rodata
|
||||
// Sample values. They must stay contiguous and in this order: main
// addresses all four relative to vless (offsets 0, 8, 16 and 24).
        .p2align 3              // keep the doubles 8 byte aligned
vless:  .double 5.49
vmore:  .double 5.51
nvless: .double -5.49
nvmore: .double -5.51

// One format per conversion instruction. The results are passed to
// printf in 64 bit x registers, hence %ld rather than %d. fmt3 belongs
// to the fcvtas section of main, so its label names that instruction.
fmt1:   .asciz  "fcvtps less: %ld more: %ld\n"
fmt2:   .asciz  "fcvtms less: %ld more: %ld\n"
fmt3:   .asciz  "fcvtas less: %ld more: %ld\n"
fmt4:   .asciz  "fcvtzs less: %ld more: %ld\n"
|
||||
|
||||
.end
|
||||
35
section_1/float/rounding.cpp
Normal file
35
section_1/float/rounding.cpp
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/// Converts x to an int, moving any fractional value to the whole number
/// farther from zero. Whole numbers are returned unchanged.
///
/// @param x  value to convert
/// @return   floor(x) for negative x, ceil(x) otherwise, converted to int
template <typename T>
int RoundAwayFromZero(T x) {
    // Negative values move down (more negative) ...
    if (x < 0) {
        return static_cast<int>(std::floor(x));
    }
    // ... everything else moves up.
    return static_cast<int>(std::ceil(x));
}
|
||||
|
||||
/// Demo driver: converts 5.1 and -5.1 to integers "away from zero", first
/// with an inline conditional expression and then with RoundAwayFromZero(),
/// printing each result next to the value it is expected to be.
///
/// @return 0 always
int main() {
    // RoundAwayFromZero() returns int, so a plain int is used here; this
    // also avoids relying on <cstdint>, which this file does not include.
    int iv;
    // The f suffix keeps the literal a float instead of narrowing a double.
    float fv = 5.1f;

    // Inline form: a whole number is kept as is, anything else is
    // truncated toward zero and then pushed one step away from zero.
    iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
    std::cout << std::setw(4) << fv << " away from zero (should be 6): ";
    std::cout << iv << '\n';
    fv = -fv;
    iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
    std::cout << std::setw(4) << fv << " away from zero (should be -6): ";
    std::cout << iv << '\n';
    std::cout << '\n';

    // Same two conversions through the function template. The banner
    // names the function that is actually called.
    std::cout << "Using RoundAwayFromZero()\n";
    fv = -fv;
    iv = RoundAwayFromZero(fv);
    std::cout << std::setw(4) << fv << " away from zero (should be 6): ";
    std::cout << iv << '\n';
    fv = -fv;
    iv = RoundAwayFromZero(fv);
    std::cout << std::setw(4) << fv << " away from zero (should be -6): ";
    std::cout << iv << '\n';

    return 0;
}
|
||||
216
section_1/float/rounding.md
Normal file
216
section_1/float/rounding.md
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
# Section 1 / Conversion of Floating Point and Integers
|
||||
|
||||
This chapter has been surprisingly difficult to research and write. Huh?
|
||||
All we're talking about is taking a floating point value and turning it
|
||||
into an integer - what could be hard?
|
||||
|
||||
It's hard because the AARCH64 has so many instructions that seemingly
|
||||
do the aforementioned job and each of them comes in many variations. Even
|
||||
the language used is confusing.
|
||||
|
||||
For this chapter, I will use:
|
||||
|
||||
* Rounding means picking some fractional value and if the float's
|
||||
fraction is higher, you go one way and if lower, you go the other.
|
||||
|
||||
* Truncation means you don't look too closely at the fractional value.
|
||||
Instead, you just eliminate the fractional part and slam the whole
|
||||
number ... one way or the other.
|
||||
|
||||
## Truncation Towards Zero
|
||||
|
||||
In C and C++, truncation is what we get from:
|
||||
|
||||
```c++
|
||||
integer_variable = int(floating_variable); // C++
|
||||
integer_variable = (int) floating_variable; // C
|
||||
```
|
||||
|
||||
Diving a little deeper, there is a choice to be made as to whether or
|
||||
not `integer_variable` is signed or unsigned. And, whether or not
|
||||
`integer_variable` is a 32 bit or 64 bit value.
|
||||
|
||||
The instruction is `fcvtz` - convert towards zero. Then, the choice
|
||||
as to whether to produce a signed or unsigned result is defined by the
|
||||
final letter: `u` or `s`.
|
||||
|
||||
| Mnemonic | Meaning |
|
||||
| -------- | ------- |
|
||||
| fcvtzu | Truncate (always towards 0) producing an unsigned int |
|
||||
| fcvtzs | Truncate (always towards 0) producing a signed int |
|
||||
|
||||
As an example of how the ARM documentation is confusing - this
|
||||
instruction, which completely discards the fractional value, is described by
|
||||
the ARM documentation as doing rounding.
|
||||
|
||||
The choice of source register determines whether you are converting
|
||||
a double or single precision floating point value.
|
||||
|
||||
| Source Register | Converts a |
|
||||
| --------------- | ---------- |
|
||||
| dX | `double` to an integer |
|
||||
| sX | `float` to an integer |
|
||||
|
||||
| Destination Register | Produces a |
|
||||
| --------------- | ---------- |
|
||||
| xX | 64 bit integer |
|
||||
| wX | 32 bit or smaller integer |
|
||||
|
||||
Examples where `d` is a `double` and `f` is a `float`:
|
||||
|
||||
| C++ | Instruction |
|
||||
| --- | ----------- |
|
||||
| `int32_t(d)` | `fcvtzs w0, d0` |
|
||||
| `uint32_t(d)` | `fcvtzu w0, d0` |
|
||||
| `int64_t(d)` | `fcvtzs x0, d0` |
|
||||
| `uint64_t(d)` | `fcvtzu x0, d0` |
|
||||
|
||||
[Here](./asm_rounding.s) is a program which demonstrates various
|
||||
ways of converting doubles to integers.
|
||||
|
||||
Let's look at:
|
||||
|
||||
```text
|
||||
//-fcvtzs----------------------- // 45
|
||||
fcvtzs x1, dless // 46
|
||||
fcvtzs x2, dmore // 47
|
||||
ldr x0, =fmt4 // 48
|
||||
bl Emit // 49
|
||||
// 50
|
||||
fcvtzs x1, ndless // 51
|
||||
fcvtzs x2, ndmore // 52
|
||||
ldr x0, =fmt4 // 53
|
||||
bl Emit // 54
|
||||
```
|
||||
|
||||
Reminder:
|
||||
|
||||
* `dless` is 5.49
|
||||
* `dmore` is 5.51
|
||||
* `ndless` is -5.49
|
||||
* `ndmore` is -5.51
|
||||
|
||||
Here is the relevant output:
|
||||
|
||||
```text
|
||||
fcvtzs less: 5 more: 5
|
||||
fcvtzs less: -5 more: -5
|
||||
```
|
||||
|
||||
Notice all the values were truncated to the whole number that is
|
||||
*closer to zero*.
|
||||
|
||||
## Truncation Away From Zero
|
||||
|
||||
Truncation away from zero is not as easy. In fact, it cannot be
|
||||
performed with a single instruction.
|
||||
|
||||
In C and C++:
|
||||
|
||||
```c
|
||||
iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
|
||||
```
|
||||
|
||||
If the `fv` is already equal to a whole number, the
|
||||
integer value will be that whole number. Otherwise, `iv` is
|
||||
the whole number further *away from zero*.
|
||||
|
||||
In C++, a more sophisticated version would require `<cmath>` and
|
||||
could look like:
|
||||
|
||||
```c++
|
||||
template <typename T>
|
||||
int MyTruncate(T x) {
|
||||
return int((x < 0) ? floor(x) : ceil(x));
|
||||
}
|
||||
```
|
||||
|
||||
* `floor()` always truncates downward (towards more negative).
|
||||
* `ceil()` always truncates upwards (towards more positive).
|
||||
|
||||
[Here](./rounding.cpp) is a program which demonstrates this.
|
||||
|
||||
In assembly language, a function is used which implements
|
||||
what is in essence, one instantiation of the templated function
|
||||
given above.
|
||||
|
||||
```asm
|
||||
RoundAwayFromZero:
|
||||
fcmp d0, 0
|
||||
ble 1f
|
||||
// Value is positive, truncate towards positive infinity (ceil)
|
||||
frintp d0, d0
|
||||
b 2f
|
||||
1: // Value is negative, truncate towards negative infinity (floor)
|
||||
frintm d0, d0
|
||||
2: fcvtzs x0, d0
|
||||
ret
|
||||
```
|
||||
|
||||
`frintp` and `frintm` will honor the source register already being
|
||||
a whole number (no fractional part). Thus a value of 5 will not be
|
||||
converted to 6 and -5 will not be converted to -6. But, a value of
|
||||
5.000000001 **will** go to 6, etc.
|
||||
|
||||
[Here](./frintp.s) is a program that demonstrates this:
|
||||
|
||||
```text
|
||||
.text // 1
|
||||
.global main // 2
|
||||
.align 2 // 3
|
||||
// 4
|
||||
main: str x30, [sp, -16]! // 5
|
||||
// 6
|
||||
ldr x0, =d // 7
|
||||
ldr d0, [x0] // 8
|
||||
frintp d0, d0 // 9
|
||||
ldr x0, =fmt1 // 10
|
||||
bl printf // 11
|
||||
// 12
|
||||
ldr x0, =h // 13
|
||||
ldr d0, [x0] // 14
|
||||
frintp d0, d0 // 15
|
||||
ldr x0, =fmt2 // 16
|
||||
bl printf // 17
|
||||
// 18
|
||||
ldr x30, [sp], 16 // 19
|
||||
mov w0, wzr // 20
|
||||
ret // 21
|
||||
// 22
|
||||
.data // 23
|
||||
fmt1: .asciz "with fraction: %f\n" // 24
|
||||
fmt2: .asciz "without fraction: %f\n" // 25
|
||||
d: .double 5.00000001 // 26
|
||||
h: .double 5.0 // 27
|
||||
.end // 28
|
||||
```
|
||||
|
||||
The output is:
|
||||
|
||||
```text
|
||||
with fraction: 6.000000
|
||||
without fraction: 5.000000
|
||||
```
|
||||
|
||||
## Rounding Conversion
|
||||
|
||||
An instruction which does what we normally think of as rounding is
|
||||
`frinta`. This is the conversion "to nearest with ties going away."
|
||||
So, 5.5 goes to 6 as one would expect from "rounding."
|
||||
|
||||
## Converting an Integer to a Floating Point Value
|
||||
|
||||
In C / C++:
|
||||
|
||||
```c
|
||||
double_var = double(integer_var); // C++
|
||||
double_var = (double)integer_var; // C
|
||||
```
|
||||
|
||||
This conversion is handled by two instructions:
|
||||
|
||||
* `scvtf` converts a signed integer to a floating point value
|
||||
* `ucvtf` converts an unsigned integer to a floating point value
|
||||
|
||||
The name of the destination register controls which kind of floating
|
||||
point value is made. For example, specifying `dX` makes a double etc.
|
||||
Loading…
Reference in a new issue