diff --git a/section_1/float/asm_rounding.s b/section_1/float/asm_rounding.s new file mode 100644 index 0000000..f43787e --- /dev/null +++ b/section_1/float/asm_rounding.s @@ -0,0 +1,83 @@ + .global main + .text + .align 2 + +dless .req d8 // d8-d11 are callee-saved under AAPCS64, so they survive the printf calls +dmore .req d9 +ndless .req d10 +ndmore .req d11 + +Emit: str x30, [sp, -16]! // thin printf wrapper: preserve the link register around the call + bl printf + ldr x30, [sp], 16 + ret + +main: str x30, [sp, -16]! + stp dless, dmore, [sp, -16]! // save the caller's d8-d11 before reusing them + stp ndless, ndmore, [sp, -16]! + + ldr x0, =vless // the four doubles are laid out back to back in .rodata + ldr dless, [x0] + ldr dmore, [x0, 8] + ldr ndless, [x0, 16] + ldr ndmore, [x0, 24] + +//-fcvtps---------------------- + fcvtps x1, dless + fcvtps x2, dmore + ldr x0, =fmt1 + bl Emit + + fcvtps x1, ndless + fcvtps x2, ndmore + ldr x0, =fmt1 + bl Emit +//-fcvtms----------------------- + fcvtms x1, dless + fcvtms x2, dmore + ldr x0, =fmt2 + bl Emit + + fcvtms x1, ndless + fcvtms x2, ndmore + ldr x0, =fmt2 + bl Emit +//-fcvtzs----------------------- + fcvtzs x1, dless + fcvtzs x2, dmore + ldr x0, =fmt4 + bl Emit + + fcvtzs x1, ndless + fcvtzs x2, ndmore + ldr x0, =fmt4 + bl Emit +//-fcvtas----------------------- + fcvtas x1, dless + fcvtas x2, dmore + ldr x0, =fmt3 + bl Emit + + fcvtas x1, ndless + fcvtas x2, ndmore + ldr x0, =fmt3 + bl Emit +//------------------------------ + + ldp ndless, ndmore, [sp], 16 + ldp dless, dmore, [sp], 16 + ldr x30, [sp], 16 + mov w0, wzr + ret + + .section .rodata +vless: .double 5.49 +vmore: .double 5.51 +nvless: .double -5.49 +nvmore: .double -5.51 +fmt1: .asciz "fcvtps less: %ld more: %ld\n" // %ld because x1/x2 carry 64-bit results +fmt2: .asciz "fcvtms less: %ld more: %ld\n" +fmt3: .asciz "fcvtas less: %ld more: %ld\n" +fmt4: .asciz "fcvtzs less: %ld more: %ld\n" + + .end diff --git a/section_1/float/rounding.cpp b/section_1/float/rounding.cpp new file mode 100644 index 0000000..0f61b74 --- /dev/null +++ b/section_1/float/rounding.cpp @@ -0,0 +1,35 @@ +#include <cmath> +#include <iomanip> +#include <iostream> + +using namespace std; + +template <typename T> +int RoundAwayFromZero(T x) { // negative x goes to floor(x), anything else to ceil(x) + return int((x < 0) ? 
floor(x) : ceil(x)); +} + +int main() { + int32_t iv; + float fv = 5.1f; // sample value with a fractional part + + iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1); // inline form: keep whole values, otherwise step one further from zero + cout << setw(4) << fv << " away from zero (should be 6): "; + cout << iv << endl; + fv = -fv; + iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1); + cout << setw(4) << fv << " away from zero (should be -6): "; + cout << iv << endl; + cout << endl; + cout << "Using RoundAwayFromZero()\n"; + fv = -fv; + iv = RoundAwayFromZero(fv); + cout << setw(4) << fv << " away from zero (should be 6): "; + cout << iv << endl; + fv = -fv; + iv = RoundAwayFromZero(fv); + cout << setw(4) << fv << " away from zero (should be -6): "; + cout << iv << endl; + + return 0; +} \ No newline at end of file diff --git a/section_1/float/rounding.md b/section_1/float/rounding.md new file mode 100644 index 0000000..5b125f6 --- /dev/null +++ b/section_1/float/rounding.md @@ -0,0 +1,216 @@ +# Section 1 / Conversion of Floating Point and Integers + +This chapter has been surprisingly difficult to research and write. Huh? +All we're talking about is taking a floating point value and turning it +into an integer - what could be hard? + +It's hard because the AARCH64 has so many instructions that seemingly +do the aforementioned job and each of them comes in many variations. Even +the language used is confusing. + +For this chapter, I will use: + +* Rounding means picking some fractional value and if the float's +fraction is higher, you go one way and if lower, you go the other. + +* Truncation means you don't look too closely at the fractional value. +Instead, you just eliminate the fractional part and slam the whole +number ... one way or the other. + +## Truncation Towards Zero + +In C and C++, truncation is what we get from: + +```c++ +integer_variable = int(floating_variable); // C++ +integer_variable = (int) floating_variable; // C +``` + +Diving a little deeper, there is a choice to be made as to whether or +not `integer_variable` is signed or unsigned. 
And, whether or not +`integer_variable` is a 32 bit or 64 bit value. + +The instruction is `fcvtz` - convert towards zero. Then, the choice +as to whether to produce a signed or unsigned result is defined by the +final letter: `u` or `s`. + +| Mnemonic | Meaning | +| -------- | ------- | +| fcvtzu | Truncate (always towards 0) producing an unsigned int | +| fcvtzs | Truncate (always towards 0) producing a signed int | + +As an example of how the ARM documentation is confusing - this +instruction which completely discards the fractional value is described by +the ARM documentation as doing rounding. + +The choice of source register defines whether you are converting +a double or single precision floating point value. + +| Source Register | Converts a | +| --------------- | ---------- | +| dX | `double` to an integer | +| sX | `float` to an integer | + +| Destination Register | Converts to a | +| --------------- | ---------- | +| xX | 64 bit integer | +| wX | 32 bit or less integer | + +Examples where `d` is a `double` and `f` is a `float`: + +| C++ | Instruction | +| --- | ----------- | +| `int32_t(d)` | `fcvtzs w0, d0` | +| `uint32_t(d)` | `fcvtzu w0, d0` | +| `int64_t(d)` | `fcvtzs x0, d0` | +| `uint64_t(d)` | `fcvtzu x0, d0` | + +[Here](./asm_rounding.s) is a program which demonstrates various +ways of converting doubles to integers. + +Let's look at: + +```text +//-fcvtzs----------------------- // 45 + fcvtzs x1, dless // 46 + fcvtzs x2, dmore // 47 + ldr x0, =fmt4 // 48 + bl Emit // 49 + // 50 + fcvtzs x1, ndless // 51 + fcvtzs x2, ndmore // 52 + ldr x0, =fmt4 // 53 + bl Emit // 54 +``` + +Reminder: + +* `dless` is 5.49 +* `dmore` is 5.51 +* `ndless` is -5.49 +* `ndmore` is -5.51 + +Here is the relevant output: + +```text +fcvtzs less: 5 more: 5 +fcvtzs less: -5 more: -5 +``` + +Notice all the values were truncated to the whole number that is +*closer to zero*. + +## Truncation Away From Zero + +Truncation away from zero is not as easy. 
In fact, it cannot be +performed with a single instruction. + +In C and C++: + +```c +iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1); +``` + +If `fv` is already equal to a whole number, the +integer value will be that whole number. Otherwise, `iv` is +the whole number further *away from zero*. + +In C++, a more sophisticated version would require `<cmath>` and +could look like: + +```c++ +template <typename T> +int MyTruncate(T x) { + return int((x < 0) ? floor(x) : ceil(x)); +} +``` + +* `floor()` always truncates downward (towards more negative). +* `ceil()` always truncates upwards (towards more positive). + +[Here](./rounding.cpp) is a program which demonstrates this. + +In assembly language, a function is used which implements +what is, in essence, one instantiation of the templated function +given above. + +```asm +RoundAwayFromZero: + fcmp d0, 0 + ble 1f + // Value is positive, truncate towards positive infinity (ceil) + frintp d0, d0 + b 2f +1: // Value is negative, truncate towards negative infinity (floor) + frintm d0, d0 +2: fcvtzs x0, d0 + ret +``` + +`frintp` and `frintm` will honor the source register already being +a whole number (no fractional part). Thus a value of 5 will not be +converted to 6 and -5 will not be converted to -6. But, a value of +5.000000001 **will** go to 6, etc. + +[Here](./frintp.s) is a program that demonstrates this: + +```text + .text // 1 + .global main // 2 + .align 2 // 3 + // 4 +main: str x30, [sp, -16]! 
// 5 + // 6 + ldr x0, =d // 7 + ldr d0, [x0] // 8 + frintp d0, d0 // 9 + ldr x0, =fmt1 // 10 + bl printf // 11 + // 12 + ldr x0, =h // 13 + ldr d0, [x0] // 14 + frintp d0, d0 // 15 + ldr x0, =fmt2 // 16 + bl printf // 17 + // 18 + ldr x30, [sp], 16 // 19 + mov w0, wzr // 20 + ret // 21 + // 22 + .data // 23 +fmt1: .asciz "with fraction: %f\n" // 24 +fmt2: .asciz "without fraction: %f\n" // 25 +d: .double 5.00000001 // 26 +h: .double 5.0 // 27 + .end // 28 +``` + +The output is: + +```text +with fraction: 6.000000 +without fraction: 5.000000 +``` + +## Rounding Conversion + +An instruction which does what we normally think of as rounding is +`frinta`. This is the conversion "to nearest with ties going away." +So, 5.5 goes to 6 as one would expect from "rounding." Note that `frinta` leaves its result in floating point form; `fcvtas`, demonstrated in asm_rounding.s, rounds the same way and also converts the result to a signed integer. + +## Converting an Integer to a Floating Point Value + +In C / C++: + +```c +double_var = double(integer_var); // C++ +double_var = (double)integer_var; // C +``` + +This is handled by two instructions: + +* `scvtf` converts a signed integer to a floating point value +* `ucvtf` converts an unsigned integer to a floating point value + +The name of the destination register controls which kind of floating +point value is made. For example, specifying `dX` makes a double, etc.