a tough section - source code

2026-06-21 03:26:46 +08:00 · 2022-07-21 11:09:45 -05:00 · 2022-07-21 11:09:45 -05:00 · 7b4aa0ef13
commit 7b4aa0ef13
parent a4e498510c
3 changed files with 334 additions and 0 deletions
--- a/section_1/float/asm_rounding.s
+++ b/section_1/float/asm_rounding.s
@ -0,0 +1,83 @@
+        .global main
+        .text
+        .align  2
+// Values kept live across printf must sit in callee-saved d8-d15 (AAPCS64).
+dless   .req    d8              //  5.49 - fraction just below one half
+dmore   .req    d9              //  5.51 - fraction just above one half
+ndless  .req    d10             // -5.49
+ndmore  .req    d11             // -5.51
+
+Emit:   str       x30, [sp, #-16]!          // park LR; a 16-byte slot keeps sp aligned
+        bl        printf                    // x0 = format, x1/x2 = values, all set by the caller
+        ldr       x30, [sp], #16            // recover LR and release the slot
+        ret
+
+main:   sub       sp, sp, #48               // one frame: LR plus the four value registers
+        str       x30, [sp, #32]
+        stp       dless, dmore, [sp, #16]   // keep the caller's copies for the epilogue
+        stp       ndless, ndmore, [sp]
+
+        // The four doubles sit back to back in .rodata, starting at vless.
+        ldr       x0, =vless
+        ldp       dless, dmore, [x0]
+        ldp       ndless, ndmore, [x0, #16]
+
+//-fcvtps----------------------
+        fcvtps    x1, dless                 // signed convert, rounding toward +infinity
+        fcvtps    x2, dmore
+        ldr       x0, =fmt1
+        bl        Emit
+
+        fcvtps    x1, ndless
+        fcvtps    x2, ndmore
+        ldr       x0, =fmt1
+        bl        Emit
+//-fcvtms-----------------------
+        fcvtms    x1, dless                 // signed convert, rounding toward -infinity
+        fcvtms    x2, dmore
+        ldr       x0, =fmt2
+        bl        Emit
+
+        fcvtms    x1, ndless
+        fcvtms    x2, ndmore
+        ldr       x0, =fmt2
+        bl        Emit
+//-fcvtzs-----------------------
+        fcvtzs    x1, dless                 // signed convert, rounding toward zero
+        fcvtzs    x2, dmore
+        ldr       x0, =fmt4
+        bl        Emit
+
+        fcvtzs    x1, ndless
+        fcvtzs    x2, ndmore
+        ldr       x0, =fmt4
+        bl        Emit
+//-fcvtas-----------------------
+        fcvtas    x1, dless                 // signed convert, to nearest with ties away from zero
+        fcvtas    x2, dmore
+        ldr       x0, =fmt3
+        bl        Emit
+
+        fcvtas    x1, ndless
+        fcvtas    x2, ndmore
+        ldr       x0, =fmt3
+        bl        Emit
+//------------------------------
+
+        ldp       ndless, ndmore, [sp]      // undo the prologue in reverse order
+        ldp       dless, dmore, [sp, #16]
+        ldr       x30, [sp, #32]
+        add       sp, sp, #48
+        mov       w0, wzr                   // exit status 0
+        ret
+		
+        .section    .rodata
+        .align      3                       // doubles are loaded 8 bytes at a time
+// main reaches every value through offsets from vless, so these four
+// doubles must stay contiguous and in this order.
+vless:  .double     5.49
+vmore:  .double     5.51
+nvless: .double     -5.49
+nvmore: .double     -5.51
+// Every format is handed two 64-bit signed integers (x1 and x2), hence %ld.
+// Each label names the instruction whose results it prints.
+fmt1:   .asciz      "fcvtps less: %ld more: %ld\n"
+fmt2:   .asciz      "fcvtms less: %ld more: %ld\n"
+fmt3:   .asciz      "fcvtas less: %ld more: %ld\n"
+fmt4:   .asciz      "fcvtzs less: %ld more: %ld\n"
+
+		.end
--- a/section_1/float/rounding.cpp
+++ b/section_1/float/rounding.cpp
@ -0,0 +1,35 @@
+#include <cmath>
+#include <cstdint>
+#include <iomanip>
+#include <iostream>
+
+using namespace std;
+
+// Converts x to an int, pushing any fractional part away from zero:
+// negative inputs are floored, everything else is ceiled. A value that
+// is already a whole number comes back unchanged.
+template <typename T>
+int RoundAwayFromZero(T x) {
+	if (x < 0) {
+		return static_cast<int>(floor(x));
+	}
+	return static_cast<int>(ceil(x));
+}
+
+// Demonstrates truncation away from zero twice: first with an inline
+// integer expression, then with RoundAwayFromZero(). Each result is
+// printed beside the value it is expected to produce.
+int main() {
+	int32_t iv;
+	float fv = 5.1f;
+
+	// int(fv) truncates toward zero; if that dropped a fraction, step one
+	// further in the direction of fv's sign.
+	iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
+	cout << setw(4) << fv << " away from zero (should be  6):  ";
+	cout << iv << endl;
+	fv = -fv;
+	iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
+	cout << setw(4) << fv << " away from zero (should be -6): ";
+	cout << iv << endl;
+	cout << endl;
+	// The banner names the helper that is actually called below.
+	cout << "Using RoundAwayFromZero()\n";
+	fv = -fv;
+	iv = RoundAwayFromZero(fv);
+	cout << setw(4) << fv << " away from zero (should be  6):  ";
+	cout << iv << endl;
+	fv = -fv;
+	iv = RoundAwayFromZero(fv);
+	cout << setw(4) << fv << " away from zero (should be -6): ";
+	cout << iv << endl;
+
+	return 0;
+}
--- a/section_1/float/rounding.md
+++ b/section_1/float/rounding.md
@ -0,0 +1,216 @@
+# Section 1 / Conversion of Floating Point and Integers
+
+This chapter has been surprisingly difficult to research and write. Huh?
+All we're talking about is taking a floating point value and turning it
+into an integer - what could be hard?
+
+It's hard because the AARCH64 has so many instructions that seemingly
+do the aforementioned job and each of them comes in many variations. Even
+the language used is confusing.
+
+For this chapter, I will use:
+
+* Rounding means picking some fractional value and if the float's
+fraction is higher, you go one way and if lower, you go the other.
+
+* Truncation means you don't look too closely at the fractional value.
+Instead, you just eliminate the fractional part and slam the whole
+number ... one way or the other.
+
+## Truncation Towards Zero
+
+In C and C++, truncation is what we get from:
+
+```c++
+integer_variable = int(floating_variable);  // C++
+integer_variable = (int) floating_variable; // C
+```
+
+Diving a little deeper, there is a choice to be made as to whether or
+not `integer_variable` is signed or unsigned. And, whether or not
+`integer_variable` is a 32 bit or 64 bit value.
+
+The instruction is `fcvtz` - convert towards zero. Then, the choice
+as to whether to produce a signed or unsigned result is defined by the
+final letter: `u` or `s`.
+
+| Mnemonic | Meaning |
+| -------- | ------- |
+| fcvtzu   | Truncate (always towards 0) producing an unsigned int |
+| fcvtzs   | Truncate (always towards 0) producing a signed int |
+
+As an example of how the ARM documentation is confusing - this
+instruction, which completely discards the fractional value, is described
+by the ARM documentation as doing rounding.
+
+The choice of source register defines whether you are converting
+a double or single precision floating point value.
+
+| Source Register | Converts a |
+| --------------- | ---------- |
+| dX | `double` to an integer |
+| sX | `float` to an integer |
+
+| Destination Register | Produces a |
+| --------------- | ---------- |
+| xX | 64 bit integer |
+| wX | 32 bit or less integer |
+
+Examples where `d` is a `double` and `f` is a `float`:
+
+| C++ | Instruction |
+| --- | ----------- |
+| `int32_t(d)` | `fcvtzs	w0, d0` |
+| `uint32_t(d)` | `fcvtzu	w0, d0` |
+| `int64_t(d)` | `fcvtzs	x0, d0` |
+| `uint64_t(d)` | `fcvtzu	x0, d0` |
+
+[Here](./asm_rounding.s) is a program which demonstrates various
+ways of converting doubles to integers.
+
+Let's look at:
+
+```text
+//-fcvtzs-----------------------                                    // 45 
+        fcvtzs    x1, dless                                         // 46 
+        fcvtzs    x2, dmore                                         // 47 
+        ldr       x0, =fmt4                                         // 48 
+        bl        Emit                                              // 49 
+                                                                    // 50 
+        fcvtzs    x1, ndless                                        // 51 
+        fcvtzs    x2, ndmore                                        // 52 
+        ldr       x0, =fmt4                                         // 53 
+        bl        Emit                                              // 54 
+```
+
+Reminder:
+
+* `dless` is 5.49
+* `dmore` is 5.51
+* `ndless` is -5.49
+* `ndmore` is -5.51
+
+Here is the relevant output:
+
+```text
+fcvtzs less: 5 more: 5
+fcvtzs less: -5 more: -5
+```
+
+Notice all the values were truncated to the whole number that is
+*closer to zero*.
+
+## Truncation Away From Zero
+
+Truncation away from zero is not as easy. In fact, it cannot be
+performed with a single instruction.
+
+In C and C++:
+
+```c
+iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
+```
+
+If `fv` is already equal to a whole number, the
+integer value will be that whole number. Otherwise `iv` is
+the whole number further *away from zero*.
+
+In C++, a more sophisticated version would require `<cmath>` and
+could look like:
+
+```c++
+// Returns x as an int, moved to the whole number further from zero
+// whenever x has a fractional part.
+template <typename T>
+int MyTruncate(T x) {
+	// Negative values go down (floor); zero and positive values go up (ceil).
+	return int((x < 0) ? floor(x) : ceil(x));
+}
+```
+
+* `floor()` always truncates downward (towards more negative).
+* `ceil()` always truncates upwards (towards more positive).
+
+[Here](./rounding.cpp) is a program which demonstrates this.
+
+In assembly language, a function is used which implements
+what is in essence, one instantiation of the templated function
+given above.
+
+```asm
+// In:  d0 = double to convert
+// Out: x0 = signed 64-bit integer, the whole number at or beyond d0 in
+//      the direction away from zero
+RoundAwayFromZero:
+		fcmp	d0, 0
+		ble		1f
+		// Value is positive, truncate towards positive infinity (ceil)
+		frintp	d0, d0
+		b 		2f
+1:		// Value is zero or negative, truncate towards negative infinity (floor)
+		frintm	d0, d0
+2:		fcvtzs	x0, d0
+		ret
+```
+
+`frintp` and `frintm` will honor the source register already being
+a whole number (no fractional part). Thus a value of 5 will not be
+converted to 6 and -5 will not be converted to -6. But, a value of
+5.000000001 **will** go to 6, etc.
+
+[Here](./frintp.s) is a program that demonstrates this:
+
+```text
+        .text                                                       // 1 
+        .global main                                                // 2 
+        .align    2                                                 // 3 
+                                                                    // 4 
+main:   str       x30, [sp, -16]!                                   // 5 
+                                                                    // 6 
+        ldr       x0, =d                                            // 7 
+        ldr       d0, [x0]                                          // 8 
+        frintp    d0, d0                                            // 9 
+        ldr       x0, =fmt1                                         // 10 
+        bl        printf                                            // 11 
+                                                                    // 12 
+        ldr       x0, =h                                            // 13 
+        ldr       d0, [x0]                                          // 14 
+        frintp    d0, d0                                            // 15 
+        ldr       x0, =fmt2                                         // 16 
+        bl        printf                                            // 17 
+                                                                    // 18 
+        ldr       x30, [sp], 16                                     // 19 
+        mov       w0, wzr                                           // 20 
+        ret                                                         // 21 
+                                                                    // 22 
+        .data                                                       // 23 
+fmt1:    .asciz   "with fraction:    %f\n"                          // 24 
+fmt2:    .asciz   "without fraction: %f\n"                          // 25 
+d:       .double  5.00000001                                        // 26 
+h:       .double  5.0                                               // 27 
+        .end                                                        // 28 
+```
+
+The output is:
+
+```text
+with fraction:    6.000000
+without fraction: 5.000000
+```
+
+## Rounding Conversion
+
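+An instruction which does what we normally think of as rounding is
+`frinta`. This rounds "to nearest with ties going away" but leaves the
+result in a floating point register; `fcvtas` applies the same rounding
+and converts to a signed integer in one step.
+So, 5.5 goes to 6 as one would expect from "rounding."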
+
+## Converting an Integer to a Floating Point Value
+
+In C / C++:
+
+```c
+double_var = double(integer_var); // C++
+double_var = (double)integer_var; // C
+```
+
+Is handled by two instructions:
+
+* `scvtf` converts a signed integer to a floating point value
+* `ucvtf` converts an unsigned integer to a floating point value
+
+The name of the destination register controls which kind of floating
+point value is made. For example, specifying `dX` makes a double etc.