a tough section - source code

2026-06-22 01:17:22 +08:00 · 2022-07-21 11:09:45 -05:00 · 2022-07-21 11:09:45 -05:00 · 7b4aa0ef13
commit 7b4aa0ef13
parent a4e498510c
3 changed files with 334 additions and 0 deletions
--- a/section_1/float/asm_rounding.s
+++ b/section_1/float/asm_rounding.s
@ -0,0 +1,83 @@
        .global main
        .text
        .align  2
 // Register aliases for the four test values.
 // Under AAPCS64 only d8-d15 (low 64 bits of v8-v15) are callee-saved;
 // d16-d31 are caller-saved and may be overwritten by any call. The test
 // values must survive repeated calls to printf, so they live in d8-d11.
 dless	.req	d8			// +5.49
 dmore	.req	d9			// +5.51
 ndless	.req	d10			// -5.49
 ndmore	.req	d11			// -5.51

 // Emit - thin wrapper around printf.
 //   In:  x0 = address of format string
 //        x1, x2 = the two integer values to print
 //   The link register is saved and restored around the call so that
 //   Emit can return to its own caller.
 Emit:	str     x30, [sp, -16]!
 		bl		printf
 		ldr     x30, [sp], 16
 		ret

 // main - loads four doubles (vless .. nvmore) and prints each of them
 // converted to a signed 64-bit integer by fcvtps, fcvtms, fcvtzs and
 // fcvtas. Returns 0.
 main:	str     x30, [sp, -16]!
 		// d8-d11 are callee-saved, so main must preserve its caller's
 		// values. Each push keeps sp 16-byte aligned.
 		stp     dless, dmore, [sp, -16]!
 		stp     ndless, ndmore, [sp, -16]!

 		// The four doubles are laid out back to back starting at vless.
 		ldr		x0, =vless
 		ldr		dless, [x0]
 		ldr		dmore, [x0, 8]
 		ldr		ndless, [x0, 16]
 		ldr		ndmore, [x0, 24]

 //-fcvtps---------------------- round toward plus infinity (ceil)
 		fcvtps	x1, dless
 		fcvtps	x2, dmore
 		ldr		x0, =fmt1
 		bl		Emit

 		fcvtps	x1, ndless
 		fcvtps	x2, ndmore
 		ldr		x0, =fmt1
 		bl		Emit

 //-fcvtms----------------------- round toward minus infinity (floor)
 		fcvtms	x1, dless
 		fcvtms	x2, dmore
 		ldr		x0, =fmt2
 		bl		Emit

 		fcvtms	x1, ndless
 		fcvtms	x2, ndmore
 		ldr		x0, =fmt2
 		bl		Emit

 //-fcvtzs----------------------- round toward zero (truncate)
 		fcvtzs	x1, dless
 		fcvtzs	x2, dmore
 		ldr		x0, =fmt4
 		bl		Emit

 		fcvtzs	x1, ndless
 		fcvtzs	x2, ndmore
 		ldr		x0, =fmt4
 		bl		Emit

 //-fcvtas----------------------- round to nearest, ties away from zero
 		fcvtas	x1, dless
 		fcvtas	x2, dmore
 		ldr		x0, =fmt3
 		bl		Emit

 		fcvtas	x1, ndless
 		fcvtas	x2, ndmore
 		ldr		x0, =fmt3
 		bl		Emit

 //------------------------------
 		// Restore in the reverse order of the saves above.
 		ldp     ndless, ndmore, [sp], 16
 		ldp     dless, dmore, [sp], 16
 		ldr     x30, [sp], 16
 		mov		w0, wzr				// return 0
 		ret
        .section    .rodata
        .p2align    3               // keep the doubles 8-byte aligned
 // main loads these four values from vless + 0, 8, 16 and 24, so they
 // must stay contiguous and in exactly this order.
 vless:	.double		5.49
 vmore:	.double		5.51
 nvless:	.double		-5.49
 nvmore:	.double		-5.51
 // One format string per conversion instruction. The values are passed
 // in 64-bit x registers, hence %ld. fmt3 belongs to fcvtas.
 fmt1:	.asciz		"fcvtps less: %ld more: %ld\n"
 fmt2:	.asciz		"fcvtms less: %ld more: %ld\n"
 fmt3:	.asciz		"fcvtas less: %ld more: %ld\n"
 fmt4:	.asciz		"fcvtzs less: %ld more: %ld\n"
 		.end
--- a/section_1/float/rounding.cpp
+++ b/section_1/float/rounding.cpp
@ -0,0 +1,35 @@
 #include <iostream>
 #include <iomanip>
 #include <cmath>
 using namespace std;
 // Returns x moved to the adjacent whole number that lies farther from
 // zero: negative values go down (floor), all others go up (ceil).
 // A value that is already whole is returned unchanged.
 template <typename T>
 int RoundAwayFromZero(T x) {
 	if (x < 0) {
 		return int(std::floor(x));
 	}
 	return int(std::ceil(x));
 }
 // Demonstrates truncation away from zero in two ways: first with an
 // inline conditional expression, then with RoundAwayFromZero(). Each
 // approach is shown for a positive and a negative value.
 int main() {
 	int iv;
 	float fv = 5.1f;

 	// Inline form: truncate toward zero, then step one whole number
 	// farther from zero unless fv was already whole.
 	iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
 	cout << setw(4) << fv << " away from zero (should be  6):  ";
 	cout << iv << endl;

 	fv = -fv;
 	iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
 	cout << setw(4) << fv << " away from zero (should be -6): ";
 	cout << iv << endl;

 	cout << endl;
 	cout << "Using RoundAwayFromZero()\n";

 	fv = -fv;	// back to the positive value
 	iv = RoundAwayFromZero(fv);
 	cout << setw(4) << fv << " away from zero (should be  6):  ";
 	cout << iv << endl;

 	fv = -fv;
 	iv = RoundAwayFromZero(fv);
 	cout << setw(4) << fv << " away from zero (should be -6): ";
 	cout << iv << endl;

 	return 0;
 }
--- a/section_1/float/rounding.md
+++ b/section_1/float/rounding.md
@ -0,0 +1,216 @@
 # Section 1 / Conversion of Floating Point and Integers
 This chapter has been surprisingly difficult to research and write. Huh?
 All we're talking about is taking a floating point value and turning it
 into an integer - what could be hard?
 It's hard because the AARCH64 has so many instructions that seemingly
 do the aforementioned job and each of them comes in many variations. Even
 the language used is confusing.
 For this chapter, I will use:
 * Rounding means picking some fractional value and if the float's
 fraction is higher, you go one way and if lower, you go the other.
 * Truncation means you don't look too closely at the fractional value.
 Instead, you just eliminate the fractional part and slam the whole
 number ... one way or the other.
 ## Truncation Towards Zero
 In C and C++, truncation is what we get from:
 ```c++
 integer_variable = int(floating_variable);  // C++
 integer_variable = (int) floating_variable; // C
 ```
 Diving a little deeper, there is a choice to be made as to whether or
 not `integer_variable` is signed or unsigned. And, whether or not
 `integer_variable` is a 32 bit or 64 bit value.
 The instruction is `fcvtz` - convert towards zero. Then, the choice
 as to whether to produce a signed or unsigned result is defined by the
 final letter: `u` or `s`.
 | Mnemonic | Meaning |
 | -------- | ------- |
 | fcvtzu   | Truncate (always towards 0) producing an unsigned int |
 | fcvtzs   | Truncate (always towards 0) producing a signed int |
 As an example of how the ARM documentation is confusing - this
 instruction, which completely discards the fractional value, is
 described by the ARM documentation as performing rounding.
 The choice of source register defines whether you are converting
 a double or single precision floating point value.
 | Source Register | Converts a |
 | --------------- | ---------- |
 | dX | `double` to an integer |
 | sX | `float` to an integer |
 | Destination Register | Produces a |
 | --------------- | ---------- |
 | xX | 64 bit integer |
 | wX | 32 bit or less integer |
 Examples where `d` is a `double` and `f` is a `float`:
 | C++ | Instruction |
 | --- | ----------- |
 | `int32_t(d)` | `fcvtzs	w0, d0` |
 | `uint32_t(d)` | `fcvtzu	w0, d0` |
 | `int64_t(d)` | `fcvtzs	x0, d0` |
 | `uint64_t(d)` | `fcvtzu	x0, d0` |
 [Here](./asm_rounding.s) is a program which demonstrates various
 ways of converting doubles to integers.
 Let's look at:
 ```text
 //-fcvtzs-----------------------                                    // 45 
        fcvtzs    x1, dless                                         // 46 
        fcvtzs    x2, dmore                                         // 47 
        ldr       x0, =fmt4                                         // 48 
        bl        Emit                                              // 49 
                                                                    // 50 
        fcvtzs    x1, ndless                                        // 51 
        fcvtzs    x2, ndmore                                        // 52 
        ldr       x0, =fmt4                                         // 53 
        bl        Emit                                              // 54 
 ```
 Reminder:
 * `dless` is 5.49
 * `dmore` is 5.51
 * `ndless` is -5.49
 * `ndmore` is -5.51
 Here is the relevant output:
 ```text
 fcvtzs less: 5 more: 5
 fcvtzs less: -5 more: -5
 ```
 Notice all the values were truncated to the whole number that is
 *closer to zero*.
 ## Truncation Away From Zero
 Truncation away from zero is not as easy. In fact, it cannot be
 performed with a single instruction.
 In C and C++:
 ```c
 iv = (int(fv) == fv) ? int(fv) : int(fv) + ((fv < 0) ? -1 : 1);
 ```
 If `fv` is already equal to a whole number, the
 integer value will be that whole number. Otherwise `iv` is
 the whole number further *away from zero*.
 In C++, a more sophisticated version would require `<cmath>` and
 could look like:
 ```c++
 template <typename T>
 int MyTruncate(T x) {
 	return int((x < 0) ? floor(x) : ceil(x));
 }
 ```
 * `floor()` always truncates downward (towards more negative).
 * `ceil()` always truncates upwards (towards more positive).
 [Here](./rounding.cpp) is a program which demonstrates this:
 In assembly language, a function is used which implements
 what is, in essence, one instantiation of the templated function
 given above.
 ```asm
 // RoundAwayFromZero - convert a double to a signed 64-bit integer,
 // moving any value with a fractional part to the whole number that is
 // farther from zero.
 //   In:  d0 = value to convert
 //   Out: x0 = converted integer (d0 is overwritten with the whole value)
 RoundAwayFromZero:
 		fcmp	d0, 0
 		ble		1f
 		// Value is positive, truncate towards positive infinity (ceil)
 		frintp	d0, d0
 		b 		2f
 1:		// Value is negative or zero, truncate towards negative infinity (floor)
 		frintm	d0, d0
 2:		fcvtzs	x0, d0		// d0 is already whole, so this only changes the format
 		ret
 ```
 `frintp` and `frintm` will honor the source register already being
 a whole number (no fractional part). Thus a value of 5 will not be
 converted to 6 and -5 will not be converted to -6. But, a value of
 5.000000001 **will** go to 6, etc.
 [Here](./frintp.s) is a program that demonstrates this:
 ```text
        .text                                                       // 1 
        .global main                                                // 2 
        .align    2                                                 // 3 
                                                                    // 4 
 main:   str       x30, [sp, -16]!                                   // 5 
                                                                    // 6 
        ldr       x0, =d                                            // 7 
        ldr       d0, [x0]                                          // 8 
        frintp    d0, d0                                            // 9 
        ldr       x0, =fmt1                                         // 10 
        bl        printf                                            // 11 
                                                                    // 12 
        ldr       x0, =h                                            // 13 
        ldr       d0, [x0]                                          // 14 
        frintp    d0, d0                                            // 15 
        ldr       x0, =fmt2                                         // 16 
        bl        printf                                            // 17 
                                                                    // 18 
        ldr       x30, [sp], 16                                     // 19 
        mov       w0, wzr                                           // 20 
        ret                                                         // 21 
                                                                    // 22 
        .data                                                       // 23 
 fmt1:    .asciz   "with fraction:    %f\n"                          // 24 
 fmt2:    .asciz   "without fraction: %f\n"                          // 25 
 d:       .double  5.00000001                                        // 26 
 h:       .double  5.0                                               // 27 
        .end                                                        // 28 
 ```
 The output is:
 ```text
 with fraction:    6.000000
 without fraction: 5.000000
 ```
 ## Rounding Conversion
 An instruction which does what we normally think of as rounding is
 `frinta`. This is the conversion "to nearest with ties going away."
 So, 5.5 goes to 6 as one would expect from "rounding."
 ## Converting an Integer to a Floating Point Value
 In C / C++:
 ```c
 double_var = double(integer_var); // C++
 double_var = (double)integer_var; // C
 ```
 This is handled by two instructions:
 * `scvtf` converts a signed integer to a floating point value
 * `ucvtf` converts an unsigned integer to a floating point value
 The name of the destination register controls which kind of floating
 point value is made. For example, specifying `dX` makes a double etc.