FLOAT is coming together

2026-06-21 03:56:47 +08:00 · 2022-06-16 09:58:17 -05:00 · 2022-06-16 09:58:17 -05:00 · af92b0e1ea
commit af92b0e1ea
parent 9ec7bbcb17
3 changed files with 294 additions and 0 deletions
--- a/section_1/float/.vscode/settings.json
+++ b/section_1/float/.vscode/settings.json
@ -0,0 +1,9 @@
+{
+	"files.associations": {
+		"iomanip": "cpp",
+		"ios": "cpp"
+	},
+	"cSpell.ignoreWords": [
+		"SIZF"
+	]
+}
--- a/section_1/float/float_dump.cpp
+++ b/section_1/float/float_dump.cpp
@ -0,0 +1,142 @@
+/*	Perry Kivolowitz
+	Professor and Chair of Computer Science
+	Carthage College
+*/
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <sstream>
+#include <cstdlib>
+#include <cmath>
+
+using namespace std;
+
+// Widths, in bits, of the fraction and exponent fields of the single
+// precision bit-field struct (SP).
+const int FRAC_SIZF = 23;
+const int EXPO_SIZF = 8;
+
+// Widths, in bits, of the fraction and exponent fields of the double
+// precision bit-field struct (DP).
+const int FRAC_SIZD = 52;
+const int EXPO_SIZD = 11;
+
+// Width, in bits, of the sign field shared by SP and DP.
+const int SIGN_SIZE = 1;
+
+// Bit-field view of a single precision value. The fields are declared
+// fraction first, then exponent, then sign; together they are
+// FRAC_SIZF + EXPO_SIZF + SIGN_SIZE bits wide.
+struct SP {
+	unsigned frac : FRAC_SIZF;	// fraction bits
+	unsigned expo : EXPO_SIZF;	// exponent bits, still biased (main subtracts 127)
+	unsigned sign : SIGN_SIZE;	// sign bit, printed by main as 0 or 1
+};
+
+// Bit-field view of a double precision value. The fields are declared
+// fraction first, then exponent, then sign; together they are
+// FRAC_SIZD + EXPO_SIZD + SIGN_SIZE bits wide.
+//
+// The underlying type is unsigned long long because unsigned long is only
+// guaranteed to be 32 bits wide (and is 32 bits on LLP64 targets), which
+// cannot hold the 52 bit fraction field.
+struct DP {
+	unsigned long long frac : FRAC_SIZD;	// fraction bits
+	unsigned long long expo : EXPO_SIZD;	// exponent bits, still biased (main subtracts 1023)
+	unsigned long long sign : SIGN_SIZE;	// sign bit, printed by main as 0 or 1
+};
+
+union Double {	// overlays a double with the DP bit-field view of the same storage
+	double d;	// the value itself; main stores the parsed argument here
+	DP D;	// main reads the sign, exponent and fraction fields through this member
+};
+
+union Single {	// overlays a float with the SP bit-field view of the same storage
+	float f;	// the value itself; main stores the double narrowed to float here
+	SP F;	// main reads the sign, exponent and fraction fields through this member
+};
+
+/*	DeBinary - returns the fractional value (0 <= result < 1) encoded by
+	the fraction bits of a float or a double.
+
+	is_double	true to treat frac as FRAC_SIZD bits wide, false to
+			treat it as FRAC_SIZF bits wide.
+	frac		the raw fraction bits. The most significant of them
+			is worth 1/2, the next 1/4 and so on.
+
+	The weight of each bit is kept as a double that is halved on every
+	step. Computing it as double(1 << n) shifts an int, which overflows
+	once n reaches the width of int - and n goes as high as FRAC_SIZD
+	for the low order bits of a double.
+*/
+double DeBinary(bool is_double, unsigned long frac) {
+	const int bits = (is_double ? FRAC_SIZD : FRAC_SIZF);
+	// Widened so the shift below is well defined even where unsigned long
+	// is narrower than the number of bits being examined.
+	const unsigned long long wide = frac;
+	double total = 0.0;
+	double weight = 0.5;
+
+	for (int pos = bits - 1; pos >= 0; pos--) {
+		if ((wide >> pos) & 1ULL) {
+			total += weight;
+		}
+		weight /= 2.0;
+	}
+	return total;
+}
+
+/*	MakeEquation - renders the value held in a bit-field struct as text of
+	the form "<significand> x 2^<exponent>".
+
+	u	a struct with sign, expo and frac members (SP or DP).
+	bias	the exponent bias: 127 for SP, 1023 for DP. A bias of 1023
+		also selects the double precision fraction width.
+
+	Only normal values carry the implied leading 1. The remaining
+	encodings are handled explicitly so they are not misreported:
+	  expo == 0, frac == 0		zero, rendered as "0"
+	  expo == 0, frac != 0		subnormal, 0.frac x 2^(1 - bias)
+	  expo all ones, frac == 0	infinity, rendered as "inf"
+	  expo all ones, frac != 0	not a number, rendered as "nan"
+*/
+template<class T>
+string MakeEquation(T & u, int bias) {
+	const bool is_double = (bias == 1023);
+	const int expo = static_cast<int>(u.expo);
+	// For the biases used here (127 and 1023) the all-ones exponent
+	// field is 2 * bias + 1 (255 and 2047).
+	const int max_expo = 2 * bias + 1;
+	const char * sign = (u.sign ? "-" : "");
+	stringstream ss;
+
+	if (expo == max_expo) {
+		if (u.frac == 0) {
+			ss << sign << "inf";
+		} else {
+			ss << "nan";
+		}
+	} else if (expo == 0 && u.frac == 0) {
+		ss << sign << "0";
+	} else if (expo == 0) {
+		ss << sign << DeBinary(is_double, u.frac) << " x 2^" << (1 - bias);
+	} else {
+		ss << sign << 1.0 + DeBinary(is_double, u.frac) << " x 2^" << (expo - bias);
+	}
+	return ss.str();
+}
+
+/*	main - takes one floating point value on the command line and prints a
+	table showing how it is stored as a double and as a float: the value,
+	sign, exponent (raw and de-biased), fraction, the five most
+	significant fraction bits, the full fraction and an equation.
+
+	Returns 1 (after a message on cerr) when no argument is given or when
+	the argument is not entirely a floating point value, otherwise 0.
+*/
+int main(int argc, char ** argv) {
+	Double d;
+	Single f;
+
+	const int fore_space = 20;
+	const int field_space = 25;
+
+	if (argc < 2) {
+		cerr << "Must supply a floating point value as a command line argument.\n";
+		return 1;
+	}
+
+	// strtod reports where parsing stopped, so text that is not a number
+	// is rejected instead of being silently examined as 0.
+	char * end = argv[1];
+	d.d = strtod(argv[1], &end);
+	if (end == argv[1] || *end != '\0') {
+		cerr << "Could not interpret \"" << argv[1] << "\" as a floating point value.\n";
+		return 1;
+	}
+	f.f = float(d.d);
+
+	// left, setprecision, hex and dec stay in effect until changed; only
+	// setw has to be repeated for every field.
+	cout << left;
+	cout << setw(fore_space) << "Component" << setw(field_space) << "Double";
+	cout << setw(field_space) << "Float" << "Comment" << '\n';
+
+	cout << setw(fore_space) << "Value:" << setw(field_space) << setprecision(10) << d.d;
+	cout << setw(field_space) << f.f;
+	cout << "Delta(F - D): " << setw(16) << f.f - d.d << '\n';
+
+	cout << setw(fore_space) << "Sign:";
+	cout << setw(field_space) << static_cast<bool>(d.D.sign);
+	cout << setw(field_space) << static_cast<bool>(f.F.sign);
+	cout << '\n';
+
+	cout << hex;
+	cout << setw(fore_space) << "Exponent (hex):";
+	cout << setw(field_space) << d.D.expo;
+	cout << setw(field_space) << f.F.expo;
+	cout << '\n';
+
+	cout << dec;
+	cout << setw(fore_space) << "De-biased (dec):";
+	cout << setw(field_space) << d.D.expo - 1023;
+	cout << setw(field_space) << f.F.expo - 127;
+	cout << '\n';
+
+	cout << hex;
+	cout << setw(fore_space) << "Fraction (hex):";
+	cout << setw(field_space) << d.D.frac;
+	cout << setw(field_space) << f.F.frac;
+	cout << '\n';
+
+	// One row per leading fraction bit; row i shows the bit worth
+	// 1 / 2^(i + 1), counted from the most significant end.
+	static const char * const labels[] = {
+		"Halves:", "Quarters:", "Eighths:", "Sixteenths:", "Thirty seconds:"
+	};
+	const int label_count = sizeof(labels) / sizeof(labels[0]);
+	for (int i = 0; i < label_count; i++) {
+		const int place = i + 1;
+		cout << setw(fore_space) << labels[i];
+		cout << setw(field_space) << ((d.D.frac >> (FRAC_SIZD - place)) & 1);
+		cout << setw(field_space) << ((f.F.frac >> (FRAC_SIZF - place)) & 1);
+		cout << '\n';
+	}
+
+	cout << dec;
+	cout << setw(fore_space) << "Full fraction:";
+	cout << setw(field_space) << DeBinary(true,  d.D.frac);
+	cout << setw(field_space) << DeBinary(false, f.F.frac);
+	cout << '\n';
+
+	cout << setw(fore_space) << "Equation:";
+	cout << setw(field_space) << MakeEquation<DP>(d.D, 1023);
+	cout << setw(field_space) << MakeEquation<SP>(f.F, 127);
+	cout << '\n';
+
+	return 0;
+}
--- a/section_1/float/what.md
+++ b/section_1/float/what.md
@ -0,0 +1,143 @@
+# Section 1 / What Are Floating Point Numbers?
+
+Before we introduce floating point instructions in the AARCH64 ISA, it is
+worth going over exactly what a floating point value is. Integers are easy.
+They're just powers of two summed together with a single bit at one end
+determining the sign (if the integer is signed).
+
+But what are floating point numbers?
+
+## Key Point
+
+**Floating point values are approximations.**
+
+Sometimes they are spot on. Most of the time, they are close.
+
+## Floating Point Value Explorer
+
+[Here](./float_dump.cpp) is the source code of a program that
+takes floating point values (both single and double precision)
+apart.
+
+Here are some examples:
+
+```text
+% ./a.out    
+Must supply a floating point value as a command line argument.
+% 
+```
+
+This is what happens when you do not provide a value to examine.
+
+```text
+% ./a.out 1
+Component           Double                   Float                    Comment
+Value:              1                        1                        Delta(F - D): 0               
+Sign:               0                        0                        
+Exponent (hex):     3ff                      7f                       
+De-biased (dec):    0                        0                        
+Fraction (hex):     0                        0                        
+Halves:             0                        0                        
+Quarters:           0                        0                        
+Eighths:            0                        0                        
+Sixteenths:         0                        0                        
+Thirty seconds:     0                        0                        
+%
+```
+
+Above, we examine the value of 1.
+
+On the line marked "Value" you can see the values represented as double precision and as single precision. Under "Comment" you can see that there
+is no difference between the double and the single precision numbers.
+
+| Line | Meaning |
+| ---- | ------- |
+| Sign | 1 is a positive number so the sign bits are 0 |
+| Exponent | First, notice that the double precision exponent is 11 bits wide while the single precision exponent is only 8 bits wide. Next, notice the values... 0x3ff and 0x7f, which are 1023 and 127 respectively. The value of 1 is 1 times 2 raised to the power of 0. So why 1023 or 127?<br/>There is no sign bit for the exponent yet the exponent must support negative numbers. It does this by incorporating an offset (a bias) of 1023 and 127 respectively, so a stored 1023 or 127 works out to an exponent of 0. Anything above 1023 or 127 is a positive exponent. Anything below these values is a negative exponent. |
+| De-biased | These are the values of the exponent with their bias removed. Notice they work out to 0. So, the value of 1 is represented by 1 times 2 raised to the power of 0. |
+| Fraction | Zero??? Where does the 1 that we've been talking about get stored? It isn't. A value of 1 is always assumed to be the only value in front of the binary point in a `float` or `double`. Every floating point value is 1 plus a fraction, all multiplied by 2 raised to some power. |
+| Halves | There are no halves in the value of 1.|
+| Quarters | There are no quarters in the value of 1.|
+| Eighths | There are no eighths in the value of 1.|
+| Sixteenths | There are no sixteenths in the value of 1.|
+| Thirty Seconds | There are no thirty seconds in the value of 1.|
+
+Of course, there are more fractional places in a `float` and a `double` but listing them all wouldn't be a fun task and we're all about fun. :)
+
+How about a value of 1.5?
+
+```text
+Component           Double                   Float                    Comment
+Value:              1.5                      1.5                      Delta(F - D): 0               
+Sign:               0                        0                        
+Exponent (hex):     3ff                      7f                       
+De-biased (dec):    0                        0                        
+Fraction (hex):     8000000000000            400000                   
+Halves:             1                        1                        
+Quarters:           0                        0                        
+Eighths:            0                        0                        
+Sixteenths:         0                        0                        
+Thirty seconds:     0                        0
+```
+
+The only difference is that there is a bit turned on in the fraction. It is the most significant bit... there is a half in one and a half.
+
+2 ^ 0 = 1 +
+
+2 ^ -1 = &#189;
+
+Altogether makes 1.5.
+
+How about 1.875?
+
+```text
+Component           Double                   Float                    Comment
+Value:              1.875                    1.875                    Delta(F - D): 0               
+Sign:               0                        0                        
+Exponent (hex):     3ff                      7f                       
+De-biased (dec):    0                        0                        
+Fraction (hex):     e000000000000            700000                   
+Halves:             1                        1                        
+Quarters:           1                        1                        
+Eighths:            1                        1                        
+Sixteenths:         0                        0                        
+Thirty seconds:     0                        0
+```
+
+This says 1.875 is:
+
+2 ^ 0 = 1 +
+
+2 ^ -1 = &#189; +
+
+2 ^ -2 = &#188; +
+
+2 ^ -3 = &#8539;
+
+How about 8.375? This is the first time we are looking at
+a value which increases the (de-biased) exponent to non-zero.
+Things get a little more complicated.
+
+```text
+Component           Double                   Float                    Comment
+Value:              8.375                    8.375                    Delta(F - D): 0               
+Sign:               0                        0                        
+Exponent (hex):     402                      82                       
+De-biased (dec):    3                        3                        
+Fraction (hex):     c00000000000             60000                    
+Halves:             0                        0                        
+Quarters:           0                        0                        
+Eighths:            0                        0                        
+Sixteenths:         0                        0                        
+Thirty seconds:     1                        1    
+```
+
+Notice the exponent has changed. This says:
+
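+(2 ^ 0 = 1 +
+
+2 ^ -5 = 1/32 +
+
+2 ^ -6 = 1/64) x 2 ^ 3
+
+which is (1 + 1/32 + 1/64) x 8 = 8 + &#188; + &#8539; = 8.375. The 1/64 bit is also set in the fraction, but sixty-fourths are not among the rows shown.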