From dc6ac5a73a24584df2b2c0e4ca02e506ef6c59c1 Mon Sep 17 00:00:00 2001 From: Perry Kivolowitz Date: Fri, 17 Jun 2022 21:30:47 -0500 Subject: [PATCH] beginnings of bit fields --- section_2/bitfields/README.md | 111 +++++++++++++++++++++++ section_2/bitfields/naive.s | 65 ++++++++++++++ section_2/bitfields/test.c | 69 ++++++++++++++ section_2/bitfields/test.s | 164 ++++++++++++++++++++++++++++++++++ 4 files changed, 409 insertions(+) create mode 100644 section_2/bitfields/README.md create mode 100644 section_2/bitfields/naive.s create mode 100644 section_2/bitfields/test.c create mode 100644 section_2/bitfields/test.s diff --git a/section_2/bitfields/README.md b/section_2/bitfields/README.md new file mode 100644 index 0000000..98b081f --- /dev/null +++ b/section_2/bitfields/README.md @@ -0,0 +1,111 @@ +# Section 2 / Bit Fields + +## Overview + +Many C and C++ programmers have never seen bit fields. + +Bit fields are a +feature of the C and C++ language which completely hide what is often +called "bit bashing". + +Bit bashing is the manipulation of individual bits. Bit +bashing goes to the very core of the C language. Remember that C is a +high level assembly language, as we argue in Section 1 of this book. +And C is the (later) language in which Unix was implemented and indeed, +C was +developed specifically to implement Unix. + +Since an operating system directly +interfaces with hardware - the C language grew to have features +to aid Unix implementers. + +*With that said, consider this WARNING: the ordering of bits in a bit +field is not guaranteed to be the same on different platforms and even +between different compilers on the same platform.* + +Bit fields are implemented within a `struct` by appending a colon plus +a number after the declaration of integer types. + +For example: + +```c +struct BF { + unsigned char a : 1; + unsigned char b : 2; + unsigned char c : 5; +}; +``` + +The above declares a `struct` whose size is 1 byte. 
Members of the `struct` +are `a`, `b` and `c` which are 1, 2 and 5 bits in size, respectively. + +## Bit Fields Aren't Just For Hardware + +Consider a data structure for which there will be potentially millions of +instances in RAM. Or, perhaps billions of instances on disc. Suppose you +need 8 boolean members in every instance. The C++ standard does not +define the size of a `bool`, instead leaving it to be implementation +dependent. Some implementations equate `bool` to `int`, four bytes in +length. Some implement `bool` with a `char`, or 1 byte in length. + +Let's assume the smallest case and equate a `bool` with `char`. Our +`struct`, for which there may be millions or billions of instances, +requires 8 `bool` so therefore 8 bytes. Times millions or billions. + +Ouch. + +Bit fields can come to your aid here by using a single bit per boolean +value. In the best case, 8 bytes collapse to 1 byte. In a worse case, +8 x 4 = 32 bytes collapsed into 1. + +## Without Bit Fields + +Let's assume we're working with a byte that is comprised of three +fields laid out as in `struct BF` above. That is, a one, two and +five bit field inside one byte. + +Without bit fields, we would have to write this code: + +```c +/* Note the absence of defensive programming such as checking + to ensure that byte is not null and that bit_number is not + too large.
+*/ + +void ClearA(unsigned char * byte) { + *byte &= ~1; +} + +void SetA(unsigned char * byte) { + *byte &= ~1; + *byte |= 1; +} + +void ClearB(unsigned char * byte) { + *byte &= ~6; +} + +void SetB(unsigned char * byte, unsigned char value) { + value &= 3; // ensures only bits 0 and 1 can be set + *byte &= ~6; // clears bits 1 and 2 in byte + *byte |= (value << 1); // stores bits 0 and 1 of value into bits 1 and 2 of byte +} + +void ClearC(unsigned char * byte) { + *byte &= 7; // squashes bits 3 to 7 to 0 +} + +void SetC(unsigned char * byte, unsigned char value) { + value &= 0x1F; // ensures only bits 0 to 4 can be set + *byte &= ~(0x1F << 3); // squashes correct bits in byte + *byte |= (value << 3); // or's in the bits at the right place +} +``` + +In naive assembly language, these functions would look like this: + +```asm +``` + + + diff --git a/section_2/bitfields/naive.s b/section_2/bitfields/naive.s new file mode 100644 index 0000000..c62530c --- /dev/null +++ b/section_2/bitfields/naive.s @@ -0,0 +1,65 @@ + .global SetA + .global SetB + .global SetC + .global ClearA + .global ClearB + .global ClearC + + .text + .align 2 + +ClearA: ldrb w1, [x0] // w1 = *byte + mov w2, 1 + mvn w2, w2 // w2 = ~1 + and w1, w1, w2 // clear bit 0 (field a) + strb w1, [x0] // *byte = w1 + ret + +ClearB: ldrb w1, [x0] // w1 = *byte + mov w2, 6 + mvn w2, w2 // w2 = ~6 + and w1, w1, w2 // clear bits 1 and 2 (field b) + strb w1, [x0] // *byte = w1 + ret + +ClearC: ldrb w1, [x0] // w1 = *byte + and w1, w1, 7 // keep bits 0 to 2, clear bits 3 to 7 (field c) + strb w1, [x0] // *byte = w1 + ret + +SetA: ldrb w1, [x0] // w1 = *byte + mov w2, 1 + mvn w2, w2 // w2 = ~1 + and w1, w1, w2 // clear bit 0, mirroring the C version + orr w1, w1, 1 // set bit 0 + strb w1, [x0] // *byte = w1 + ret + + +SetB: ldrb w3, [x0] // w3 = *byte + and w1, w1, 3 // value &= 3 + lsl w1, w1, 1 // value <<= 1 + mov w2, 6 + mvn w2, w2 // w2 = ~6 + and w3, w3, w2 // B is cleared + orr w3, w3, w1 // merge value into bits 1 and 2 + strb w3, [x0] // *byte = w3 + ret + +SetC: ldrb w3, [x0] // w3 = *byte + + mov w2, 0x1F + and w1, w1, w2 // value &= 0x1F + lsl w1, w1, 3 // value <<= 3 + + lsl w2, w2, 3 + mvn w2, w2 // w2 = ~(0x1F << 3) + and w3, w3, w2 // C is cleared + + orr w3, w3, w1 // merge value into bits 3 to 7 + strb w3, [x0] // *byte = w3 + ret + + .end + + \ No newline at end of file diff --git a/section_2/bitfields/test.c b/section_2/bitfields/test.c new file mode 100644 index 0000000..7d74546 --- /dev/null +++
b/section_2/bitfields/test.c @@ -0,0 +1,69 @@ +#include <stdio.h> + +struct BF { + unsigned char a : 1; + unsigned char b : 2; + unsigned char c : 5; +}; + +unsigned char noBF = 0; + +#undef C // C is never defined here, so the #else branch (extern declarations) is what gets compiled +#ifdef C +/* Note the absence of defensive programming such as checking + to ensure that byte is not null and that bit_number is not + too large. +*/ + +void ClearA(unsigned char * byte) { + *byte &= ~1; +} + +void SetA(unsigned char * byte) { + *byte &= ~1; + *byte |= 1; +} + +void ClearB(unsigned char * byte) { + *byte &= ~6; +} + +void SetB(unsigned char * byte, unsigned char value) { + value &= 3; // ensures only bits 0 and 1 can be set + *byte &= ~6; // clears bits 1 and 2 in byte + *byte |= (value << 1); // stores bits 0 and 1 of value into bits 1 and 2 of byte +} + +void ClearC(unsigned char * byte) { + *byte &= 7; // squashes bits 3 to 7 to 0 +} + +void SetC(unsigned char * byte, unsigned char value) { + value &= 0x1F; // ensures only bits 0 to 4 can be set + *byte &= ~(0x1F << 3); // squashes correct bits in byte + *byte |= (value << 3); // or's in the bits at the right place +} +#else +extern void SetA(unsigned char *); +extern void SetB(unsigned char *, unsigned char); +extern void SetC(unsigned char *, unsigned char); +extern void ClearA(unsigned char *); +extern void ClearB(unsigned char *); +extern void ClearC(unsigned char *); +#endif + +int main(void) { + volatile struct BF bf; + + bf.a = 1; + bf.b = 2; + bf.c = 3; + + ClearA(&noBF); + SetB(&noBF, 1); + SetC(&noBF, 7); + + printf("noBF should be 0x3A - value: 0x%X\n", (unsigned int) noBF); + printf("bf should be 0x1D - value: 0x%X\n", (unsigned int) *((volatile unsigned char *) &bf)); // keep volatile: bf is a volatile object + return 0; +} diff --git a/section_2/bitfields/test.s b/section_2/bitfields/test.s new file mode 100644 index 0000000..9e006c9 --- /dev/null +++ b/section_2/bitfields/test.s @@ -0,0 +1,164 @@ + .arch armv8-a + .file "test.c" + .text + .align 2 + .p2align 3,,7 + .global Foo + .type Foo, %function +Foo: +.LFB23: + .cfi_startproc + and w0, w0, 255 + mvn w0, w0 + ret +
.cfi_endproc +.LFE23: + .size Foo, .-Foo + .align 2 + .p2align 3,,7 + .global ClearA + .type ClearA, %function +ClearA: +.LFB24: + .cfi_startproc + ldrb w1, [x0] + and w1, w1, -2 // -2 == ~1: clear bit 0 (field a) + strb w1, [x0] + ret + .cfi_endproc +.LFE24: + .size ClearA, .-ClearA + .align 2 + .p2align 3,,7 + .global SetA + .type SetA, %function +SetA: +.LFB25: + .cfi_startproc + ldrb w1, [x0] + orr w1, w1, 1 // set bit 0; no separate clear is emitted + strb w1, [x0] + ret + .cfi_endproc +.LFE25: + .size SetA, .-SetA + .align 2 + .p2align 3,,7 + .global ClearB + .type ClearB, %function +ClearB: +.LFB26: + .cfi_startproc + ldrb w1, [x0] + and w1, w1, -7 // -7 == ~6: clear bits 1 and 2 (field b) + strb w1, [x0] + ret + .cfi_endproc +.LFE26: + .size ClearB, .-ClearB + .align 2 + .p2align 3,,7 + .global SetB + .type SetB, %function +SetB: +.LFB27: + .cfi_startproc + ldrb w2, [x0] + bfi w2, w1, 1, 2 // insert the low 2 bits of w1 into bits 1 and 2 of w2 + strb w2, [x0] + ret + .cfi_endproc +.LFE27: + .size SetB, .-SetB + .align 2 + .p2align 3,,7 + .global ClearC + .type ClearC, %function +ClearC: +.LFB28: + .cfi_startproc + ldrb w1, [x0] + and w1, w1, 7 // keep bits 0 to 2, clear bits 3 to 7 (field c) + strb w1, [x0] + ret + .cfi_endproc +.LFE28: + .size ClearC, .-ClearC + .align 2 + .p2align 3,,7 + .global SetC + .type SetC, %function +SetC: +.LFB29: + .cfi_startproc + ldrb w2, [x0] + ubfiz w1, w1, 3, 5 // w1 = (w1 & 0x1F) << 3 + and w2, w2, 7 // clear bits 3 to 7 of the loaded byte + orr w2, w2, w1 // merge the shifted value in + strb w2, [x0] + ret + .cfi_endproc +.LFE29: + .size SetC, .-SetC + .section .rodata.str1.8,"aMS",@progbits,1 + .align 3 +.LC0: + .string "noBF should be 0x3A - value: 0x%X\n" + .align 3 +.LC1: + .string "bf should be 0x1D - value: 0x%X\n" + .section .text.startup,"ax",@progbits + .align 2 + .p2align 3,,7 + .global main + .type main, %function +main: +.LFB30: + .cfi_startproc + stp x29, x30, [sp, -32]!
+ .cfi_def_cfa_offset 32 + .cfi_offset 29, -32 + .cfi_offset 30, -24 + mov w5, 2 // value later inserted into bits 1 and 2 (bf.b) + mov w4, 3 // value later inserted into bits 3 to 7 (bf.c) + mov x29, sp + ldrb w0, [sp, 24] // load the byte at sp+24 that holds bf + adrp x2, .LANCHOR0 + adrp x1, .LC0 + add x1, x1, :lo12:.LC0 + orr w0, w0, 1 // bf.a = 1 + strb w0, [sp, 24] + mov w0, 58 // 58 == 0x3A: no calls to ClearA/SetB/SetC remain, only this constant + strb w0, [x2, #:lo12:.LANCHOR0] // store 0x3A to noBF (.LANCHOR0 is set at noBF) + ldrb w3, [sp, 24] + mov w2, 58 // printf value argument, again the constant 0x3A + mov w0, 1 + bfi w3, w5, 1, 2 // bf.b = 2 + strb w3, [sp, 24] + ldrb w3, [sp, 24] // bf is reloaded before each field update + bfi w3, w4, 3, 5 // bf.c = 3 + strb w3, [sp, 24] + bl __printf_chk + ldrb w2, [sp, 24] // printf value argument: the byte holding bf + adrp x1, .LC1 + mov w0, 1 + add x1, x1, :lo12:.LC1 + bl __printf_chk + mov w0, 0 // return 0 + ldp x29, x30, [sp], 32 + .cfi_restore 30 + .cfi_restore 29 + .cfi_def_cfa_offset 0 + ret + .cfi_endproc +.LFE30: + .size main, .-main + .global noBF + .bss + .set .LANCHOR0,. + 0 + .type noBF, %object + .size noBF, 1 +noBF: + .zero 1 + .ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0" + .section .note.GNU-stack,"",@progbits