diff --git a/section_2/bitfields/README.md b/section_2/bitfields/README.md index 031dd41..e59388f 100644 --- a/section_2/bitfields/README.md +++ b/section_2/bitfields/README.md @@ -212,12 +212,72 @@ side effect to changing this order. `lsl` means "left shift logical" which fills the right side recently vacated bits with zero. +Now, we present a more sophisticated version of `SetB`: + +```asm +SetB: ldrb w3, [x0] // 1 + bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3 // 2 + strb w3, [x0] // 3 + ret // 4 +``` + +Whoa. Nine instructions down to four! What the heck is `bfi`? + +`bfi dst, src, start, width` copies `width` bits starting at 0 in `src` +to bits starting at `start` in `dst`. It obviates the need for `line 2` in +the naive code because it plucks only bits 0 and 1 from the original value +of `w1`. The `bfi` then internally does the shift appropriate to move +bit 0 of the original `w1` to bit `start` along with `width - 1` +subsequent bits. + +Some might argue that instructions like `bfi` (and `ubfiz` described +below) are examples of `ISA creep`, where ISAs get +more and more cumbersome with the latest instructions du jour. This is +definitely true in the x86 ISA. Perhaps this is true in the AARCH64 ISA +as well, but certainly not to the extent of the x86. Remember that the ARM +family of processors are examples of RISC machines - *reduced instruction +set* architectures. + +UBFIZ dest, src, start, width + +zeros dest +copies src starting at 0 to bits start to start + width - 1. + +Notice this version is two instructions shorter. + +Part of the savings is the use of `ubfiz`. + + + +`ubfiz` stands for Unsigned Bit Field Insert in Zeros. Wow. + +This instruction does the following: + +* zeros the entire destination register +* copies the indicated source register bits to the destination + + +Finally, we come to handling field `c`. Recall `c` is 5 bits long starting +at bit 3. 
+ +Clearing the bits in `c` is easily accomplished: ```c void ClearC(unsigned char * byte) { *byte &= 7; // squashes bits 3 to 7 to 0 } +``` +This is optimally implemented using: + +```asm +ClearC: ldrb w1, [x0] // 1 + and w1, w1, 7 // 2 + strb w1, [x0] // 3 + ret // 4 +``` + +```c void SetC(unsigned char * byte, unsigned char value) { value &= 0x1F; // ensures only bits 0 to 4 can be set *byte &= ~(0x1F << 3); // squashes correct bits in byte @@ -228,7 +288,23 @@ void SetC(unsigned char * byte, unsigned char value) { In naive assembly language, these functions would look like this: ```asm +SetC: ldrb w3, [x0] // 1 + mov w2, 0x1F // 2 + and w1, w1, w2 // 3 + lsl w1, w1, 3 // 4 + lsl w2, w2, 3 // 5 + mvn w2, w2 // 6 + and w3, w3, w2 // 7 + orr w3, w3, w1 // 8 + strb w3, [x0] // 9 + ret // 10 ``` - - +```asm +SetC: ldrb w2, [x0] // put *byte into w2 // 1 + ubfiz w1, w1, 3, 5 // zero new w1, copy bits 0..4 to 3..7 // 2 + and w2, w2, 7 // preserve only 1st 3 bits in *byte // 3 + orr w2, w2, w1 // or in value into *byte // 4 + strb w2, [x0] // 5 + ret // 6 +``` diff --git a/section_2/bitfields/sophisticated.s b/section_2/bitfields/sophisticated.s index b7b00e2..267f1e2 100644 --- a/section_2/bitfields/sophisticated.s +++ b/section_2/bitfields/sophisticated.s @@ -14,7 +14,8 @@ ClearA: ldrb w1, [x0] ret ClearB: ldrb w1, [x0] - and w1, w1, 0xF9 + mov w2, 0xF9 + and w1, w1, w2 strb w1, [x0] ret @@ -30,27 +31,15 @@ SetA: ldrb w1, [x0] SetB: ldrb w3, [x0] - and w1, w1, 3 // value &= 3 - lsl w1, w1, 1 - mov w2, 6 - mvn w2, w2 - and w3, w3, w2 // B is cleared - orr w3, w3, w1 + bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3 strb w3, [x0] ret -SetC: ldrb w3, [x0] - - mov w2, 0x1F - and w1, w1, w2 - lsl w1, w1, 3 - - lsl w2, w2, 3 - mvn w2, w2 - and w3, w3, w2 - - orr w3, w3, w1 - strb w3, [x0] +SetC: ldrb w2, [x0] // put *byte into w2 + ubfiz w1, w1, 3, 5 // zero new w1, copy bits 0..4 to 3..7 + and w2, w2, 7 // preserve only 1st 3 bits in *byte + orr w2, w2, w1 // or 
in value into *byte + strb w2, [x0] ret .end diff --git a/section_2/bitfields/temp.txt b/section_2/bitfields/temp.txt index 3bbc24e..6036bb0 100644 --- a/section_2/bitfields/temp.txt +++ b/section_2/bitfields/temp.txt @@ -1,10 +1,5 @@ SetB: ldrb w3, [x0] - and w1, w1, 3 // value &= 3 - lsl w1, w1, 1 - mov w2, 6 - mvn w2, w2 - and w3, w3, w2 // B is cleared - orr w3, w3, w1 + bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3 strb w3, [x0] ret \ No newline at end of file diff --git a/section_2/bitfields/test.c b/section_2/bitfields/test.c index fcdad5d..77e351a 100644 --- a/section_2/bitfields/test.c +++ b/section_2/bitfields/test.c @@ -8,7 +8,8 @@ struct BF { unsigned char noBF = 0; -#define C +//#define C +#undef C #ifdef C /* Note the absence of defensive programming such as checking to ensure that byte is not null and that bit_number is not @@ -59,11 +60,11 @@ int main() { bf.b = 2; bf.c = 3; - ClearA(&noBF); - SetB(&noBF, 1); - SetC(&noBF, 7); + SetA(&noBF); + SetB(&noBF, 2); + SetC(&noBF, 3); - printf("noBF should be 0x3A - value: 0x%X\n", (unsigned int) noBF); + printf("noBF should be 0x1D - value: 0x%X\n", (unsigned int) noBF); printf("bf should be 0x1D - value: 0x%X\n", (unsigned int) *((unsigned char *) &bf)); return 0; } diff --git a/section_2/bitfields/test.s b/section_2/bitfields/test.s index 3aa4c91..4fc9ade 100644 --- a/section_2/bitfields/test.s +++ b/section_2/bitfields/test.s @@ -1,129 +1,151 @@ - .section __TEXT,__text,regular,pure_instructions - .build_version macos, 12, 0 sdk_version 12, 3 - .globl _ClearA ; -- Begin function ClearA - .p2align 2 -_ClearA: ; @ClearA + .arch armv8-a + .file "test.c" + .text + .align 2 + .p2align 3,,7 + .global ClearA + .type ClearA, %function +ClearA: +.LFB23: .cfi_startproc -; %bb.0: - ldrb w8, [x0] - and w8, w8, #0xfe - strb w8, [x0] + ldrb w1, [x0] + and w1, w1, -2 + strb w1, [x0] ret .cfi_endproc - ; -- End function - .globl _SetA ; -- Begin function SetA - .p2align 2 -_SetA: ; @SetA +.LFE23: + .size 
ClearA, .-ClearA + .align 2 + .p2align 3,,7 + .global SetA + .type SetA, %function +SetA: +.LFB24: .cfi_startproc -; %bb.0: - ldrb w8, [x0] - orr w8, w8, #0x1 - strb w8, [x0] + ldrb w1, [x0] + orr w1, w1, 1 + strb w1, [x0] ret .cfi_endproc - ; -- End function - .globl _ClearB ; -- Begin function ClearB - .p2align 2 -_ClearB: ; @ClearB +.LFE24: + .size SetA, .-SetA + .align 2 + .p2align 3,,7 + .global ClearB + .type ClearB, %function +ClearB: +.LFB25: .cfi_startproc -; %bb.0: - ldrb w8, [x0] - and w8, w8, #0xfffffff9 - strb w8, [x0] + ldrb w1, [x0] + and w1, w1, -7 + strb w1, [x0] ret .cfi_endproc - ; -- End function - .globl _SetB ; -- Begin function SetB - .p2align 2 -_SetB: ; @SetB +.LFE25: + .size ClearB, .-ClearB + .align 2 + .p2align 3,,7 + .global SetB + .type SetB, %function +SetB: +.LFB26: .cfi_startproc -; %bb.0: - ldrb w8, [x0] - and w8, w8, #0xfffffff9 - ubfiz w9, w1, #1, #2 - orr w8, w8, w9 - strb w8, [x0] + ldrb w2, [x0] + bfi w2, w1, 1, 2 + strb w2, [x0] ret .cfi_endproc - ; -- End function - .globl _ClearC ; -- Begin function ClearC - .p2align 2 -_ClearC: ; @ClearC +.LFE26: + .size SetB, .-SetB + .align 2 + .p2align 3,,7 + .global ClearC + .type ClearC, %function +ClearC: +.LFB27: .cfi_startproc -; %bb.0: - ldrb w8, [x0] - and w8, w8, #0x7 - strb w8, [x0] + ldrb w1, [x0] + and w1, w1, 7 + strb w1, [x0] ret .cfi_endproc - ; -- End function - .globl _SetC ; -- Begin function SetC - .p2align 2 -_SetC: ; @SetC +.LFE27: + .size ClearC, .-ClearC + .align 2 + .p2align 3,,7 + .global SetC + .type SetC, %function +SetC: +.LFB28: .cfi_startproc -; %bb.0: - ldrb w8, [x0] - bfi w8, w1, #3, #8 - strb w8, [x0] + ldrb w2, [x0] // load *byte into w2 + ubfiz w1, w1, 3, 5 // put zeros in bit 3 to 7 of value + and w2, w2, 7 // blank bits 3 to 7 in *byte + orr w2, w2, w1 + strb w2, [x0] ret .cfi_endproc - ; -- End function - .globl _main ; -- Begin function main - .p2align 2 -_main: ; @main +.LFE28: + .size SetC, .-SetC + .section .rodata.str1.8,"aMS",@progbits,1 + 
.align 3 +.LC0: + .string "noBF should be 0x3A - value: 0x%X\n" + .align 3 +.LC1: + .string "bf should be 0x1D - value: 0x%X\n" + .section .text.startup,"ax",@progbits + .align 2 + .p2align 3,,7 + .global main + .type main, %function +main: +.LFB29: .cfi_startproc -; %bb.0: - sub sp, sp, #48 - stp x20, x19, [sp, #16] ; 16-byte Folded Spill - stp x29, x30, [sp, #32] ; 16-byte Folded Spill - add x29, sp, #32 - .cfi_def_cfa w29, 16 - .cfi_offset w30, -8 - .cfi_offset w29, -16 - .cfi_offset w19, -24 - .cfi_offset w20, -32 - ldrb w8, [sp, #15] - orr w8, w8, #0x1 - strb w8, [sp, #15] - ldrb w8, [sp, #15] - and w8, w8, #0xfffffff9 - orr w8, w8, #0x4 - strb w8, [sp, #15] - ldrb w8, [sp, #15] - mov w19, #24 - bfxil w19, w8, #0, #3 - strb w19, [sp, #15] - mov w8, #58 - adrp x9, _noBF@PAGE - strb w8, [x9, _noBF@PAGEOFF] - str x8, [sp] -Lloh0: - adrp x0, l_.str@PAGE -Lloh1: - add x0, x0, l_.str@PAGEOFF - bl _printf - str x19, [sp] -Lloh2: - adrp x0, l_.str.1@PAGE -Lloh3: - add x0, x0, l_.str.1@PAGEOFF - bl _printf - mov w0, #0 - ldp x29, x30, [sp, #32] ; 16-byte Folded Reload - ldp x20, x19, [sp, #16] ; 16-byte Folded Reload - add sp, sp, #48 + stp x29, x30, [sp, -32]! 
+ .cfi_def_cfa_offset 32 + .cfi_offset 29, -32 + .cfi_offset 30, -24 + mov w5, 2 + mov w4, 3 + mov x29, sp + ldrb w0, [sp, 24] + adrp x2, .LANCHOR0 + adrp x1, .LC0 + add x1, x1, :lo12:.LC0 + orr w0, w0, 1 + strb w0, [sp, 24] + mov w0, 58 + strb w0, [x2, #:lo12:.LANCHOR0] + ldrb w3, [sp, 24] + mov w2, 58 + mov w0, 1 + bfi w3, w5, 1, 2 + strb w3, [sp, 24] + ldrb w3, [sp, 24] + bfi w3, w4, 3, 5 + strb w3, [sp, 24] + bl __printf_chk + ldrb w2, [sp, 24] + adrp x1, .LC1 + mov w0, 1 + add x1, x1, :lo12:.LC1 + bl __printf_chk + mov w0, 0 + ldp x29, x30, [sp], 32 + .cfi_restore 30 + .cfi_restore 29 + .cfi_def_cfa_offset 0 ret - .loh AdrpAdd Lloh2, Lloh3 - .loh AdrpAdd Lloh0, Lloh1 .cfi_endproc - ; -- End function - .globl _noBF ; @noBF -.zerofill __DATA,__common,_noBF,1,0 - .section __TEXT,__cstring,cstring_literals -l_.str: ; @.str - .asciz "noBF should be 0x3A - value: 0x%X\n" - -l_.str.1: ; @.str.1 - .asciz "bf should be 0x1D - value: 0x%X\n" - -.subsections_via_symbols +.LFE29: + .size main, .-main + .global noBF + .bss + .set .LANCHOR0,. + 0 + .type noBF, %object + .size noBF, 1 +noBF: + .zero 1 + .ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0" + .section .note.GNU-stack,"",@progbits diff --git a/section_2/bitfields/ubfiz.s b/section_2/bitfields/ubfiz.s new file mode 100644 index 0000000..41e2691 --- /dev/null +++ b/section_2/bitfields/ubfiz.s @@ -0,0 +1,11 @@ + .global main + .text + .align 2 + +main: str x30, [sp, -16]! + mov w1, 0xFF + mov w2, 0x0A + ubfiz w1, w2, 0, 4 + ldr x30, [sp], 16 + mov w0, wzr + ret