added complex ARM instructions

This commit is contained in:
Perry Kivolowitz 2022-06-18 11:32:19 -05:00
parent b3f1a63e51
commit 743550c02d
6 changed files with 234 additions and 140 deletions

View file

@ -212,12 +212,72 @@ side effect to changing this order.
`lsl` means "left shift logical" which fills the right side recently `lsl` means "left shift logical" which fills the right side recently
vacated bits with zero. vacated bits with zero.
Now, we present a more sophisticated version of `SetB`:
```asm
// SetB: store the low two bits of the value in w1 into bits 1..2 (field b)
// of the byte addressed by x0. All other bits of the byte are preserved.
// In: x0 = address of the byte, w1 = new field value. Scratch: w3.
SetB: ldrb w3, [x0] // w3 = *byte // 1
bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3, rest of w3 unchanged // 2
strb w3, [x0] // *byte = w3 // 3
ret // 4
```
Whoa. Nine instructions down to four! What the heck is `bfi`?
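`bfi` stands for "bit field insert". `bfi dst, src, start, width` copies
`width` bits starting at bit 0 in `src` to bits starting at `start` in `dst`,
leaving every other bit of `dst` unchanged. It obviates the need for `line 2` in
the naive code because it plucks only bits 0 and 1 from the original value
of `w1`. The `bfi` then internally does the shift appropriate to move
bit 0 of the original `w1` to bit `start` along with `width - 1`
subsequent bits. Because the rest of `dst` is untouched, the separate
clear-then-or steps of the naive code are no longer needed either.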
Some might argue that instructions like `bfi` (and `ubfiz`, described
below) are examples of `ISA creep`, where ISAs get
more and more cumbersome with the latest instructions du jour. This is
definitely true in the x86 ISA. Perhaps this is true in the AARCH64 ISA
as well, but certainly not to the extent of the x86. Remember that the ARM
family of processors are examples of RISC machines - *reduced instruction
set* architectures.
`ubfiz dest, src, start, width` first zeros all of `dest`, then
copies `width` bits of `src`, starting at bit 0, into bits `start`
through `start + width - 1` of `dest`.
Notice this version is two instructions shorter.
Part of the savings is the use of `ubfiz`.
`ubfiz` stands for Unsigned Bit Field Insert in Zeros. Wow.
This instruction does the following:
* zeros the entire destination register
* copies the low `width` bits of the source register into the destination, starting at bit `start`
Finally, we come to handling field `c`. Recall `c` is 5 bits long starting
at bit 3.
Clearing the bits in `c` is easily accomplished:
```c ```c
void ClearC(unsigned char * byte) { void ClearC(unsigned char * byte) {
*byte &= 7; // squashes bits 3 to 7 to 0 *byte &= 7; // squashes bits 3 to 7 to 0
} }
```
This is optimally implemented using:
```asm
// ClearC: zero bits 3..7 (field c) of the byte addressed by x0,
// keeping bits 0..2 as they were.
// In: x0 = address of the byte. Scratch: w1.
ClearC: ldrb w1, [x0] // w1 = *byte // 1
and w1, w1, 7 // keep bits 0..2, squash bits 3..7 to 0 // 2
strb w1, [x0] // *byte = w1 // 3
ret // 4
```
```c
void SetC(unsigned char * byte, unsigned char value) { void SetC(unsigned char * byte, unsigned char value) {
value &= 0x1F; // ensures only bits 0 to 4 can be set value &= 0x1F; // ensures only bits 0 to 4 can be set
*byte &= ~(0x1F << 3); // squashes correct bits in byte *byte &= ~(0x1F << 3); // squashes correct bits in byte
@ -228,7 +288,23 @@ void SetC(unsigned char * byte, unsigned char value) {
In naive assembly language, these functions would look like this: In naive assembly language, these functions would look like this:
```asm ```asm
// SetC (naive): store the low five bits of the value in w1 into bits 3..7
// (field c) of the byte addressed by x0, preserving bits 0..2.
// In: x0 = address of the byte, w1 = new field value. Scratch: w2, w3.
SetC: ldrb w3, [x0] // w3 = *byte // 1
mov w2, 0x1F // w2 = five-bit mask // 2
and w1, w1, w2 // value &= 0x1F // 3
lsl w1, w1, 3 // move value up to bits 3..7 // 4
lsl w2, w2, 3 // move mask up to bits 3..7 // 5
mvn w2, w2 // invert: w2 = ~(0x1F << 3) // 6
and w3, w3, w2 // squash field c in the loaded byte // 7
orr w3, w3, w1 // merge the new field value // 8
strb w3, [x0] // *byte = w3 // 9
ret // 10
``` ```
```asm
// SetC: store the low five bits of the value in w1 into bits 3..7 (field c)
// of the byte addressed by x0, preserving bits 0..2.
// In: x0 = address of the byte, w1 = new field value. Scratch: w2.
SetC: ldrb w2, [x0] // put *byte into w2 // 1
ubfiz w1, w1, 3, 5 // zero new w1, copy old bits 0..4 to bits 3..7 // 2
and w2, w2, 7 // keep only bits 0..2 of *byte (clears field c) // 3
orr w2, w2, w1 // or the shifted value into *byte // 4
strb w2, [x0] // *byte = w2 // 5
ret // 6
```

View file

@ -14,7 +14,8 @@ ClearA: ldrb w1, [x0]
ret ret
ClearB: ldrb w1, [x0] ClearB: ldrb w1, [x0]
and w1, w1, 0xF9 mov w2, 0xF9
and w1, w1, w2
strb w1, [x0] strb w1, [x0]
ret ret
@ -30,27 +31,15 @@ SetA: ldrb w1, [x0]
SetB: ldrb w3, [x0] SetB: ldrb w3, [x0]
and w1, w1, 3 // value &= 3 bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3
lsl w1, w1, 1
mov w2, 6
mvn w2, w2
and w3, w3, w2 // B is cleared
orr w3, w3, w1
strb w3, [x0] strb w3, [x0]
ret ret
SetC: ldrb w3, [x0] SetC: ldrb w2, [x0] // put *byte into w2
ubfiz w1, w1, 3, 5 // zero new w1, copy bits 0..4 to 3..7
mov w2, 0x1F and w2, w2, 7 // preserve only 1st 3 bits in *byte
and w1, w1, w2 orr w2, w2, w1 // or in value into *byte
lsl w1, w1, 3 strb w2, [x0]
lsl w2, w2, 3
mvn w2, w2
and w3, w3, w2
orr w3, w3, w1
strb w3, [x0]
ret ret
.end .end

View file

@ -1,10 +1,5 @@
SetB: ldrb w3, [x0] SetB: ldrb w3, [x0]
and w1, w1, 3 // value &= 3 bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3
lsl w1, w1, 1
mov w2, 6
mvn w2, w2
and w3, w3, w2 // B is cleared
orr w3, w3, w1
strb w3, [x0] strb w3, [x0]
ret ret

View file

@ -8,7 +8,8 @@ struct BF {
unsigned char noBF = 0; unsigned char noBF = 0;
#define C //#define C
#undef C
#ifdef C #ifdef C
/* Note the absence of defensive programming such as checking /* Note the absence of defensive programming such as checking
to ensure that byte is not null and that bit_number is not to ensure that byte is not null and that bit_number is not
@ -59,11 +60,11 @@ int main() {
bf.b = 2; bf.b = 2;
bf.c = 3; bf.c = 3;
ClearA(&noBF); SetA(&noBF);
SetB(&noBF, 1); SetB(&noBF, 2);
SetC(&noBF, 7); SetC(&noBF, 3);
printf("noBF should be 0x3A - value: 0x%X\n", (unsigned int) noBF); printf("noBF should be 0x1D - value: 0x%X\n", (unsigned int) noBF);
printf("bf should be 0x1D - value: 0x%X\n", (unsigned int) *((unsigned char *) &bf)); printf("bf should be 0x1D - value: 0x%X\n", (unsigned int) *((unsigned char *) &bf));
return 0; return 0;
} }

View file

@ -1,129 +1,151 @@
.section __TEXT,__text,regular,pure_instructions .arch armv8-a
.build_version macos, 12, 0 sdk_version 12, 3 .file "test.c"
.globl _ClearA ; -- Begin function ClearA .text
.p2align 2 .align 2
_ClearA: ; @ClearA .p2align 3,,7
.global ClearA
.type ClearA, %function
ClearA:
.LFB23:
.cfi_startproc .cfi_startproc
; %bb.0: ldrb w1, [x0]
ldrb w8, [x0] and w1, w1, -2
and w8, w8, #0xfe strb w1, [x0]
strb w8, [x0]
ret ret
.cfi_endproc .cfi_endproc
; -- End function .LFE23:
.globl _SetA ; -- Begin function SetA .size ClearA, .-ClearA
.p2align 2 .align 2
_SetA: ; @SetA .p2align 3,,7
.global SetA
.type SetA, %function
SetA:
.LFB24:
.cfi_startproc .cfi_startproc
; %bb.0: ldrb w1, [x0]
ldrb w8, [x0] orr w1, w1, 1
orr w8, w8, #0x1 strb w1, [x0]
strb w8, [x0]
ret ret
.cfi_endproc .cfi_endproc
; -- End function .LFE24:
.globl _ClearB ; -- Begin function ClearB .size SetA, .-SetA
.p2align 2 .align 2
_ClearB: ; @ClearB .p2align 3,,7
.global ClearB
.type ClearB, %function
ClearB:
.LFB25:
.cfi_startproc .cfi_startproc
; %bb.0: ldrb w1, [x0]
ldrb w8, [x0] and w1, w1, -7
and w8, w8, #0xfffffff9 strb w1, [x0]
strb w8, [x0]
ret ret
.cfi_endproc .cfi_endproc
; -- End function .LFE25:
.globl _SetB ; -- Begin function SetB .size ClearB, .-ClearB
.p2align 2 .align 2
_SetB: ; @SetB .p2align 3,,7
.global SetB
.type SetB, %function
SetB:
.LFB26:
.cfi_startproc .cfi_startproc
; %bb.0: ldrb w2, [x0]
ldrb w8, [x0] bfi w2, w1, 1, 2
and w8, w8, #0xfffffff9 strb w2, [x0]
ubfiz w9, w1, #1, #2
orr w8, w8, w9
strb w8, [x0]
ret ret
.cfi_endproc .cfi_endproc
; -- End function .LFE26:
.globl _ClearC ; -- Begin function ClearC .size SetB, .-SetB
.p2align 2 .align 2
_ClearC: ; @ClearC .p2align 3,,7
.global ClearC
.type ClearC, %function
ClearC:
.LFB27:
.cfi_startproc .cfi_startproc
; %bb.0: ldrb w1, [x0]
ldrb w8, [x0] and w1, w1, 7
and w8, w8, #0x7 strb w1, [x0]
strb w8, [x0]
ret ret
.cfi_endproc .cfi_endproc
; -- End function .LFE27:
.globl _SetC ; -- Begin function SetC .size ClearC, .-ClearC
.p2align 2 .align 2
_SetC: ; @SetC .p2align 3,,7
.global SetC
.type SetC, %function
SetC:
.LFB28:
.cfi_startproc .cfi_startproc
; %bb.0: ldrb w2, [x0] // load *byte into w2
ldrb w8, [x0] ubfiz w1, w1, 3, 5 // put zeros in bit 3 to 7 of value
bfi w8, w1, #3, #8 and w2, w2, 7 // blank bits 3 to 7 in *byte
strb w8, [x0] orr w2, w2, w1
strb w2, [x0]
ret ret
.cfi_endproc .cfi_endproc
; -- End function .LFE28:
.globl _main ; -- Begin function main .size SetC, .-SetC
.p2align 2 .section .rodata.str1.8,"aMS",@progbits,1
_main: ; @main .align 3
.LC0:
.string "noBF should be 0x3A - value: 0x%X\n"
.align 3
.LC1:
.string "bf should be 0x1D - value: 0x%X\n"
.section .text.startup,"ax",@progbits
.align 2
.p2align 3,,7
.global main
.type main, %function
main:
.LFB29:
.cfi_startproc .cfi_startproc
; %bb.0: stp x29, x30, [sp, -32]!
sub sp, sp, #48 .cfi_def_cfa_offset 32
stp x20, x19, [sp, #16] ; 16-byte Folded Spill .cfi_offset 29, -32
stp x29, x30, [sp, #32] ; 16-byte Folded Spill .cfi_offset 30, -24
add x29, sp, #32 mov w5, 2
.cfi_def_cfa w29, 16 mov w4, 3
.cfi_offset w30, -8 mov x29, sp
.cfi_offset w29, -16 ldrb w0, [sp, 24]
.cfi_offset w19, -24 adrp x2, .LANCHOR0
.cfi_offset w20, -32 adrp x1, .LC0
ldrb w8, [sp, #15] add x1, x1, :lo12:.LC0
orr w8, w8, #0x1 orr w0, w0, 1
strb w8, [sp, #15] strb w0, [sp, 24]
ldrb w8, [sp, #15] mov w0, 58
and w8, w8, #0xfffffff9 strb w0, [x2, #:lo12:.LANCHOR0]
orr w8, w8, #0x4 ldrb w3, [sp, 24]
strb w8, [sp, #15] mov w2, 58
ldrb w8, [sp, #15] mov w0, 1
mov w19, #24 bfi w3, w5, 1, 2
bfxil w19, w8, #0, #3 strb w3, [sp, 24]
strb w19, [sp, #15] ldrb w3, [sp, 24]
mov w8, #58 bfi w3, w4, 3, 5
adrp x9, _noBF@PAGE strb w3, [sp, 24]
strb w8, [x9, _noBF@PAGEOFF] bl __printf_chk
str x8, [sp] ldrb w2, [sp, 24]
Lloh0: adrp x1, .LC1
adrp x0, l_.str@PAGE mov w0, 1
Lloh1: add x1, x1, :lo12:.LC1
add x0, x0, l_.str@PAGEOFF bl __printf_chk
bl _printf mov w0, 0
str x19, [sp] ldp x29, x30, [sp], 32
Lloh2: .cfi_restore 30
adrp x0, l_.str.1@PAGE .cfi_restore 29
Lloh3: .cfi_def_cfa_offset 0
add x0, x0, l_.str.1@PAGEOFF
bl _printf
mov w0, #0
ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
add sp, sp, #48
ret ret
.loh AdrpAdd Lloh2, Lloh3
.loh AdrpAdd Lloh0, Lloh1
.cfi_endproc .cfi_endproc
; -- End function .LFE29:
.globl _noBF ; @noBF .size main, .-main
.zerofill __DATA,__common,_noBF,1,0 .global noBF
.section __TEXT,__cstring,cstring_literals .bss
l_.str: ; @.str .set .LANCHOR0,. + 0
.asciz "noBF should be 0x3A - value: 0x%X\n" .type noBF, %object
.size noBF, 1
l_.str.1: ; @.str.1 noBF:
.asciz "bf should be 0x1D - value: 0x%X\n" .zero 1
.ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
.subsections_via_symbols .section .note.GNU-stack,"",@progbits

View file

@ -0,0 +1,11 @@
// Stand-alone experiment showing that ubfiz zeros its destination before
// inserting the bit field. w1 is preloaded with 0xFF, then receives the
// low 4 bits of w2 (0x0A) at bit 0, leaving w1 == 0x0A.
// Returns 0 in w0.
        .text
        .global main
        .align  2

main:
        str     x30, [sp, #-16]!        // push link register, 16-byte slot
        mov     w1, #0xFF               // destination starts with bits 0..7 set
        mov     w2, #0x0A               // source bit pattern
        ubfiz   w1, w2, #0, #4          // w1 = zeros, then bits 0..3 of w2 at bit 0
        ldr     x30, [sp], #16          // pop link register
        mov     w0, wzr                 // exit status 0
        ret