added complex ARM instructions

This commit is contained in:
Perry Kivolowitz 2022-06-18 11:32:19 -05:00
parent b3f1a63e51
commit 743550c02d
6 changed files with 234 additions and 140 deletions

View file

@ -212,12 +212,72 @@ side effect to changing this order.
`lsl` means "logical shift left", which fills the bits vacated on the
right with zeros.
Now, we present a more sophisticated version of `SetB`:
```asm
// SetB: store the low two bits of w1 into bits 1..2 of the byte at [x0].
// In: x0 = address of the byte, w1 = new field value. Uses w3 as scratch.
SetB: ldrb w3, [x0] // load *byte into w3 // 1
bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3 // 2
strb w3, [x0] // store the updated byte back // 3
ret // 4
```
Whoa. Nine instructions down to four! What the heck is `bfi`?
`bfi dst, src, start, width` copies `width` bits starting at bit 0 in `src`
to bits starting at `start` in `dst`. Every other bit of `dst` is left
unchanged, which is why the naive code's separate clear-then-`orr` steps
are no longer needed. It also obviates the need for `line 2` in
the naive code because it plucks only bits 0 and 1 from the original value
of `w1`. The `bfi` then internally does the shift appropriate to move
bit 0 of the original `w1` to bit `start` along with `width - 1`
subsequent bits.
Some might argue that instructions like `bfi` (and `ubfiz` described
below) are examples of `ISA creep`, where ISAs get
more and more cumbersome with the latest instructions du jour. This is
definitely true in the x86 ISA. Perhaps this is true in the AARCH64 ISA
as well, but certainly not to the extent of the x86. Remember that the ARM
family of processors are examples of RISC machines - *reduced instruction
set* architectures.
`ubfiz dest, src, start, width` zeros `dest` and then copies bits 0 to
`width - 1` of `src` into bits `start` to `start + width - 1` of `dest`.
Notice this version is two instructions shorter.
Part of the savings is the use of `ubfiz`.
`ubfiz` stands for Unsigned Bit Field Insert in Zeros. Wow.
This instruction does the following:
* zeros the entire destination register
* copies the indicated source register bits to the destination
Finally, we come to handling field `c`. Recall `c` is 5 bits long starting
at bit 3.
Clearing the bits in `c` is easily accomplished:
```c
/* Clears field c: zeroes bits 3 to 7 of *byte and keeps bits 0 to 2. */
void ClearC(unsigned char * byte) {
    unsigned char low_three = *byte & 0x07; // only bits 0 to 2 survive
    *byte = low_three;
}
```
This is optimally implemented using:
```asm
// ClearC: zero bits 3..7 of the byte at [x0]; bits 0..2 are kept.
// In: x0 = address of the byte. Uses w1 as scratch.
ClearC: ldrb w1, [x0] // load *byte into w1 // 1
and w1, w1, 7 // keep only bits 0..2 // 2
strb w1, [x0] // store the result back to *byte // 3
ret // 4
```
```c
void SetC(unsigned char * byte, unsigned char value) {
value &= 0x1F; // ensures only bits 0 to 4 can be set
*byte &= ~(0x1F << 3); // squashes correct bits in byte
@ -228,7 +288,23 @@ void SetC(unsigned char * byte, unsigned char value) {
In naive assembly language, these functions would look like this:
```asm
// SetC (naive): write the low 5 bits of w1 into bits 3..7 of the byte at [x0].
// In: x0 = address of the byte, w1 = value. Uses w2 and w3 as scratch.
SetC: ldrb w3, [x0] // load *byte into w3 // 1
mov w2, 0x1F // 5-bit mask // 2
and w1, w1, w2 // value &= 0x1F // 3
lsl w1, w1, 3 // move value up to bits 3..7 // 4
lsl w2, w2, 3 // move mask up to bits 3..7 // 5
mvn w2, w2 // invert the mask: ~(0x1F << 3) // 6
and w3, w3, w2 // squash bits 3..7 in *byte // 7
orr w3, w3, w1 // merge the shifted value into *byte // 8
strb w3, [x0] // store the result back to *byte // 9
ret // 10
```
```asm
// SetC: write the low 5 bits of w1 into bits 3..7 of the byte at [x0].
// In: x0 = address of the byte, w1 = value (overwritten). Uses w2 as scratch.
SetC: ldrb w2, [x0] // put *byte into w2 // 1
ubfiz w1, w1, 3, 5 // zero new w1, copy bits 0..4 to 3..7 // 2
and w2, w2, 7 // preserve only 1st 3 bits in *byte // 3
orr w2, w2, w1 // or in value into *byte // 4
strb w2, [x0] // store the result back to *byte // 5
ret // 6
```

View file

@ -14,7 +14,8 @@ ClearA: ldrb w1, [x0]
ret
ClearB: ldrb w1, [x0]
and w1, w1, 0xF9
mov w2, 0xF9
and w1, w1, w2
strb w1, [x0]
ret
@ -30,27 +31,15 @@ SetA: ldrb w1, [x0]
SetB: ldrb w3, [x0]
and w1, w1, 3 // value &= 3
lsl w1, w1, 1
mov w2, 6
mvn w2, w2
and w3, w3, w2 // B is cleared
orr w3, w3, w1
bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3
strb w3, [x0]
ret
SetC: ldrb w3, [x0]
mov w2, 0x1F
and w1, w1, w2
lsl w1, w1, 3
lsl w2, w2, 3
mvn w2, w2
and w3, w3, w2
orr w3, w3, w1
strb w3, [x0]
// SetC: write the low 5 bits of w1 into bits 3..7 of the byte at [x0].
// In: x0 = address of the byte, w1 = value (overwritten). Uses w2 as scratch.
SetC: ldrb w2, [x0] // put *byte into w2
ubfiz w1, w1, 3, 5 // zero new w1, copy bits 0..4 to 3..7
and w2, w2, 7 // preserve only 1st 3 bits in *byte
orr w2, w2, w1 // or in value into *byte
strb w2, [x0] // store the result back to *byte
ret
.end

View file

@ -1,10 +1,5 @@
SetB: ldrb w3, [x0]
and w1, w1, 3 // value &= 3
lsl w1, w1, 1
mov w2, 6
mvn w2, w2
and w3, w3, w2 // B is cleared
orr w3, w3, w1
bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3
strb w3, [x0]
ret

View file

@ -8,7 +8,8 @@ struct BF {
unsigned char noBF = 0;
#define C
//#define C
#undef C
#ifdef C
/* Note the absence of defensive programming such as checking
to ensure that byte is not null and that bit_number is not
@ -59,11 +60,11 @@ int main() {
bf.b = 2;
bf.c = 3;
ClearA(&noBF);
SetB(&noBF, 1);
SetC(&noBF, 7);
SetA(&noBF);
SetB(&noBF, 2);
SetC(&noBF, 3);
printf("noBF should be 0x3A - value: 0x%X\n", (unsigned int) noBF);
printf("noBF should be 0x1D - value: 0x%X\n", (unsigned int) noBF);
printf("bf should be 0x1D - value: 0x%X\n", (unsigned int) *((unsigned char *) &bf));
return 0;
}

View file

@ -1,129 +1,151 @@
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 12, 0 sdk_version 12, 3
.globl _ClearA ; -- Begin function ClearA
.p2align 2
_ClearA: ; @ClearA
.arch armv8-a
.file "test.c"
.text
.align 2
.p2align 3,,7
.global ClearA
.type ClearA, %function
ClearA:
.LFB23:
.cfi_startproc
; %bb.0:
ldrb w8, [x0]
and w8, w8, #0xfe
strb w8, [x0]
ldrb w1, [x0]
and w1, w1, -2
strb w1, [x0]
ret
.cfi_endproc
; -- End function
.globl _SetA ; -- Begin function SetA
.p2align 2
_SetA: ; @SetA
.LFE23:
.size ClearA, .-ClearA
.align 2
.p2align 3,,7
.global SetA
.type SetA, %function
SetA:
.LFB24:
.cfi_startproc
; %bb.0:
ldrb w8, [x0]
orr w8, w8, #0x1
strb w8, [x0]
ldrb w1, [x0]
orr w1, w1, 1
strb w1, [x0]
ret
.cfi_endproc
; -- End function
.globl _ClearB ; -- Begin function ClearB
.p2align 2
_ClearB: ; @ClearB
.LFE24:
.size SetA, .-SetA
.align 2
.p2align 3,,7
.global ClearB
.type ClearB, %function
ClearB:
.LFB25:
.cfi_startproc
; %bb.0:
ldrb w8, [x0]
and w8, w8, #0xfffffff9
strb w8, [x0]
ldrb w1, [x0]
and w1, w1, -7
strb w1, [x0]
ret
.cfi_endproc
; -- End function
.globl _SetB ; -- Begin function SetB
.p2align 2
_SetB: ; @SetB
.LFE25:
.size ClearB, .-ClearB
.align 2
.p2align 3,,7
.global SetB
.type SetB, %function
SetB:
.LFB26:
.cfi_startproc
; %bb.0:
ldrb w8, [x0]
and w8, w8, #0xfffffff9
ubfiz w9, w1, #1, #2
orr w8, w8, w9
strb w8, [x0]
ldrb w2, [x0]
bfi w2, w1, 1, 2
strb w2, [x0]
ret
.cfi_endproc
; -- End function
.globl _ClearC ; -- Begin function ClearC
.p2align 2
_ClearC: ; @ClearC
.LFE26:
.size SetB, .-SetB
.align 2
.p2align 3,,7
.global ClearC
.type ClearC, %function
ClearC:
.LFB27:
.cfi_startproc
; %bb.0:
ldrb w8, [x0]
and w8, w8, #0x7
strb w8, [x0]
ldrb w1, [x0]
and w1, w1, 7
strb w1, [x0]
ret
.cfi_endproc
; -- End function
.globl _SetC ; -- Begin function SetC
.p2align 2
_SetC: ; @SetC
.LFE27:
.size ClearC, .-ClearC
.align 2
.p2align 3,,7
.global SetC
.type SetC, %function
SetC:
.LFB28:
.cfi_startproc
; %bb.0:
ldrb w8, [x0]
bfi w8, w1, #3, #8
strb w8, [x0]
ldrb w2, [x0] // load *byte into w2
ubfiz w1, w1, 3, 5 // w1 = bits 0..4 of value moved to bits 3..7, all other bits zero
and w2, w2, 7 // blank bits 3 to 7 in *byte
orr w2, w2, w1 // merge the shifted value into *byte
strb w2, [x0] // store the result back to *byte
ret
.cfi_endproc
; -- End function
.globl _main ; -- Begin function main
.p2align 2
_main: ; @main
.LFE28:
.size SetC, .-SetC
.section .rodata.str1.8,"aMS",@progbits,1
.align 3
.LC0:
.string "noBF should be 0x3A - value: 0x%X\n"
.align 3
.LC1:
.string "bf should be 0x1D - value: 0x%X\n"
.section .text.startup,"ax",@progbits
.align 2
.p2align 3,,7
.global main
.type main, %function
main:
.LFB29:
.cfi_startproc
; %bb.0:
sub sp, sp, #48
stp x20, x19, [sp, #16] ; 16-byte Folded Spill
stp x29, x30, [sp, #32] ; 16-byte Folded Spill
add x29, sp, #32
.cfi_def_cfa w29, 16
.cfi_offset w30, -8
.cfi_offset w29, -16
.cfi_offset w19, -24
.cfi_offset w20, -32
ldrb w8, [sp, #15]
orr w8, w8, #0x1
strb w8, [sp, #15]
ldrb w8, [sp, #15]
and w8, w8, #0xfffffff9
orr w8, w8, #0x4
strb w8, [sp, #15]
ldrb w8, [sp, #15]
mov w19, #24
bfxil w19, w8, #0, #3
strb w19, [sp, #15]
mov w8, #58
adrp x9, _noBF@PAGE
strb w8, [x9, _noBF@PAGEOFF]
str x8, [sp]
Lloh0:
adrp x0, l_.str@PAGE
Lloh1:
add x0, x0, l_.str@PAGEOFF
bl _printf
str x19, [sp]
Lloh2:
adrp x0, l_.str.1@PAGE
Lloh3:
add x0, x0, l_.str.1@PAGEOFF
bl _printf
mov w0, #0
ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
add sp, sp, #48
stp x29, x30, [sp, -32]!
.cfi_def_cfa_offset 32
.cfi_offset 29, -32
.cfi_offset 30, -24
mov w5, 2
mov w4, 3
mov x29, sp
ldrb w0, [sp, 24]
adrp x2, .LANCHOR0
adrp x1, .LC0
add x1, x1, :lo12:.LC0
orr w0, w0, 1
strb w0, [sp, 24]
mov w0, 58
strb w0, [x2, #:lo12:.LANCHOR0]
ldrb w3, [sp, 24]
mov w2, 58
mov w0, 1
bfi w3, w5, 1, 2
strb w3, [sp, 24]
ldrb w3, [sp, 24]
bfi w3, w4, 3, 5
strb w3, [sp, 24]
bl __printf_chk
ldrb w2, [sp, 24]
adrp x1, .LC1
mov w0, 1
add x1, x1, :lo12:.LC1
bl __printf_chk
mov w0, 0
ldp x29, x30, [sp], 32
.cfi_restore 30
.cfi_restore 29
.cfi_def_cfa_offset 0
ret
.loh AdrpAdd Lloh2, Lloh3
.loh AdrpAdd Lloh0, Lloh1
.cfi_endproc
; -- End function
.globl _noBF ; @noBF
.zerofill __DATA,__common,_noBF,1,0
.section __TEXT,__cstring,cstring_literals
l_.str: ; @.str
.asciz "noBF should be 0x3A - value: 0x%X\n"
l_.str.1: ; @.str.1
.asciz "bf should be 0x1D - value: 0x%X\n"
.subsections_via_symbols
.LFE29:
.size main, .-main
.global noBF
.bss
.set .LANCHOR0,. + 0
.type noBF, %object
.size noBF, 1
noBF:
.zero 1
.ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
.section .note.GNU-stack,"",@progbits

View file

@ -0,0 +1,11 @@
// Small ubfiz demonstration. w1 is preloaded with 0xFF, presumably so that
// the zeroing of the destination register is visible when the register is
// inspected after the ubfiz (NOTE(review): confirm intended use).
.global main
.text
.align 2
main: str x30, [sp, -16]! // push the link register; sp moves down by 16
mov w1, 0xFF // destination starts with its low 8 bits set
mov w2, 0x0A // source value, binary 1010
ubfiz w1, w2, 0, 4 // w1 = bits 0..3 of w2 placed at bit 0, all other bits zero
ldr x30, [sp], 16 // pop the link register; sp moves back up by 16
mov w0, wzr // return value 0
ret