mirror of
https://github.com/pkivolowitz/asm_book.git
synced 2026-06-25 00:16:44 +08:00
added complex ARM instructions
This commit is contained in:
parent
b3f1a63e51
commit
743550c02d
6 changed files with 234 additions and 140 deletions
|
|
@ -212,12 +212,72 @@ side effect to changing this order.
|
||||||
`lsl` means "left shift logical" which fills the right side recently
|
`lsl` means "left shift logical" which fills the right side recently
|
||||||
vacated bits with zero.
|
vacated bits with zero.
|
||||||
|
|
||||||
|
Now, we present a more sophisticated version of `SetB`:
|
||||||
|
|
||||||
|
```asm
|
||||||
|
SetB: ldrb w3, [x0] // 1
|
||||||
|
bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3 // 2
|
||||||
|
strb w3, [x0] // 3
|
||||||
|
ret // 4
|
||||||
|
```
|
||||||
|
|
||||||
|
Whoa. Nine instructions down to four! What the heck is `bfi`?
|
||||||
|
|
||||||
|
`bfi dst, src, start, width` copies `width` bits starting at bit 0 in `src`
|
||||||
|
to bits starting at `start` in `dst`. It obviates the need for `line 2` in
|
||||||
|
the naive code because it plucks only bits 0 and 1 from the original value
|
||||||
|
of `w1`. The `bfi` then internally does the shift appropriate to move
|
||||||
|
bit 0 of the original `w1` to bit `start` along with `width - 1`
|
||||||
|
subsequent bits.
|
||||||
|
|
||||||
|
Some might argue that instructions like `bfi` (and `ubfiz` described
|
||||||
|
below) are examples of `ISA creep`, where ISAs get
|
||||||
|
more and more cumbersome with the latest instructions du jour. This is
|
||||||
|
definitely true in the x86 ISA. Perhaps this is true in the AARCH64 ISA
|
||||||
|
as well, but certainly not to the extent of the x86. Remember that the ARM
|
||||||
|
family of processors are examples of RISC machines - *reduced instruction
|
||||||
|
set* architectures.
|
||||||
|
|
||||||
|
UBFIZ dest, src, start, width
|
||||||
|
|
||||||
|
zeros dest
|
||||||
|
copies the low width bits of src (starting at bit 0) into bits start to start + width - 1 of dest.
|
||||||
|
|
||||||
|
Notice this version is two instructions shorter.
|
||||||
|
|
||||||
|
Part of the savings is the use of `ubfiz`.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
`ubfiz` stands for Unsigned Bit Field Insert in Zeros. Wow.
|
||||||
|
|
||||||
|
This instruction does the following:
|
||||||
|
|
||||||
|
* zeros the entire destination register
|
||||||
|
* copies the indicated source register bits to the destination
|
||||||
|
|
||||||
|
|
||||||
|
Finally, we come to handling field `c`. Recall `c` is 5 bits long starting
|
||||||
|
at bit 3.
|
||||||
|
|
||||||
|
Clearing the bits in `c` is easily accomplished:
|
||||||
|
|
||||||
```c
|
```c
|
||||||
void ClearC(unsigned char * byte) {
|
void ClearC(unsigned char * byte) {
|
||||||
*byte &= 7; // squashes bits 3 to 7 to 0
|
*byte &= 7; // squashes bits 3 to 7 to 0
|
||||||
}
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This is optimally implemented using:
|
||||||
|
|
||||||
|
```asm
|
||||||
|
ClearC: ldrb w1, [x0] // 1
|
||||||
|
and w1, w1, 7 // 2
|
||||||
|
strb w1, [x0] // 3
|
||||||
|
ret // 4
|
||||||
|
```
|
||||||
|
|
||||||
|
```c
|
||||||
void SetC(unsigned char * byte, unsigned char value) {
|
void SetC(unsigned char * byte, unsigned char value) {
|
||||||
value &= 0x1F; // ensures only bits 0 to 4 can be set
|
value &= 0x1F; // ensures only bits 0 to 4 can be set
|
||||||
*byte &= ~(0x1F << 3); // squashes correct bits in byte
|
*byte &= ~(0x1F << 3); // squashes correct bits in byte
|
||||||
|
|
@ -228,7 +288,23 @@ void SetC(unsigned char * byte, unsigned char value) {
|
||||||
In naive assembly language, these functions would look like this:
|
In naive assembly language, these functions would look like this:
|
||||||
|
|
||||||
```asm
|
```asm
|
||||||
|
SetC: ldrb w3, [x0] // 1
|
||||||
|
mov w2, 0x1F // 2
|
||||||
|
and w1, w1, w2 // 3
|
||||||
|
lsl w1, w1, 3 // 4
|
||||||
|
lsl w2, w2, 3 // 5
|
||||||
|
mvn w2, w2 // 6
|
||||||
|
and w3, w3, w2 // 7
|
||||||
|
orr w3, w3, w1 // 8
|
||||||
|
strb w3, [x0] // 9
|
||||||
|
ret // 10
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```asm
|
||||||
|
SetC: ldrb w2, [x0] // put *byte into w2 // 1
|
||||||
|
ubfiz w1, w1, 3, 5 // zero new w1, copy bits 0..4 to 3..7 // 2
|
||||||
|
and w2, w2, 7 // preserve only 1st 3 bits in *byte // 3
|
||||||
|
orr w2, w2, w1 // or in value into *byte // 4
|
||||||
|
strb w2, [x0] // 5
|
||||||
|
ret // 6
|
||||||
|
```
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,8 @@ ClearA: ldrb w1, [x0]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
ClearB: ldrb w1, [x0]
|
ClearB: ldrb w1, [x0]
|
||||||
and w1, w1, 0xF9
|
mov w2, 0xF9
|
||||||
|
and w1, w1, w2
|
||||||
strb w1, [x0]
|
strb w1, [x0]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
@ -30,27 +31,15 @@ SetA: ldrb w1, [x0]
|
||||||
|
|
||||||
|
|
||||||
SetB: ldrb w3, [x0]
|
SetB: ldrb w3, [x0]
|
||||||
and w1, w1, 3 // value &= 3
|
bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3
|
||||||
lsl w1, w1, 1
|
|
||||||
mov w2, 6
|
|
||||||
mvn w2, w2
|
|
||||||
and w3, w3, w2 // B is cleared
|
|
||||||
orr w3, w3, w1
|
|
||||||
strb w3, [x0]
|
strb w3, [x0]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
SetC: ldrb w3, [x0]
|
SetC: ldrb w2, [x0] // put *byte into w2
|
||||||
|
ubfiz w1, w1, 3, 5 // zero new w1, copy bits 0..4 to 3..7
|
||||||
mov w2, 0x1F
|
and w2, w2, 7 // preserve only 1st 3 bits in *byte
|
||||||
and w1, w1, w2
|
orr w2, w2, w1 // or in value into *byte
|
||||||
lsl w1, w1, 3
|
strb w2, [x0]
|
||||||
|
|
||||||
lsl w2, w2, 3
|
|
||||||
mvn w2, w2
|
|
||||||
and w3, w3, w2
|
|
||||||
|
|
||||||
orr w3, w3, w1
|
|
||||||
strb w3, [x0]
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.end
|
.end
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,5 @@
|
||||||
SetB: ldrb w3, [x0]
|
SetB: ldrb w3, [x0]
|
||||||
and w1, w1, 3 // value &= 3
|
bfi w3, w1, 1, 2 // copy bit 0..1 in w1 to bit 1..2 in w3
|
||||||
lsl w1, w1, 1
|
|
||||||
mov w2, 6
|
|
||||||
mvn w2, w2
|
|
||||||
and w3, w3, w2 // B is cleared
|
|
||||||
orr w3, w3, w1
|
|
||||||
strb w3, [x0]
|
strb w3, [x0]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
@ -8,7 +8,8 @@ struct BF {
|
||||||
|
|
||||||
unsigned char noBF = 0;
|
unsigned char noBF = 0;
|
||||||
|
|
||||||
#define C
|
//#define C
|
||||||
|
#undef C
|
||||||
#ifdef C
|
#ifdef C
|
||||||
/* Note the absence of defensive programming such as checking
|
/* Note the absence of defensive programming such as checking
|
||||||
to ensure that byte is not null and that bit_number is not
|
to ensure that byte is not null and that bit_number is not
|
||||||
|
|
@ -59,11 +60,11 @@ int main() {
|
||||||
bf.b = 2;
|
bf.b = 2;
|
||||||
bf.c = 3;
|
bf.c = 3;
|
||||||
|
|
||||||
ClearA(&noBF);
|
SetA(&noBF);
|
||||||
SetB(&noBF, 1);
|
SetB(&noBF, 2);
|
||||||
SetC(&noBF, 7);
|
SetC(&noBF, 3);
|
||||||
|
|
||||||
printf("noBF should be 0x3A - value: 0x%X\n", (unsigned int) noBF);
|
printf("noBF should be 0x1D - value: 0x%X\n", (unsigned int) noBF);
|
||||||
printf("bf should be 0x1D - value: 0x%X\n", (unsigned int) *((unsigned char *) &bf));
|
printf("bf should be 0x1D - value: 0x%X\n", (unsigned int) *((unsigned char *) &bf));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,129 +1,151 @@
|
||||||
.section __TEXT,__text,regular,pure_instructions
|
.arch armv8-a
|
||||||
.build_version macos, 12, 0 sdk_version 12, 3
|
.file "test.c"
|
||||||
.globl _ClearA ; -- Begin function ClearA
|
.text
|
||||||
.p2align 2
|
.align 2
|
||||||
_ClearA: ; @ClearA
|
.p2align 3,,7
|
||||||
|
.global ClearA
|
||||||
|
.type ClearA, %function
|
||||||
|
ClearA:
|
||||||
|
.LFB23:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
; %bb.0:
|
ldrb w1, [x0]
|
||||||
ldrb w8, [x0]
|
and w1, w1, -2
|
||||||
and w8, w8, #0xfe
|
strb w1, [x0]
|
||||||
strb w8, [x0]
|
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
; -- End function
|
.LFE23:
|
||||||
.globl _SetA ; -- Begin function SetA
|
.size ClearA, .-ClearA
|
||||||
.p2align 2
|
.align 2
|
||||||
_SetA: ; @SetA
|
.p2align 3,,7
|
||||||
|
.global SetA
|
||||||
|
.type SetA, %function
|
||||||
|
SetA:
|
||||||
|
.LFB24:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
; %bb.0:
|
ldrb w1, [x0]
|
||||||
ldrb w8, [x0]
|
orr w1, w1, 1
|
||||||
orr w8, w8, #0x1
|
strb w1, [x0]
|
||||||
strb w8, [x0]
|
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
; -- End function
|
.LFE24:
|
||||||
.globl _ClearB ; -- Begin function ClearB
|
.size SetA, .-SetA
|
||||||
.p2align 2
|
.align 2
|
||||||
_ClearB: ; @ClearB
|
.p2align 3,,7
|
||||||
|
.global ClearB
|
||||||
|
.type ClearB, %function
|
||||||
|
ClearB:
|
||||||
|
.LFB25:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
; %bb.0:
|
ldrb w1, [x0]
|
||||||
ldrb w8, [x0]
|
and w1, w1, -7
|
||||||
and w8, w8, #0xfffffff9
|
strb w1, [x0]
|
||||||
strb w8, [x0]
|
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
; -- End function
|
.LFE25:
|
||||||
.globl _SetB ; -- Begin function SetB
|
.size ClearB, .-ClearB
|
||||||
.p2align 2
|
.align 2
|
||||||
_SetB: ; @SetB
|
.p2align 3,,7
|
||||||
|
.global SetB
|
||||||
|
.type SetB, %function
|
||||||
|
SetB:
|
||||||
|
.LFB26:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
; %bb.0:
|
ldrb w2, [x0]
|
||||||
ldrb w8, [x0]
|
bfi w2, w1, 1, 2
|
||||||
and w8, w8, #0xfffffff9
|
strb w2, [x0]
|
||||||
ubfiz w9, w1, #1, #2
|
|
||||||
orr w8, w8, w9
|
|
||||||
strb w8, [x0]
|
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
; -- End function
|
.LFE26:
|
||||||
.globl _ClearC ; -- Begin function ClearC
|
.size SetB, .-SetB
|
||||||
.p2align 2
|
.align 2
|
||||||
_ClearC: ; @ClearC
|
.p2align 3,,7
|
||||||
|
.global ClearC
|
||||||
|
.type ClearC, %function
|
||||||
|
ClearC:
|
||||||
|
.LFB27:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
; %bb.0:
|
ldrb w1, [x0]
|
||||||
ldrb w8, [x0]
|
and w1, w1, 7
|
||||||
and w8, w8, #0x7
|
strb w1, [x0]
|
||||||
strb w8, [x0]
|
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
; -- End function
|
.LFE27:
|
||||||
.globl _SetC ; -- Begin function SetC
|
.size ClearC, .-ClearC
|
||||||
.p2align 2
|
.align 2
|
||||||
_SetC: ; @SetC
|
.p2align 3,,7
|
||||||
|
.global SetC
|
||||||
|
.type SetC, %function
|
||||||
|
SetC:
|
||||||
|
.LFB28:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
; %bb.0:
|
ldrb w2, [x0] // load *byte into w2
|
||||||
ldrb w8, [x0]
|
ubfiz w1, w1, 3, 5 // put zeros in bit 3 to 7 of value
|
||||||
bfi w8, w1, #3, #8
|
and w2, w2, 7 // blank bits 3 to 7 in *byte
|
||||||
strb w8, [x0]
|
orr w2, w2, w1
|
||||||
|
strb w2, [x0]
|
||||||
ret
|
ret
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
; -- End function
|
.LFE28:
|
||||||
.globl _main ; -- Begin function main
|
.size SetC, .-SetC
|
||||||
.p2align 2
|
.section .rodata.str1.8,"aMS",@progbits,1
|
||||||
_main: ; @main
|
.align 3
|
||||||
|
.LC0:
|
||||||
|
.string "noBF should be 0x3A - value: 0x%X\n"
|
||||||
|
.align 3
|
||||||
|
.LC1:
|
||||||
|
.string "bf should be 0x1D - value: 0x%X\n"
|
||||||
|
.section .text.startup,"ax",@progbits
|
||||||
|
.align 2
|
||||||
|
.p2align 3,,7
|
||||||
|
.global main
|
||||||
|
.type main, %function
|
||||||
|
main:
|
||||||
|
.LFB29:
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
; %bb.0:
|
stp x29, x30, [sp, -32]!
|
||||||
sub sp, sp, #48
|
.cfi_def_cfa_offset 32
|
||||||
stp x20, x19, [sp, #16] ; 16-byte Folded Spill
|
.cfi_offset 29, -32
|
||||||
stp x29, x30, [sp, #32] ; 16-byte Folded Spill
|
.cfi_offset 30, -24
|
||||||
add x29, sp, #32
|
mov w5, 2
|
||||||
.cfi_def_cfa w29, 16
|
mov w4, 3
|
||||||
.cfi_offset w30, -8
|
mov x29, sp
|
||||||
.cfi_offset w29, -16
|
ldrb w0, [sp, 24]
|
||||||
.cfi_offset w19, -24
|
adrp x2, .LANCHOR0
|
||||||
.cfi_offset w20, -32
|
adrp x1, .LC0
|
||||||
ldrb w8, [sp, #15]
|
add x1, x1, :lo12:.LC0
|
||||||
orr w8, w8, #0x1
|
orr w0, w0, 1
|
||||||
strb w8, [sp, #15]
|
strb w0, [sp, 24]
|
||||||
ldrb w8, [sp, #15]
|
mov w0, 58
|
||||||
and w8, w8, #0xfffffff9
|
strb w0, [x2, #:lo12:.LANCHOR0]
|
||||||
orr w8, w8, #0x4
|
ldrb w3, [sp, 24]
|
||||||
strb w8, [sp, #15]
|
mov w2, 58
|
||||||
ldrb w8, [sp, #15]
|
mov w0, 1
|
||||||
mov w19, #24
|
bfi w3, w5, 1, 2
|
||||||
bfxil w19, w8, #0, #3
|
strb w3, [sp, 24]
|
||||||
strb w19, [sp, #15]
|
ldrb w3, [sp, 24]
|
||||||
mov w8, #58
|
bfi w3, w4, 3, 5
|
||||||
adrp x9, _noBF@PAGE
|
strb w3, [sp, 24]
|
||||||
strb w8, [x9, _noBF@PAGEOFF]
|
bl __printf_chk
|
||||||
str x8, [sp]
|
ldrb w2, [sp, 24]
|
||||||
Lloh0:
|
adrp x1, .LC1
|
||||||
adrp x0, l_.str@PAGE
|
mov w0, 1
|
||||||
Lloh1:
|
add x1, x1, :lo12:.LC1
|
||||||
add x0, x0, l_.str@PAGEOFF
|
bl __printf_chk
|
||||||
bl _printf
|
mov w0, 0
|
||||||
str x19, [sp]
|
ldp x29, x30, [sp], 32
|
||||||
Lloh2:
|
.cfi_restore 30
|
||||||
adrp x0, l_.str.1@PAGE
|
.cfi_restore 29
|
||||||
Lloh3:
|
.cfi_def_cfa_offset 0
|
||||||
add x0, x0, l_.str.1@PAGEOFF
|
|
||||||
bl _printf
|
|
||||||
mov w0, #0
|
|
||||||
ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
|
|
||||||
ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
|
|
||||||
add sp, sp, #48
|
|
||||||
ret
|
ret
|
||||||
.loh AdrpAdd Lloh2, Lloh3
|
|
||||||
.loh AdrpAdd Lloh0, Lloh1
|
|
||||||
.cfi_endproc
|
.cfi_endproc
|
||||||
; -- End function
|
.LFE29:
|
||||||
.globl _noBF ; @noBF
|
.size main, .-main
|
||||||
.zerofill __DATA,__common,_noBF,1,0
|
.global noBF
|
||||||
.section __TEXT,__cstring,cstring_literals
|
.bss
|
||||||
l_.str: ; @.str
|
.set .LANCHOR0,. + 0
|
||||||
.asciz "noBF should be 0x3A - value: 0x%X\n"
|
.type noBF, %object
|
||||||
|
.size noBF, 1
|
||||||
l_.str.1: ; @.str.1
|
noBF:
|
||||||
.asciz "bf should be 0x1D - value: 0x%X\n"
|
.zero 1
|
||||||
|
.ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
|
||||||
.subsections_via_symbols
|
.section .note.GNU-stack,"",@progbits
|
||||||
|
|
|
||||||
11
section_2/bitfields/ubfiz.s
Normal file
11
section_2/bitfields/ubfiz.s
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
.global main
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
main: str x30, [sp, -16]! // ubfiz demo entry point: push x30 (link register), pre-decrementing sp by 16
|
||||||
|
mov w1, 0xFF // preload the destination with ones - presumably so the zeroing done by ubfiz is visible
|
||||||
|
mov w2, 0x0A // source value; its low four bits are 0b1010
|
||||||
|
ubfiz w1, w2, 0, 4 // w1 = 0x0A: bits 0..3 of w2 go to bits 0..3 of w1, all other bits of w1 become zero
|
||||||
|
ldr x30, [sp], 16 // pop x30, post-incrementing sp by 16
|
||||||
|
mov w0, wzr // w0 = 0, the value main returns
|
||||||
|
ret // return to the address held in x30
|
||||||
Loading…
Reference in a new issue