2

I am trying to figure out the details of how TBB works in ARM assembly. I'm just trying to get a simple example working, but no matter what I do, my code either goes into an infinite loop or doesn't compile.

@ Attempt at a three-way dispatch with the Thumb-2 TBB (table branch byte)
@ instruction, written for GNU as in unified syntax.
.syntax unified             
.thumb      

@ Byte table read by TBB. Each entry is (target - Case1) / 2, i.e. the
@ distance in halfwords from Case1, the label placed right after the TBB.
@ NOTE(review): no section directive precedes the table, and it is three
@ bytes long with no padding or .align before the code that follows it.
BranchTable_Byte:
 .byte 0 @; Case1 offset calculation
 .byte ((Case2-Case1)/2) @; Case2 offset calculation
 .byte ((Case3-Case1)/2) @; Case3 offset calculation

.text
.global example_TBB
.thumb_func
 example_TBB:
 @ Table index is hard-coded to 1, which selects the Case2 entry.
 mov r1, #1

 @ r0 = PC-relative address of the table.
 ADR.W r0, BranchTable_Byte
 TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table

 Case1:
    @; an instruction sequence follows
    mov r2, #1
    b endTBB
 Case2:
    @; an instruction sequence follows
    mov r3, #2
    b endTBB
 Case3:
    @; an instruction sequence follows
    @ NOTE(review): r4 has to be preserved under the ARM calling convention;
    @ it is overwritten here without being saved.
    mov r4, #3
    b endTBB

 endTBB:

 @ Return to the caller.
 bx lr

I believe that, since r1=1, the TBB opcode should branch to Case2, but I am getting infinite loops and/or compilation errors no matter how long I've played with it.

DarkLink
  • 355
  • 4
  • 16
  • For starters, you have to preserve r4 on the stack within this function; you can't modify it. You can modify r0, r1, r2 and r3. – old_timer Oct 07 '18 at 04:09
  • if you return right after the adr.w do you see the address of the table in r0 as desired? – old_timer Oct 07 '18 at 04:12

1 Answers1

2

so.s

@ Minimal entry point used only to give the linker something that calls
@ example_TBB.
@ NOTE(review): there is no .thumb directive in this file, so these two
@ instructions are assembled as ARM rather than Thumb.
.globl _start
_start:
    bl example_TBB      @ call the routine under test
    b .                 @ spin here forever once it returns

tbb.s

@ tbb.s - the code from the question, with the case bodies changed to write
@ r0 instead of r2/r3/r4.
.syntax unified
.thumb

@ Byte table read by TBB: entry N is (CaseN - Case1) / 2.
@ NOTE(review): the table is emitted before the .text directive and is three
@ bytes long, with nothing to re-align the code that follows it.
BranchTable_Byte:
 .byte 0 @; Case1 offset calculation
 .byte ((Case2-Case1)/2) @; Case2 offset calculation
 .byte ((Case3-Case1)/2) @; Case3 offset calculation

.text
.global example_TBB
.thumb_func
 example_TBB:
 @ Table index is hard-coded to 1 (the Case2 entry).
 mov r1, #1

 @ r0 = PC-relative address of the table.
 ADR.W r0, BranchTable_Byte
 TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table

 Case1:
    @; an instruction sequence follows
    mov r0, #1
    b endTBB
 Case2:
    @; an instruction sequence follows
    mov r0, #2
    b endTBB
 Case3:
    @; an instruction sequence follows
    mov r0, #3
    b endTBB

 endTBB:

 @ Return to the caller; r0 holds the value written by the selected case.
 bx lr

To give the sections somewhere to live (the exact text vs. data addresses don't really matter; this is just to see what the tools are doing):

arm-none-eabi-ld -Ttext=0x1000 -Tdata=0x2000 so.o tbb.o -o so.elf

00001000 <_start>:
    1000:   fb000000    blx 100a <example_TBB>
    1004:   eafffffe    b   1004 <_start+0x4>

00001008 <BranchTable_Byte>:
    1008:       svcmi   0x00060300

0000100a <example_TBB>:
    100a:   01f04f06    mvnseq  r4, r6, lsl #30
    100e:   af01        add r7, sp, #4
    1010:   08f2        lsrs    r2, r6, #3

and there you go, there is a huge problem. How can 3 bytes of data from a table fit in two bytes?

Your code implies you maybe wanted to do this:

@ Variant with the branch table moved into .data while the code still uses
@ ADR to find it.
@ NOTE(review): ADR only forms a PC-relative address, so it cannot yield the
@ address of a table the linker places in a separate .data region.
.syntax unified
.thumb
.data
@ Byte table read by TBB: entry N is (CaseN - Case1) / 2.
BranchTable_Byte:
 .byte 0 @; Case1 offset calculation
 .byte ((Case2-Case1)/2) @; Case2 offset calculation
 .byte ((Case3-Case1)/2) @; Case3 offset calculation

.text
.global example_TBB
.thumb_func
 example_TBB:
 @ Table index is hard-coded to 1 (the Case2 entry).
 mov r1, #1

 @ Intended to load the table address; see the NOTE(review) at the top.
 ADR.W r0, BranchTable_Byte
 TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table

 Case1:
    @; an instruction sequence follows
    mov r0, #1
    b endTBB
 Case2:
    @; an instruction sequence follows
    mov r0, #2
    b endTBB
 Case3:
    @; an instruction sequence follows
    mov r0, #3
    b endTBB

 endTBB:

 @ Return to the caller.
 bx lr

Wow, that's even worse (well, `adr` forms a PC-relative address, it is not a general "load address" operation, so it cannot reach a table in .data):

00001000 <_start>:
    1000:   fa000000    blx 1008 <example_TBB>
    1004:   eafffffe    b   1004 <_start+0x4>

00001008 <example_TBB>:
    1008:   f04f 0101   mov.w   r1, #1
    100c:   f2af 0004   subw    r0, pc, #4
    1010:   e8d0 f001   tbb [r0, r1]

00001014 <Case1>:
    1014:   f04f 0001   mov.w   r0, #1
    1018:   e005        b.n 1026 <endTBB>

0000101a <Case2>:
    101a:   f04f 0002   mov.w   r0, #2
    101e:   e002        b.n 1026 <endTBB>

00001020 <Case3>:
    1020:   f04f 0003   mov.w   r0, #3
    1024:   e7ff        b.n 1026 <endTBB>

00001026 <endTBB>:
    1026:   4770        bx  lr

Disassembly of section .data:

00002000 <__data_start>:
    2000:   Address 0x0000000000002000 is out of bounds.

Your table is 3 bytes deep; make it four for alignment purposes:

@ Variant that pads the three-entry table with a fourth byte so that the
@ table occupies a whole word and the code after it stays aligned.
.syntax unified
.thumb
@ Byte table read by TBB: entry N is (CaseN - Case1) / 2.
BranchTable_Byte:
 .byte 0 @; Case1 offset calculation
 .byte ((Case2-Case1)/2) @; Case2 offset calculation
 .byte ((Case3-Case1)/2) @; Case3 offset calculation
 @ Padding only: brings the table size up to four bytes.
 .byte 0
 
.text
.global example_TBB
.thumb_func
 example_TBB:
 @ Table index is hard-coded to 1 (the Case2 entry).
 mov r1, #1

 @ r0 = PC-relative address of the table.
 ADR.W r0, BranchTable_Byte
 TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table

 Case1:
    @; an instruction sequence follows
    mov r0, #1
    b endTBB
 Case2:
    @; an instruction sequence follows
    mov r0, #2
    b endTBB
 Case3:
    @; an instruction sequence follows
    mov r0, #3
    b endTBB

 endTBB:

 @ Return to the caller.
 bx lr

gives

00001000 <_start>:
    1000:   fa000001    blx 100c <example_TBB>
    1004:   eafffffe    b   1004 <_start+0x4>

00001008 <BranchTable_Byte>:
    1008:   00060300    andeq   r0, r6, r0, lsl #6

0000100c <example_TBB>:
    100c:   f04f 0101   mov.w   r1, #1
    1010:   f2af 000c   subw    r0, pc, #12
    1014:   e8d0 f001   tbb [r0, r1]

Much better: 4 bytes fit in 4 bytes now, which is good. But what's better, if you put data inline with code, is to align, or maybe to put the data after the code:

@ Variant that keeps the three-byte table and instead re-aligns the location
@ counter before the function that follows it.
.syntax unified
.thumb
@ Byte table read by TBB: entry N is (CaseN - Case1) / 2.
BranchTable_Byte:
 .byte 0 @; Case1 offset calculation
 .byte ((Case2-Case1)/2) @; Case2 offset calculation
 .byte ((Case3-Case1)/2) @; Case3 offset calculation

.text
@ Pad after the table so that example_TBB starts on an aligned address.
.align
.global example_TBB
.thumb_func

 example_TBB:
 @ Table index is hard-coded to 1 (the Case2 entry).
 mov r1, #1
...

and that fixes it too:

00001000 <_start>:
    1000:   fa000001    blx 100c <example_TBB>
    1004:   eafffffe    b   1004 <_start+0x4>

00001008 <BranchTable_Byte>:
    1008:   00060300    andeq   r0, r6, r0, lsl #6

0000100c <example_TBB>:
    100c:   f04f 0101   mov.w   r1, #1
    1010:   f2af 000c   subw    r0, pc, #12
    1014:   e8d0 f001   tbb [r0, r1]

You probably want your table in .text, which is where you specified it. If you put it in .data then you have to get it from flash to RAM, assuming this is a microcontroller. But you would need to do things slightly differently.

@ Variant with the table in .data, fetching its absolute address with the
@ "ldr rX,=symbol" pseudo-instruction instead of ADR.
.syntax unified
.thumb

.data
@ Byte table read by TBB: entry N is (CaseN - Case1) / 2.
BranchTable_Byte:
 .byte 0 @; Case1 offset calculation
 .byte ((Case2-Case1)/2) @; Case2 offset calculation
 .byte ((Case3-Case1)/2) @; Case3 offset calculation

.text
.global example_TBB
.thumb_func

 example_TBB:
 @ Table index is hard-coded to 1 (the Case2 entry).
 mov r1, #1

 @ r0 = absolute address of the table, loaded PC-relative from a literal
 @ word that the assembler emits after the code.
 ldr r0,=BranchTable_Byte
 TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table

 Case1:
    @; an instruction sequence follows
    mov r0, #1
    b endTBB
 Case2:
    @; an instruction sequence follows
    mov r0, #2
    b endTBB
 Case3:
    @; an instruction sequence follows
    mov r0, #3
    b endTBB

 endTBB:

 @ Return to the caller.
 bx lr


Disassembly of section .text:

00001000 <_start>:
    1000:   fa000000    blx 1008 <example_TBB>
    1004:   eafffffe    b   1004 <_start+0x4>

00001008 <example_TBB>:
    1008:   f04f 0101   mov.w   r1, #1
    100c:   4806        ldr r0, [pc, #24]   ; (1028 <endTBB+0x4>)
    100e:   e8d0 f001   tbb [r0, r1]

00001012 <Case1>:
    1012:   f04f 0001   mov.w   r0, #1
    1016:   e005        b.n 1024 <endTBB>

00001018 <Case2>:
    1018:   f04f 0002   mov.w   r0, #2
    101c:   e002        b.n 1024 <endTBB>

0000101e <Case3>:
    101e:   f04f 0003   mov.w   r0, #3
    1022:   e7ff        b.n 1024 <endTBB>

00001024 <endTBB>:
    1024:   4770        bx  lr
    1026:   20000000    andcs   r0, r0, r0
    ...

Disassembly of section .data:

00002000 <__data_start>:
    2000:   Address 0x0000000000002000 is out of bounds.

Don't you hate it when they do that?

@ Variant with the table in .data and a hand-placed literal word (btbadd)
@ holding its address, instead of letting the assembler create one.
.syntax unified
.thumb

.data
@ Byte table read by TBB: entry N is (CaseN - Case1) / 2.
BranchTable_Byte:
 .byte 0 @; Case1 offset calculation
 .byte ((Case2-Case1)/2) @; Case2 offset calculation
 .byte ((Case3-Case1)/2) @; Case3 offset calculation

.text
.global example_TBB
.thumb_func

 example_TBB:
 @ Table index is hard-coded to 1 (the Case2 entry).
 mov r1, #1

 @ r0 = contents of btbadd, i.e. the absolute address of the table.
 ldr r0,btbadd
 TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table

 Case1:
    @; an instruction sequence follows
    mov r0, #1
    b endTBB
 Case2:
    @; an instruction sequence follows
    mov r0, #2
    b endTBB
 Case3:
    @; an instruction sequence follows
    mov r0, #3
    b endTBB

@ Literal word holding the table address. It sits after Case3's
@ unconditional branch, so execution never falls into it; .align keeps the
@ word on a 4-byte boundary.
.align
btbadd: .word BranchTable_Byte

 endTBB:

 @ Return to the caller.
 bx lr

and that's better as far as that approach goes:

Disassembly of section .text:

00001000 <_start>:
    1000:   fa000000    blx 1008 <example_TBB>
    1004:   eafffffe    b   1004 <_start+0x4>

00001008 <example_TBB>:
    1008:   f04f 0101   mov.w   r1, #1
    100c:   4805        ldr r0, [pc, #20]   ; (1024 <btbadd>)
    100e:   e8d0 f001   tbb [r0, r1]

00001012 <Case1>:
    1012:   f04f 0001   mov.w   r0, #1
    1016:   e007        b.n 1028 <endTBB>

00001018 <Case2>:
    1018:   f04f 0002   mov.w   r0, #2
    101c:   e004        b.n 1028 <endTBB>

0000101e <Case3>:
    101e:   f04f 0003   mov.w   r0, #3
    1022:   e001        b.n 1028 <endTBB>

00001024 <btbadd>:
    1024:   00002000    andeq   r2, r0, r0

00001028 <endTBB>:
    1028:   4770        bx  lr
    102a:   46c0        nop         ; (mov r8, r8)

Disassembly of section .data:

00002000 <__data_start>:
    2000:   Address 0x0000000000002000 is out of bounds.

But now you have .data out there; for something like this you don't need the table to be in .data.

And note that if you are linking this in with compiled code, your compiler likely conforms to the ARM calling convention, which says you can't modify r4 in your function; you must preserve it. And that's why I modified your code (which I think you got from me when I ported it to gas for you?).

I forgot .thumb in so.s. That's fine, as it's not the code of interest; I'm not going to repair it above, but I will below. You might want to add some more paranoia to the code, and why not sprinkle it with .aligns...

so.s

@ so.s - start-up file for the complete example: a four-word table followed
@ by two tiny Thumb routines.
@ NOTE(review): presumably this is the Cortex-M vector table (initial stack
@ pointer, reset handler, then two further handlers) - confirm against the
@ target's vector layout.
.thumb
.globl _start
_start:
    .word 0x20001000
    .word reset
    .word loop
    .word loop

@ Parking loop referenced by the last two table entries.
.thumb_func
loop: b loop
@ Entry code: call example_TBB with r0 = 1, then spin forever.
.thumb_func
reset:
    mov r0,#1           @ selector passed to example_TBB in r0
    bl example_TBB
    b .                 @ stay here so r0 can be inspected afterwards

tbb.s

@ tbb.s - four-way dispatch using the Thumb-2 TBB (table branch byte)
@ instruction.
@
@ example_TBB
@   In:       r0 = case selector; only bits [1:0] are used
@   Out:      r0 = 1, 2, 3 or 4 for selector 0, 1, 2 or 3
@   Clobbers: r1
.syntax unified
.thumb

@ Byte table read by TBB. Entry N holds (CaseN - Case0) / 2: TBB doubles the
@ byte it reads and adds it to the address of the instruction that follows
@ the TBB, which is where Case0 is placed.
.align
BranchTable_Byte:
    .byte ((Case0-Case0)/2)
    .byte ((Case1-Case0)/2)
    .byte ((Case2-Case0)/2)
    .byte ((Case3-Case0)/2)

.align
.global example_TBB
.thumb_func
example_TBB:
    and r0,#3                       @ keep the index inside the 4-entry table
    adr.w r1, BranchTable_Byte      @ r1 = PC-relative address of the table
    tbb [r1, r0]                    @ branch to Case0 + 2 * table[r0]

@ Case0 must start directly after the TBB. The table offsets are measured
@ from Case0, but the CPU measures them from the end of the TBB instruction,
@ so no alignment directive (or anything else that could emit bytes) may be
@ placed between the two.
Case0:
    mov r0, #1
    b endTBB
Case1:
    mov r0, #2
    b endTBB
Case2:
    mov r0, #3
    b endTBB
Case3:
    mov r0, #4
    b endTBB

@ Padding here is harmless: every case reaches endTBB through a branch.
.align
endTBB:
    bx lr

gives

Disassembly of section .text:

08000000 <_start>:
 8000000:   20001000    andcs   r1, r0, r0
 8000004:   08000013    stmdaeq r0, {r0, r1, r4}
 8000008:   08000011    stmdaeq r0, {r0, r4}
 800000c:   08000011    stmdaeq r0, {r0, r4}

08000010 <loop>:
 8000010:   e7fe        b.n 8000010 <loop>

08000012 <reset>:
 8000012:   2001        movs    r0, #1
 8000014:   f000 f804   bl  8000020 <example_TBB>
 8000018:   e7fe        b.n 8000018 <reset+0x6>
    ...

0800001c <BranchTable_Byte>:
 800001c:   09060300    stmdbeq r6, {r8, r9}

08000020 <example_TBB>:
 8000020:   f000 0003   and.w   r0, r0, #3
 8000024:   f2af 010c   subw    r1, pc, #12
 8000028:   e8d1 f000   tbb [r1, r0]

0800002c <Case0>:
 800002c:   f04f 0001   mov.w   r0, #1
 8000030:   e008        b.n 8000044 <endTBB>

08000032 <Case1>:
 8000032:   f04f 0002   mov.w   r0, #2
 8000036:   e005        b.n 8000044 <endTBB>

08000038 <Case2>:
 8000038:   f04f 0003   mov.w   r0, #3
 800003c:   e002        b.n 8000044 <endTBB>

0800003e <Case3>:
 800003e:   f04f 0004   mov.w   r0, #4
 8000042:   e7ff        b.n 8000044 <endTBB>

08000044 <endTBB>:
 8000044:   4770        bx  lr
 8000046:   46c0        nop         ; (mov r8, r8)

And that is a complete program that you can run on your STM32, using OpenOCD to stop and examine the registers when finished to see what r0 is set to. You can also do this:

@ Single-file version of the TBB example: _start calls example_TBB with
@ selector 1 and then spins so that r0 can be inspected.
@
@ example_TBB
@   In:       r0 = case selector; only bits [1:0] are used
@   Out:      r0 = 1, 2, 3 or 4 for selector 0, 1, 2 or 3
@   Clobbers: r1
.syntax unified
.thumb
.globl _start
_start:
    mov r0,#1                       @ selector passed to example_TBB
    bl example_TBB
    b .                             @ park here once the call returns

@ Byte table read by TBB. Entry N holds (CaseN - Case0) / 2: TBB doubles the
@ byte it reads and adds it to the address of the instruction that follows
@ the TBB, which is where Case0 is placed.
.align
BranchTable_Byte:
    .byte ((Case0-Case0)/2)
    .byte ((Case1-Case0)/2)
    .byte ((Case2-Case0)/2)
    .byte ((Case3-Case0)/2)

.align
.global example_TBB
.thumb_func
example_TBB:
    and r0,#3                       @ keep the index inside the 4-entry table
    adr.w r1, BranchTable_Byte      @ r1 = PC-relative address of the table
    tbb [r1, r0]                    @ branch to Case0 + 2 * table[r0]

@ Case0 must start directly after the TBB. The table offsets are measured
@ from Case0, but the CPU measures them from the end of the TBB instruction,
@ so no alignment directive (or anything else that could emit bytes) may be
@ placed between the two.
Case0:
    mov r0, #1
    b endTBB
Case1:
    mov r0, #2
    b endTBB
Case2:
    mov r0, #3
    b endTBB
Case3:
    mov r0, #4
    b endTBB

@ Padding here is harmless: every case reaches endTBB through a branch.
.align
endTBB:
    bx lr

Link for RAM at 0x20000000:

Disassembly of section .text:

20000000 <_start>:
20000000:   f04f 0001   mov.w   r0, #1
20000004:   f000 f804   bl  20000010 <example_TBB>
20000008:   e7fe        b.n 20000008 <_start+0x8>
2000000a:   46c0        nop         ; (mov r8, r8)

2000000c <BranchTable_Byte>:
2000000c:   09060300    stmdbeq r6, {r8, r9}

20000010 <example_TBB>:
20000010:   f000 0003   and.w   r0, r0, #3
20000014:   f2af 010c   subw    r1, pc, #12
20000018:   e8d1 f000   tbb [r1, r0]

2000001c <Case0>:
2000001c:   f04f 0001   mov.w   r0, #1
20000020:   e008        b.n 20000034 <endTBB>

20000022 <Case1>:
20000022:   f04f 0002   mov.w   r0, #2
20000026:   e005        b.n 20000034 <endTBB>

20000028 <Case2>:
20000028:   f04f 0003   mov.w   r0, #3
2000002c:   e002        b.n 20000034 <endTBB>

2000002e <Case3>:
2000002e:   f04f 0004   mov.w   r0, #4
20000032:   e7ff        b.n 20000034 <endTBB>

20000034 <endTBB>:
20000034:   4770        bx  lr
20000036:   46c0        nop         

Then you can load it, run it, halt and examine r0 from openocd talking to your STM32...

halfer
  • 19,824
  • 17
  • 99
  • 186
old_timer
  • 69,149
  • 8
  • 89
  • 168