0

I'm writing an amateur operating system for ARM-based devices and am currently trying to get it working in QEMU's versatilepb (ARM926EJ-S).

The problem arises when I try to add syscalls to my kernel. The idea is pretty simple: implement system calls via the SVC (SWI) instruction. Applications run in user mode, and to call a kernel function they execute an SVC <code> instruction; the ARM processor then switches to supervisor mode and calls the appropriate SVC handler.

But the problem is that when I call __asm__("SVC #0x08");, the device just resets and calls RESET_HANDLER, so it looks like the emulator just reboots.

I spent a few hours already to figure out what is the problem, but still got no idea.

Here is the code of ivt.s (the initial code with handlers):

/*
 * ARM exception table: eight one-instruction slots, one per exception,
 * in the fixed order Reset, Undefined, SWI, Prefetch Abort, Data Abort,
 * reserved, IRQ, FIQ.
 * Only Reset and SWI branch to real handlers; every other slot is "B .",
 * a branch to itself, so those exceptions simply hang in place.
 * The CPU fetches these slots starting at address 0x0, so the table only
 * takes effect if it is located (or copied) there.
 */
.global __RESET

__RESET:
    B RESET_HANDLER /* Reset */
    B . /* Undefined */
    B SWI_HANDLER   /* SWI */
    B . /* Prefetch Abort */
    B . /* Data Abort */
    B . /* reserved */
    B . /* IRQ */
    B . /* FIQ */

/*
 * Reset entry: give supervisor mode and user mode their own stacks,
 * then run usermode_function in user mode.
 * SP is banked per mode, so each LDR SP below sets the stack pointer of
 * whichever mode was selected by the preceding MSR.
 * stack_top and usr_stack_top are symbols defined by the linker script.
 */
RESET_HANDLER:
    MSR CPSR_c, 0x13 /* Supervisor mode */
    LDR SP, =stack_top
    MSR CPSR_c, 0x10 /* User mode */
    LDR SP, =usr_stack_top
    BL  usermode_function
    B   .               /* usermode_function returned: hang here */

/*
 * SWI/SVC exception handler. Entered in supervisor mode with LR holding
 * the address of the instruction after the SVC.
 *
 * syscall is a C function, so under the ARM procedure call standard it
 * may freely overwrite R0-R3, R12 and LR. The user-mode code that issued
 * the SVC does not expect any register to change, so all of those are
 * saved here and restored before returning. Saving six registers also
 * keeps SP 8-byte aligned at the call, as the calling standard requires
 * (pushing LR alone would leave it only 4-byte aligned).
 *
 * Clobbers: nothing visible to the interrupted code.
 */
SWI_HANDLER:
    PUSH    {R0-R3, R12, LR}
    BL      syscall
    POP     {R0-R3, R12, LR}
    MOVS    PC, LR          /* return and restore CPSR from SPSR */

This is how I make the syscall:

/*
 * Runs in user mode (called from RESET_HANDLER after the switch to mode 0x10)
 * and issues a single supervisor call with immediate 0.
 */
void usermode_function() {
    __asm__("SVC #0x00"); // Make syscall
}

And syscall implementation:

/*
 * Kernel-side system call body, called from SWI_HANDLER.
 * Currently a placeholder: it saves r0-r7 on the stack and immediately
 * restores them.
 *
 * NOTE: this is never reached while the exception table is linked at
 * 0x10000 instead of 0x0 (the problem discussed in this question).
 */
void syscall() {
    /*
     * The PUSH and its matching POP must live in ONE asm statement.
     * Between two separate asm statements the compiler is free to emit
     * its own stack-relative accesses, which would then see an SP it
     * does not know has moved.
     */
    __asm__ volatile(
        "PUSH {r0-r7}\n\t"
        "POP {r0-r7}"
    );
}

But the code under SWI_HANDLER is never even invoked.

I really even don't know how to ask the question, since it looks like I'm missing some very basic information in my mind.

So what could be the problem? Which information I should provide to make you able to help me?

Here is also the linker script:

/* Entry point is __RESET, the first slot of the exception table in ivt.s. */
ENTRY(__RESET)
SECTIONS
{
    /* Everything below is linked starting at 0x10000. */
    . = 0x10000;
    /* ivt.o's code is placed first so the exception table begins the image. */
    .ivt .  : { ivt.o(.text) }
    .text   : { *(.text) }
    .data   : { *(.data) }
    .bss    : { *(.bss COMMON) }
    /* Stacks are carved out after .bss, starting on an 8-byte boundary. */
    . = ALIGN(8);
    . = . + 0x1000; /* 4KB of stack memory */
    stack_top = .;      /* loaded into SP in supervisor mode by RESET_HANDLER */
    . = . + 0x100;      /* a further 0x100 bytes above stack_top */
    usr_stack_top = .;  /* loaded into SP in user mode by RESET_HANDLER */
}
Alexander Perechnev
  • 2,797
  • 3
  • 21
  • 35
  • 1
    Probably unrelated but the code in your `malloc` is broken. For starters, it should be a single asm block. Also the `r` constraint in both cases may very well pick a register conflicting with your usage. The `r0` handling in your `syscall` is also broken. Just use normal calling conventions there. – Jester Jan 03 '20 at 17:19
  • @Jester I just save `r0-r7` registers in stack, use `r7` to pass `syscall` number and `r0` for parameter. Why it should be broken? Only because it's not in one `__asm__` block? – Alexander Perechnev Jan 03 '20 at 17:23
  • 1
    For example the `r` constraint may pick `r7` for input and the compiler may load `r7` before your `MOV r7, #0x01` hence you overwrite the size argument. Similarly for the result, the compiler may pick any of `r0-r7` but you then `pop` all those back and hence destroy the result. This has no effect on the actual `svc` call resetting. – Jester Jan 03 '20 at 17:27
  • @Jester ah okay thank you, I understand. Will fix that. But as I see that still can't be the reason why emulator just resets. – Alexander Perechnev Jan 03 '20 at 17:28
  • 2
    you copied the exception table to address 0 somewhere? normally qemu loads at 0x80000 or something yes? – old_timer Jan 03 '20 at 18:28
  • @old_timer no I didn't copy it anywhere. It stays at 0x0. – Alexander Perechnev Jan 03 '20 at 18:31
  • You did not show [mcve] but what I came up with works fine. – Jester Jan 03 '20 at 18:33
  • @Jester I already updated the question so the minimal reproducible example is already there. If you want, you can check it by cloning the sample GitHub repo: https://github.com/perechnev/syscall-example – Alexander Perechnev Jan 03 '20 at 18:39
  • 2
    That linker script puts the ivt at `0x10000` not `0x0` which is what @old_timer specifically asked already. – Jester Jan 03 '20 at 18:41
  • Yes, I confused the address. It puts it at `0x10000`. – Alexander Perechnev Jan 03 '20 at 18:42
  • 2
    And that is wrong. It must be at `0x0` because that's where the cpu looks for it. If you insist on using a raw binary, you must copy the ivt to address 0 at runtime. Otherwise you can just edit the linker script to address 0 and use the ELF image because qemu knows to load that properly. – Jester Jan 03 '20 at 18:42
  • 1
    It does? That is interesting, I will have to try that; I was going to do this experiment when I get home. Will try ELF files, I have always used bin files. – old_timer Jan 03 '20 at 23:11

2 Answers2

1

Many thanks to @Jester and @old_timer, the problem is solved.

The problem was not with the code, but with the linker script. I had put my vector table at 0x10000, as you can see in the linker script, but it should be placed at 0x0. So the SVC was not handled properly because the handler was placed in the wrong place.

When I changed the base address in my ld script and tried to load the firmware as ELF, everything starts to work perfectly.

Alexander Perechnev
  • 2,797
  • 3
  • 21
  • 35
1

You solved it one way but I'll still write my answer.

Very bare bare metal example...

strap.s

/*
 * Start of the image and first four exception slots:
 * reset, undefined instruction, swi, prefetch abort.
 * Only reset and swi have handlers; the other two branch to hang.
 * The later slots (data abort onward) are not populated - the code of
 * reset follows immediately at offset 0x10.
 */
.globl _start
_start:
    b reset
    b hang
    b swi_handler
    b hang

/*
 * reset: set the supervisor-mode stack pointer to 0x10000 and the
 * user-mode stack pointer to 0x9000, then call notmain in user mode.
 * If notmain returns, execution falls through into hang.
 */
reset:
    msr cpsr_c, 0x13 /* Supervisor mode */
    mov sp,#0x10000
    msr cpsr_c, 0x10 /* User mode */
    mov sp,#0x9000
    bl  notmain
/* hang: infinite loop; also the target of the unused exception slots. */
hang:
    b hang

/*
 * swi_handler: placeholder SWI/SVC handler.
 * Saves r0-r4 and lr on the supervisor stack, restores them straight
 * away, and returns to the instruction after the svc. Nothing is done
 * in between yet.
 */
swi_handler:
    stmfd sp!,{r0-r4,lr}    /* same as push {r0,r1,r2,r3,r4,lr} */
    ldmfd sp!,{r0-r4,lr}    /* same as pop  {r0,r1,r2,r3,r4,lr} */
    movs  pc,lr             /* exception return */

/*
 * unsigned int GETPC ( void )
 * Returns the value of pc as read by the mov below. In the sample output
 * this is 0x8 past the address of that mov instruction.
 */
.globl GETPC
GETPC:
    mov r0,pc
    bx lr

/*
 * void PUT32 ( unsigned int addr, unsigned int value )
 * In: r0 = address, r1 = value. Stores the 32-bit value at the address.
 */
.globl PUT32
PUT32:
    str r1,[r0]
    bx lr

/*
 * unsigned int GET32 ( unsigned int addr )
 * In: r0 = address. Out: r0 = 32-bit word loaded from that address.
 */
.globl GET32
GET32:
    ldr r0,[r0]
    bx lr

notmain.c

void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
unsigned int GETPC ( void );

#define UART_BASE 0x101F1000
#define UARTDR    (UART_BASE+0x000)

/*
 * Write one character to the UART data register (UARTDR).
 * No status check is made before writing; the accompanying text notes
 * that under qemu this is enough for the byte to come out.
 */
static void uart_send ( unsigned int x )
{
    PUT32(UARTDR,x);
}

/*
 * Print d as eight uppercase hexadecimal digits, most significant
 * nibble first, followed by a single space (0x20). No line ending.
 */
static void hexstrings ( unsigned int d )
{
    unsigned int shift;
    unsigned int digit;

    shift=32;
    do
    {
        shift-=4;
        digit=(d>>shift)&0xF;
        /* 0-9 map to '0'-'9' (0x30..), 10-15 map to 'A'-'F' (0x41..). */
        digit+=(digit>9)?0x37:0x30;
        uart_send(digit);
    } while(shift!=0);
    uart_send(0x20);
}

/*
 * Print d via hexstrings (eight hex digits plus a trailing space) and
 * then end the line with CR (0x0D) and LF (0x0A).
 */
static void hexstring ( unsigned int d )
{
    hexstrings(d);
    uart_send(0x0D);
    uart_send(0x0A);
}

/*
 * C entry point, called from reset in strap.s.
 * Prints a marker value, the current pc as reported by GETPC, and then
 * the eight words at addresses 0x00..0x1C, each as "address value".
 * Always returns 0.
 */
int notmain ( void )
{
    unsigned int addr;

    hexstring(0x12345678);
    hexstring(GETPC());

    /* Dump the first 0x20 bytes of memory, one word per line. */
    addr=0;
    while(addr<0x20)
    {
        hexstrings(addr);
        hexstring(GET32(addr));
        addr+=4;
    }

    return(0);
}

memmap

/* Single RAM region; the image is linked to run at 0x10000. */
MEMORY
{
    ram  : ORIGIN = 0x00010000, LENGTH = 32K
}
SECTIONS
{
   .text : { *(.text*) } > ram
   /* Collect zero-initialised data here. The previous pattern, *(.text*),
      matched nothing at this point because .text had already taken those
      input sections, so any .bss input was left to default placement. */
   .bss  : { *(.bss*) *(COMMON) } > ram
}

Build

arm-linux-gnueabi-as --warn --fatal-warnings -march=armv5t strap.s -o strap.o
arm-linux-gnueabi-gcc -c -Wall -O2 -nostdlib -nostartfiles -ffreestanding -march=armv5t notmain.c -o notmain.o
arm-linux-gnueabi-ld strap.o notmain.o -T memmap -o notmain.elf
arm-linux-gnueabi-objdump -D notmain.elf > notmain.list
arm-linux-gnueabi-objcopy notmain.elf -O binary notmain.bin

Execute

qemu-system-arm -M versatilepb -m 128M -nographic -kernel notmain.bin

Output

12345678 
0001003C 
00000000 E3A00000 
00000004 E59F1004 
00000008 E59F2004 
0000000C E59FF004 
00000010 00000183 
00000014 00000100 
00000018 00010000 
0000001C 00000000 

Examine, assemble disassemble

.word 0xE3A00000
.word 0xE59F1004
.word 0xE59F2004
.word 0xE59FF004
.word 0x00000183
.word 0x00000100
.word 0x00010000
.word 0x00000000

   0:   e3a00000    mov r0, #0
   4:   e59f1004    ldr r1, [pc, #4]    ; 10 <.text+0x10>
   8:   e59f2004    ldr r2, [pc, #4]    ; 14 <.text+0x14>
   c:   e59ff004    ldr pc, [pc, #4]    ; 18 <.text+0x18>
  10:   00000183    andeq   r0, r0, r3, lsl #3
  14:   00000100    andeq   r0, r0, r0, lsl #2
  18:   00010000    andeq   r0, r1, r0
  1c:   00000000    andeq   r0, r0, r0

So you can see that they are basically launching a Linux kernel: the ATAGS/dtb is perhaps in RAM at 0x100, and they jump to 0x10000. The 0001003C is the pc shown by the program; loaded with that command line, the -O binary version was loaded at 0x10000 and executed there. If you were to have an swi event, then you would start executing at the ldr r2 instruction and land on the reset handler in your code.

(Note incidentally that qemu doesn't properly model uarts, at least so far as I have found so you don't have to initialize them you don't have to wait for the tx buffer to be empty you just jam bytes into the tx buffer and they come out).

If you run the elf without changing the linker script

qemu-system-arm -M versatilepb -m 128M -nographic -kernel notmain.elf

12345678 
0001003C 
00000000 00000000 
00000004 00000000 
00000008 00000000 
0000000C 00000000 
00000010 00000000 
00000014 00000000 
00000018 00000000 
0000001C 00000000 

Interesting: it loads and runs at 0x10000, which is what it was linked for, but it doesn't bother to set anything up for coming out of reset at 0x00000000. And/or this is that linker issue that makes for bad ELF files, and it padded with zeros, which is

  1c:   00000000    andeq   r0, r0, r0

So it could have executed from 0x00000000 to 0x10000 and run into our code.

If we change the linker script

ram  : ORIGIN = 0x00000000, LENGTH = 32K

Run the elf not the bin

qemu-system-arm -M versatilepb -m 128M -nographic -kernel notmain.elf

12345678 
0000003C 
00000000 EA000002 
00000004 EA000006 
00000008 EA000006 
0000000C EA000004 
00000010 E321F013 
00000014 E3A0D801 
00000018 E321F010 
0000001C E3A0DA09 

as expected.

Now for the swi.

strap.s

/*
 * Start of the image and first four exception slots:
 * reset, undefined instruction, swi, prefetch abort.
 * reset and swi have handlers; the other two branch to hang.
 * With the image linked at 0x00000000 these slots sit where the CPU
 * looks for them.
 */
.globl _start
_start:
    b reset
    b hang
    b swi_handler
    b hang

/*
 * reset: set the supervisor-mode stack pointer to 0x10000 and the
 * user-mode stack pointer to 0x9000, then call notmain in user mode.
 * If notmain returns, execution falls through into hang.
 */
reset:
    msr cpsr_c, 0x13 /* Supervisor mode */
    mov sp,#0x10000
    msr cpsr_c, 0x10 /* User mode */
    mov sp,#0x9000
    bl  notmain
/* hang: infinite loop; also the target of the unused exception slots. */
hang:
    b hang

/*
 * swi_handler: SWI/SVC exception handler; calls the C function handler.
 *
 * handler is compiled C, so under the ARM procedure call standard it may
 * overwrite r0-r3, r12 and lr. Those are the registers saved here so the
 * interrupted code sees no change. r4 is callee-saved (C preserves it by
 * itself) and therefore does not need saving; r12 does.
 * Six registers are pushed, which keeps sp 8-byte aligned at the call.
 */
swi_handler:
    push {r0,r1,r2,r3,r12,lr}
    bl handler
    pop  {r0,r1,r2,r3,r12,lr}
    movs pc,lr              /* return and restore cpsr from spsr */

/*
 * unsigned int GETPC ( void )
 * Returns the value of pc as read by the mov below.
 */
.globl GETPC
GETPC:
    mov r0,pc
    bx lr

/*
 * void PUT32 ( unsigned int addr, unsigned int value )
 * In: r0 = address, r1 = value. Stores the 32-bit value at the address.
 */
.globl PUT32
PUT32:
    str r1,[r0]
    bx lr

/*
 * unsigned int GET32 ( unsigned int addr )
 * In: r0 = address. Out: r0 = 32-bit word loaded from that address.
 */
.globl GET32
GET32:
    ldr r0,[r0]
    bx lr

/*
 * void do_swi ( void )
 * Issues a supervisor call with immediate 8 and returns to the caller.
 * swi_handler in this file does not inspect the immediate.
 */
.globl do_swi
do_swi:
    svc #0x08
    bx lr

notmain.c

void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
unsigned int GETPC ( void );
void do_swi ( void );

#define UART_BASE 0x101F1000
#define UARTDR    (UART_BASE+0x000)

/*
 * Write one character to the UART data register (UARTDR).
 * No status check is made before writing.
 */
static void uart_send ( unsigned int x )
{
    PUT32(UARTDR,x);
}

/*
 * Print d as eight uppercase hexadecimal digits, most significant
 * nibble first, followed by CR (0x0D) and LF (0x0A).
 */
static void hexstring ( unsigned int d )
{
    unsigned int shift;
    unsigned int digit;

    shift=32;
    do
    {
        shift-=4;
        digit=(d>>shift)&0xF;
        /* 0-9 map to '0'-'9' (0x30..), 10-15 map to 'A'-'F' (0x41..). */
        digit+=(digit>9)?0x37:0x30;
        uart_send(digit);
    } while(shift!=0);
    uart_send(0x0D);
    uart_send(0x0A);
}

/*
 * Called from swi_handler in strap.s on every svc.
 * Prints the marker 11223344 so the exception is visible on the UART.
 */
void handler ( void )
{
    hexstring(0x11223344);
}

/*
 * C entry point, called from reset in strap.s.
 * Prints a marker, triggers one svc through do_swi, then prints the
 * marker again to show that execution resumed after the exception.
 * Always returns 0.
 */
int notmain ( void )
{
    hexstring(0x12345678);
    do_swi();
    hexstring(0x12345678);
    return(0);
}

memmap

/* Single RAM region; the image is linked to run at 0x00000000 so the
   exception slots at the start of .text sit where the CPU fetches them. */
MEMORY
{
    ram  : ORIGIN = 0x00000000, LENGTH = 32K
}
SECTIONS
{
   .text : { *(.text*) } > ram
   /* Collect zero-initialised data here. The previous pattern, *(.text*),
      matched nothing at this point because .text had already taken those
      input sections, so any .bss input was left to default placement. */
   .bss  : { *(.bss*) *(COMMON) } > ram
}

Run the elf, output is

12345678
11223344
12345678

as desired. But you could have also done this

strap.s

/*
 * Start of the image and first four exception slots:
 * reset, undefined instruction, swi, prefetch abort.
 * Each slot loads pc from a literal word placed right after the slots.
 * The loads are pc-relative, so the slots keep working when they are
 * copied to another address together with the literal words; the
 * literals themselves hold the link-time addresses of the handlers.
 */
.globl _start
_start:
    ldr pc,reset_addr
    ldr pc,hang_addr
    ldr pc,swi_handler_addr
    ldr pc,hang_addr
reset_addr:         .word reset
hang_addr:          .word hang
swi_handler_addr:   .word swi_handler

/*
 * reset: copy the exception slots to address 0, set up the stacks and
 * call notmain in user mode.
 */
reset:
    /* Copy eight words (two groups of four) from 0x10000 to 0x00000:
       the four ldr pc slots plus the literal words that follow them. */
    mov r0,#0x10000
    mov r1,#0x00000
    ldmia r0!,{r2,r3,r4,r5}
    stmia r1!,{r2,r3,r4,r5}
    ldmia r0!,{r2,r3,r4,r5}
    stmia r1!,{r2,r3,r4,r5}

    /* Supervisor stack at 0x10000, user stack at 0x9000. */
    msr cpsr_c, 0x13 /* Supervisor mode */
    mov sp,#0x10000
    msr cpsr_c, 0x10 /* User mode */
    mov sp,#0x9000
    bl  notmain
/* hang: infinite loop; reached if notmain returns, and used as the
   target of the unused exception slots. */
hang:
    b hang

/*
 * swi_handler: SWI/SVC exception handler; calls the C function handler.
 *
 * handler is compiled C, so under the ARM procedure call standard it may
 * overwrite r0-r3, r12 and lr. Those are the registers saved here so the
 * interrupted code sees no change. r4 is callee-saved (C preserves it by
 * itself) and therefore does not need saving; r12 does.
 * Six registers are pushed, which keeps sp 8-byte aligned at the call.
 */
swi_handler:
    push {r0,r1,r2,r3,r12,lr}
    bl handler
    pop  {r0,r1,r2,r3,r12,lr}
    movs pc,lr              /* return and restore cpsr from spsr */

/*
 * unsigned int GETPC ( void )
 * Returns the value of pc as read by the mov below.
 */
.globl GETPC
GETPC:
    mov r0,pc
    bx lr

/*
 * void PUT32 ( unsigned int addr, unsigned int value )
 * In: r0 = address, r1 = value. Stores the 32-bit value at the address.
 */
.globl PUT32
PUT32:
    str r1,[r0]
    bx lr

/*
 * unsigned int GET32 ( unsigned int addr )
 * In: r0 = address. Out: r0 = 32-bit word loaded from that address.
 */
.globl GET32
GET32:
    ldr r0,[r0]
    bx lr

/*
 * void do_swi ( void )
 * Issues a supervisor call with immediate 8 and returns to the caller.
 * swi_handler in this file does not inspect the immediate.
 */
.globl do_swi
do_swi:
    svc #0x08
    bx lr

notmain.c

void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
unsigned int GETPC ( void );
void do_swi ( void );

#define UART_BASE 0x101F1000
#define UARTDR    (UART_BASE+0x000)

/*
 * Write one character to the UART data register (UARTDR).
 * No status check is made before writing.
 */
static void uart_send ( unsigned int x )
{
    PUT32(UARTDR,x);
}

/*
 * Print d as eight uppercase hexadecimal digits, most significant
 * nibble first, followed by CR (0x0D) and LF (0x0A).
 */
static void hexstring ( unsigned int d )
{
    unsigned int shift;
    unsigned int digit;

    shift=32;
    do
    {
        shift-=4;
        digit=(d>>shift)&0xF;
        /* 0-9 map to '0'-'9' (0x30..), 10-15 map to 'A'-'F' (0x41..). */
        digit+=(digit>9)?0x37:0x30;
        uart_send(digit);
    } while(shift!=0);
    uart_send(0x0D);
    uart_send(0x0A);
}

/*
 * Called from swi_handler in strap.s on every svc.
 * Prints the marker 11223344 so the exception is visible on the UART.
 */
void handler ( void )
{
    hexstring(0x11223344);
}

/*
 * C entry point, called from reset in strap.s.
 * Prints a marker, dumps the eight words at 0x10000 and the eight words
 * at 0x00000 (so the two copies of the exception slots can be compared),
 * triggers one svc through do_swi, then prints the marker again.
 * Always returns 0.
 */
int notmain ( void )
{
    unsigned int addr;

    hexstring(0x12345678);

    /* Slots as linked/loaded at 0x10000. */
    addr=0x10000;
    while(addr<0x10020)
    {
        hexstring(GET32(addr));
        addr+=4;
    }

    /* Slots as seen at address 0. */
    addr=0x00000;
    while(addr<0x00020)
    {
        hexstring(GET32(addr));
        addr+=4;
    }

    do_swi();
    hexstring(0x12345678);
    return(0);
}

memmap

/* Single RAM region; the image is linked to run at 0x10000. */
MEMORY
{
    ram  : ORIGIN = 0x00010000, LENGTH = 32K
}
SECTIONS
{
   .text : { *(.text*) } > ram
   /* Collect zero-initialised data here. The previous pattern, *(.text*),
      matched nothing at this point because .text had already taken those
      input sections, so any .bss input was left to default placement. */
   .bss  : { *(.bss*) *(COMMON) } > ram
}

And now both the elf and the binary image versions work. I let the toolchain do the work for me:

00010010 <reset_addr>:
   10010:   0001001c

00010014 <hang_addr>:
   10014:   00010048

00010018 <swi_handler_addr>:
   10018:   0001004c

The ldr pc, is position independent. I copy the four entries plus the four (well three) addresses so that 0x00000 matches 0x10000 and now the exception table (it is not a vector table btw) works.

With newer arm processors you could instead set VTOR to 0x10000 and it would use the one built into the binary, no copying necessary. Or as you solved just build and run your program from 0x00000 and there you go. I wanted to show the alternatives as well as how to figure out (by cheating, you have to love uarts in qemu) what qemu is doing and where it is loading without having to use a debugger.

halfer
  • 19,824
  • 17
  • 99
  • 186
old_timer
  • 69,149
  • 8
  • 89
  • 168