0

I am trying to implement multitasking within my OS and I'm running into a problem where my task switching code causes the OS to crash, sometimes from QEMU trying to access out of bounds memory, and sometimes from an Invalid opcode (with eip getting set to 76), and sometimes from a General Protection Fault. Point is, it's very inconsistent at crashing.

My guess is that somewhere it is destroying ESP(maybe in the task switch) because in one of my register dumps from QEMU I noticed that ESP had been set to 8.

Also, the code works for around 30 seconds before crashing, so presumably its something to do with too many pushes and not enough pops, or the opposite.

The code is designed so that on an IRQ, it will check if a variable got set (this happens when the timer fires and a tasks time slice is over), and if it did, then it will jump to some code which stores the data that was acquired from the interrupt firing, and then pops it off the stack (This was to avoid another problem I had with the stack previously).

This is the code:

Code that calls irq handler:

; IRQ handlers
irq0:
    cli
    push byte 0
    push byte 32
    jmp irq_common_stub

There is many more similar "functions", each for a different irq

; Common IRQ code. Identical to ISR code except for the 'call' 
; and the 'pop ebx'
irq_common_stub:
    pushad
    mov ax, ds
    push eax
    mov ax, 0x10 ;0x10
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov eax, dr6
    push eax
    push esp                 ; At this point ESP is a pointer to where DS (and the rest
                             ; of the interrupt handler state resides)
                             ; Push ESP as 1st parameter as it's a 
                             ; pointer to a registers_t  
    call irq_handler
    mov eax, [switch_task]
    cmp eax, 1
    je changeTasks
    add esp, 8                 ; Remove the saved ESP on the stack. Efficient to just pop it 
                             ; into any register. You could have done: add esp, 4 as well
    pop ebx
    mov ds, bx
    mov es, bx
    mov fs, bx
    mov gs, bx
    popad
    add esp, 8
    sti
    iret

changeTasks:
    mov eax, 1234567
    push eax
    mov eax, 0
    mov [switch_task], eax
    call store_global ; Set a global variable with C
    mov eax, [call_counter]
    add esp, eax ; "Pop" 18 values off the stack
    jmp irq_schedule ; Switch task

Task switching code:

switchTask:
    cli #Clear interrupt flag, to prevent an interrupt from firing while task is switching
    mov 4(%esp), %eax # Move the address of the struct to eax
    pop %ebx # Clean up after caller
    mov 4(%eax), %ebx # EBX
    mov 8(%eax), %ecx # ECX
    mov 12(%eax), %edx # EDX
    mov 16(%eax), %esi # ESI
    mov 20(%eax), %edi # EDI
    mov 24(%eax), %esp # ESP
    push %eax # Save eax
    mov 32(%eax), %eax # Move EIP to EAX
    # Move the new EIP value into esp, and eax is returned to its previous state
    mov (%esp), %ebp
    mov %eax, (%esp)
    mov %ebp, %eax
    mov 36(%eax), %ebp # EFLAGS
    push %ebp # Push the flags(stored in ebp)
    popf # Pop the value we just pushed into the flags register, restores interrupt flag, because each process is started with the interrupt flag on, and disabling it would kill the task system (secret info dont tell processes)
    mov 28(%eax), %ebp # EBP
    mov (%eax), %eax # Set EAX
    sti
    ret # Return to the EIP in stack

store_global:

void store_global(uint32_t f, registers_t *ok) {
    /* Set all 17 */
    global_regs->dr6 = f;
    global_regs->cs = ok->cs;
    global_regs->ds = ok->ds;
    global_regs->ecx = ok->ecx;
    global_regs->eax = ok->eax;
    global_regs->ebx = ok->ebx;
    global_regs->edx = ok->edx;
    global_regs->esp = ok->esp;
    global_regs->ebp = ok->ebp;
    global_regs->eip = ok->eip;
    global_regs->esi = ok->esi;
    global_regs->eflags = ok->eflags;
    global_regs->edi = ok->edi;
    global_regs->useresp = ok->useresp;
    global_regs->err_code = ok->err_code;
    global_regs->ss = ok->ss;
    global_regs->int_no = ok->int_no;
    global_regs->dr6 = ok->dr6;
}

The extra argument was for testing the stack

irq_schedule and schedule:

void schedule(registers_t *from) {
    Registers *regs = &runningTask->regs; // Get registers
    /* Set old registers */
    regs->eflags = from->eflags;
    regs->eax = from->eax;
    regs->ebx = from->ebx;
    regs->ecx = from->ecx;
    regs->edx = from->edx;
    regs->edi = from->edi;
    regs->esi = from->esi;
    regs->eip = from->eip;
    regs->esp = from->esp;
    regs->ebp = from->ebp;
    // Select new running task
    runningTask = runningTask->next;
    while (runningTask->state != RUNNING) {
        runningTask = runningTask->next;
    }
    // Switch
    switchTask(&runningTask->regs);
}
void irq_schedule() {
    schedule(global_regs);
}

ESP over 8 switches, at the beginning of changeTask, and every four switches is all of the tasks

Last switch
0x127758
0x7fde4
0x12b77c
0x12f7f0
Split
0x12776c
0x7fdf8
0x12b77c
0x12f7c0
First switch

Does anyone spot any problems, especially ones that would break the stack?

Menotdan
  • 130
  • 1
  • 11
  • One obvious issue is the `add esp, eax ; "Pop" 18 values off the stack`. `eax` is not set, so what is that doing? – Jester Dec 08 '19 at 00:07
  • Oh let me move the ```mov eax, [call_counter]``` down, but eax is supposed to be set to call_counter, which is 72 @Jester – Menotdan Dec 08 '19 at 00:10
  • 2
    If your tasks are running at ring 0, then all you need to do for a context switch is to save and restore ESP for each task. You can then restore all the registers by returning from the interrupt the same way as if you weren't switching tasks, popping them off the stack. When starting a new task, initialize the new task's stack with a stack frame that looks the same as gets created during an interrupt, so you can start it just by scheduling it. – Ross Ridge Dec 08 '19 at 00:10
  • 1
    If your tasks are running at ring 3, then your ring 0 interrupt handler uses a different stack. The ring 3 SS:ESP is stored on the ring 0 stack before the saved ring 3 CS:EIP. Your interrupt can save and restore all the registers to and from the per-task Registers struct. To switch tasks you just need to change the saved ring 3 SS:ESP and CS:EIP on the ring 0 stack. – Ross Ridge Dec 08 '19 at 00:17
  • @RossRidge Yeah, but I am still curious what's wrong with my current code :/ – Menotdan Dec 08 '19 at 00:19
  • 1
    It's hard to follow what you are doing. `irq_common_stub` ends with `add esp, 8` but there is no obvious counterpart to that. – Jester Dec 08 '19 at 00:23
  • Yeah that is just pushed error code, let me add that information. Thanks for pointing that out – Menotdan Dec 08 '19 at 00:24
  • 2
    Anyway, if you suspect stack unbalance just log (or debug) the stack pointer value for a switch or two, don't wait for it to crash. – Jester Dec 08 '19 at 00:26
  • @Jester Yeah that's a good idea, added ESP log at the beginning of changeTask for 8 switches, It does look like stack imbalance – Menotdan Dec 08 '19 at 00:33
  • 2
    Why is there a mix of Intel and AT&T syntax in the same code block?? Are you actually assembling some of this with NASM like you tagged, not GAS `.intel_syntax noprefix`? – Peter Cordes Dec 08 '19 at 07:24
  • It's not actually in the same file I just put the ASM together @PeterCordes – Menotdan Dec 08 '19 at 15:38
  • 1
    That makes more sense. `xchg (%esp), %eax` - you're doing this with interrupts disabled. You don't AFAICT need to use a slow atomic xchg; you can use a tmp register. – Peter Cordes Dec 08 '19 at 15:48
  • @PeterCordes Oh yeah I can just use EBP since I have not stored anything there yet. I still wonder though what is causing the stack to break – Menotdan Dec 08 '19 at 16:09
  • @PeterCordes Yeah, for some reason my code gets ESP set to some broken value, so when it goes to change tasks, it sets EIP to whatever value it finds at ESP. My register dump shows ESP gets set to 0x38 – Menotdan Dec 08 '19 at 16:13
  • Also sometimes interrupts just stop, along with the other problems ive mentioned in the question – Menotdan Dec 08 '19 at 16:15
  • IDK, this is more code than I'm interested in debugging in my head; sorry. If you think something is stepping on a saved value, consider using a watchpoint on that address in your debugger. – Peter Cordes Dec 08 '19 at 16:29
  • @PeterCordes Yeah no worries. I was just curious if there were any super obvious errors, or if someone could find a problem. I'm probably just gonna mess around in gdb until I find something – Menotdan Dec 08 '19 at 16:37
  • Let us [continue this discussion in chat](https://chat.stackoverflow.com/rooms/203854/discussion-between-menotdan-and-peter-cordes). – Menotdan Dec 08 '19 at 18:52

0 Answers0