0

So I was trying to learn some assembly when I came across this tutorial with source code. I've assembled it and run it, and it produces working output:

Booting from hard disk...
Hello, World
Goodbye
0x12FE

But the problem is that when I review the code, `call` instructions are used for certain functions, while others are never explicitly called. What I mean is that boot_sect_main.asm calls the function print (defined in boot_sect_print.asm), yet nothing ever calls the start or done functions, and those two are still run. Can someone explain why this happens?

boot_sect_main.asm

[org 0x7c00] ; the BIOS loads the boot sector at 0x7c00, so label offsets are computed from that base

; The BIOS does not guarantee the contents of the segment registers or the
; location of the stack when it jumps here. Because of [org 0x7c00], every
; data reference below (HELLO, GOODBYE, ...) is only correct when ds = 0,
; so set it explicitly instead of relying on what the firmware left behind.
    xor ax, ax
    mov ds, ax
    mov ss, ax          ; loading ss holds off interrupts until after the next instruction,
    mov sp, 0x7c00      ; so ss:sp change as a pair; the stack grows down from below this sector

; The main routine makes sure the parameters are ready and then calls the function
    mov bx, HELLO       ; print takes the string address in bx
    call print

    call print_nl

    mov bx, GOODBYE
    call print

    call print_nl

    mov dx, 0x12fe      ; print_hex takes the value in dx
    call print_hex

; that's it! we can hang now
    jmp $               ; jump to this same instruction forever

; remember to include subroutines below the hang, otherwise execution
; would fall straight into them after the last call returns
%include "boot_sect_print.asm"
%include "boot_sect_print_hex.asm"


; data (NUL-terminated strings, as print expects)
HELLO:
    db 'Hello, World', 0

GOODBYE:
    db 'Goodbye', 0

; padding and magic number: fill up to byte 510, then the boot signature
times 510-($-$$) db 0
dw 0xaa55

boot_sect_print.asm

; print: write a NUL-terminated string using the BIOS teletype service.
; In:    bx = address of the string (read through ds)
; Out:   nothing; all general-purpose registers are restored by popa
print:
    pusha

    ; int 0x10 / ah=0x0e takes the display page in bh and, in graphics modes,
    ; the foreground colour in bl. Walk the string with si so that bx can
    ; hold well-defined values instead of the bytes of the string pointer.
    mov si, bx
    mov bx, 0x0007      ; bh = page 0, bl = light grey

; keep this in mind:
; while (string[i] != 0) { print string[i]; i++ }

; the comparison for string end (null byte)
start:
    mov al, [si]        ; al = current character
    test al, al         ; zero byte marks the end of the string
    jz done

    ; the part where we print with the BIOS help
    mov ah, 0x0e        ; teletype output; 'al' already contains the char
    int 0x10

    ; advance the pointer and do the next loop iteration
    add si, 1
    jmp start

; 'start' and 'done' are plain jump targets inside print, not separate
; functions: they are reached by falling through or by the jumps above.
done:
    popa
    ret



; print_nl: move the cursor to the start of the next line by sending
; line feed (0x0a) followed by carriage return (0x0d) to the BIOS teletype.
; In:    nothing
; Out:   nothing; all general-purpose registers are restored by popa
print_nl:
    pusha

    mov bx, 0x0007      ; bh = display page 0, bl = colour (graphics modes only);
                        ; without this they would be whatever the caller left in bx

    mov ax, 0x0e0a      ; ah = 0x0e teletype output, al = newline char
    int 0x10
    mov ax, 0x0e0d      ; ah = 0x0e teletype output, al = carriage return
    int 0x10

    popa
    ret

boot_sect_print_hex.asm

; print_hex: print the 16-bit value in dx as "0x" followed by four
; uppercase hexadecimal digits.
; In:    dx = value to print
; Out:   nothing; all general-purpose registers are restored by popa
; Side effect: overwrites the four digit characters of HEX_OUT, then calls print.
; For the examples we'll assume that we're called with dx=0x1234
print_hex:
    pusha

    mov cx, 0 ; cx = number of digits converted so far (0..4)

; Strategy: take the lowest nibble of 'dx', convert it to ASCII, and store it
; in HEX_OUT from the rightmost digit towards the left.
; Digits 0-9:  add 0x30, giving '0' (0x30) to '9' (0x39).
; Digits A-F:  0x30 + N gives 0x3a..0x3f, so add 7 more to land on
;              'A' (0x41) to 'F' (0x46).
.next_digit:
    cmp cx, 4 ; loop 4 times, one per hex digit
    je .emit

    ; 1. convert the lowest nibble of 'dx' to ascii
    mov ax, dx ; we will use 'ax' as our working register
    and ax, 0x000f ; 0x1234 -> 0x0004 by masking the upper three nibbles to zero
    add al, '0' ; N -> ASCII "N" for N <= 9
    cmp al, '9'
    jbe .store ; unsigned compare: al is a character code, not a signed number
    add al, 'A' - '9' - 1 ; = 7, skips the characters between '9' and 'A'

.store:
    ; 2. get the correct position of the string to place our ASCII char
    ; bx <- address of the last digit - index of char
    mov bx, HEX_OUT + 5 ; "0x0000": offset 5 is the rightmost digit
    sub bx, cx
    mov [bx], al ; copy the ASCII char in 'al' to the position pointed to by 'bx'
    ror dx, 4 ; 0x1234 -> 0x4123 -> 0x3412 -> 0x2341 -> 0x1234

    ; increment index and loop
    add cx, 1
    jmp .next_digit

.emit:
    ; prepare the parameter and call the function
    ; remember that print receives its parameter in 'bx'
    mov bx, HEX_OUT
    call print

    popa
    ret

HEX_OUT:
    db '0x0000',0 ; output buffer; the four '0' digits are overwritten on every call
Ross Ridge
  • 38,414
  • 7
  • 81
  • 112
  • 1
    Functions are just labels like any other label. `start` and `done` happen to be labels that are part of the `print` function. – Michael Petch Sep 30 '18 at 03:04
  • @MichaelPetch How do you know that they are a part of the function? There is no indentation to indicate that. I'm just curious, so that I can identify them when I see them or when I need to use them – Aidan Bradley Sep 30 '18 at 03:07
  • 1
    In this case you can follow the code easily and see that the function is between `print:` and the instruction `ret`. Indentation is stylistic but not required. – Michael Petch Sep 30 '18 at 03:44
  • @MichaelPetch Ah I see now, I don't know how I missed that. Thank you for your explanation! – Aidan Bradley Sep 30 '18 at 03:47
  • 1
    Execution always continues to the next instruction in memory, regardless of labels. – Peter Cordes Sep 30 '18 at 04:57
  • the CPU is not aware of "function" concept, it doesn't care where the labels were in the source either (that's not part of resulting machine code, which is executed, all offsets/addresses are hardwired by linker and/or binary loader, in the machine code there's fixed constant value), etc.. the "functions" concept is most often implemented by `call + ret` pair of instructions, but even those are sometimes exploited in different context, for example `call` is often used in exploits to determine current memory position of code, and `ret` allows for "ROP" kind of exploits... – Ped7g Sep 30 '18 at 15:25
  • I mean, every CPU instruction affects the current state of the CPU in an exactly deterministic way defined in the instruction guide. All of them work pretty much independently, except for a very few cases where the previous instruction also somewhat affects the next one (for example, `mov ss,...` will disable interrupts until the next instruction finishes, so it can be followed by `mov sp,...` in real mode to safely change the full stack pointer address). There's no instruction giving the CPU any longer context/state, like "here you are in a function"; that's achieved by the logic of the code itself, the CPU doesn't bother with it. – Ped7g Sep 30 '18 at 15:29

0 Answers0