0

I'm following the OSDev "Higher Half x86 Bare Bones" tutorial (which I have completed several times before) and trying to modify it so that the kernel is mapped into the higher half of a 64-bit PML4 address space rather than the higher half of a 32-bit page table. I'm adapting the 32-bit tutorial myself because the tutorials use mixed syntaxes: the Bare Bones (BB) ones use GNU syntax, while the only 64-bit tutorial there uses MASM syntax, which isn't compatible.

So far, I've got this 235-line mess:

# Everything that follows is assembled as 32-bit code until a later .code64
# directive switches the assembler to 64-bit encodings.
.code32

# Field values for the Multiboot header.
.equ ALIGN,    0x1                  # flag bit 0: align loaded modules on page boundaries
.equ MEMINFO,  0x2                  # flag bit 1: ask the loader for a memory map
.equ FLAGS,    MEMINFO | ALIGN      # combined Multiboot 'flags' field
.equ MAGIC,    0x1BADB002           # signature the bootloader scans for
.equ CHECKSUM, 0 - (MAGIC + FLAGS)  # makes MAGIC + FLAGS + CHECKSUM sum to zero

# The header itself: three consecutive 32-bit words on a 4-byte boundary,
# in the order magic, flags, checksum.
.section .multiboot
.balign 4
.long MAGIC, FLAGS, CHECKSUM

# Boot stack: 16 KiB of uninitialised (nobits) storage on a 16-byte boundary.
# stack_bottom labels the lowest byte, stack_top the first byte past the end;
# the startup code loads stack_top into the stack pointer.
.section .boot_stack, "aw", @nobits
.balign 16
stack_bottom:
    .space 16 * 1024
stack_top:

.section .bss, "aw", @nobits

# Storage for the long-mode paging structures: four tables of 4096 bytes each.
# Only the first table needs an explicit alignment directive; because every table
# is exactly one page long, the ones that follow it stay page-aligned as well.
    .balign 0x1000

    .globl pml4_root
pml4_root:                  # top-level table (PML4T)
    .space 0x1000

    .globl pml4_pdptr
pml4_pdptr:                 # page-directory-pointer table (PDPT)
    .space 0x1000

    .globl pml4_dir
pml4_dir:                   # page directory (PDT)
    .space 0x1000

    .globl pml4_bpt0
pml4_bpt0:                  # first boot page table
    .space 0x1000
# TODO: PML5

#64-bit kernel GDT
# Three 8-byte segment descriptors: null, code and data. Each descriptor is laid
# out as: limit[15:0], base[15:0], base[23:16], access byte,
# flags (high nibble) + limit[19:16] (low nibble), base[31:24].
# NOTE(review): the section is declared without any flags ("a"/"w"); confirm that
# the linker script places it in a segment that is actually loaded into memory.
.section .gdt
gdt_start:
# Entry 0: the null descriptor (selector 0x00).
# NOTE(review): a null descriptor is conventionally all zero; the non-zero limit and
# flags byte here are presumably harmless, but confirm.
null:
  .word 0xffff      #Limit
  .word 0       #Base (low)
  .byte 0       #Base (middle)
  .byte 0       #Access
  .byte 1       #Granularity
  .byte 0       #Base (high)
# Entry 1: kernel code segment (selector 0x08).
code:
  .word 0       #Limit
  .word 0       #Base (low)
  .byte 0       #Base (middle)
  .byte 0b10011010  #Access: present, ring 0, code/data descriptor, executable, readable
  .byte 0b10101111  #Flags: G=1, D=0, L=1 (64-bit code segment); limit[19:16]=0xF
  .byte 0       #Base (high)
# Entry 2: kernel data segment (selector 0x10).
data:
  .word 0       #Limit
  .word 0       #Base (low)
  .byte 0       #Base (middle)
  .byte 0b10010010  #Access: present, ring 0, code/data descriptor, data, writable
  .byte 0b00000000  #Flags: all clear; limit[19:16]=0
  .byte 0       #Base (high)
gdt_end:

# gdtp is meant to be the descriptor-table pointer handed to lgdt.
# NOTE(review): nothing below emits any bytes. .equ only defines assembler symbols,
# so no 16-bit limit / base-address structure exists at gdtp for lgdt to read; the
# usual form is a .word with the limit followed by the table's base address.
# NOTE(review): the label is placed before .align, so gdtp itself is not guaranteed
# to be 8-byte aligned.
.global gdtp
gdtp:
  .align 8
  # Size of the table in bytes minus one, i.e. the GDT limit value.
  .equ gdt_len, gdt_end - gdt_start - 1
  # NOTE(review): a hard-coded constant rather than the address of gdt_start, and it
  # is not referenced by the visible code - confirm the intent.
  .equ gdt_addr, $0xffff000000000000

# The kernel entry point.
.section .text

# Fatal-error stops used by the CPUID / long-mode checks in the bootstrap.
#
# The message text is kept as data in .rodata so that a jump to NoLongMode or
# NoCPUID never executes the string bytes as instructions. Nothing in the visible
# bootstrap prints these strings yet; they are retained for a future output routine.
.pushsection .rodata
no_long_mode_msg:
    .ascii "Error\: Long Mode not detected"
no_cpuid_msg:
    .ascii "Error\: could not determine CPUID"
.popsection

# Park the CPU forever: long mode is not available.
# An unconditional jmp is used instead of loop, because loop decrements %ecx and
# falls through to whatever follows once the count reaches zero.
.global NoLongMode
NoLongMode:
    cli                     # keep maskable interrupts from waking the hlt
    hlt
    jmp NoLongMode          # halt again if an NMI/SMI resumes execution

# Park the CPU forever: the CPUID instruction is not available.
.global NoCPUID
NoCPUID:
    cli                     # keep maskable interrupts from waking the hlt
    hlt
    jmp NoCPUID             # halt again if an NMI/SMI resumes execution

# _start: kernel entry point.
# Assembled as 32-bit code up to the .code64 directive below. It checks for CPUID and
# long-mode support, sets CR4.PAE, EFER.LME and CR0.PG, loads a GDT, then (from the
# longmode label on) fills the page tables, loads CR3, jumps to the higher-half
# address of label 4, sets up the stack and calls kernel_main. Never returns.
# NOTE(review): the file uses 0xffff000000000000 as the kernel's virtual base. The
# review thread points out this is not a canonical address with 48-bit virtual
# addressing (bits 63:48 must equal bit 47); 0xFFFFFFFF80000000 was suggested.
.global _start
.type _start, @function
_start:

setup_64:

    #Block interrupts until we have the IDT
    cli

    # NOTE(review): the pushes below use the stack, but no stack pointer has been
    # loaded in the visible code before this point - confirm %esp is valid here.

    #CPUID: flags
    pushfl
    popl %eax

    #CPUID: compare
    # Keep an unmodified copy of EFLAGS in %ecx for the comparison and the restore.
    movl %eax, %ecx

    #CPUID: ID bit
    # Flip EFLAGS bit 21 (ID); if the flip sticks, the CPUID instruction exists.
    xorl $(1<<21), %eax

    #FLAGS
    # Write the modified flags, read them back into %eax, then restore the originals.
    pushl %eax
    popfl
    pushfl
    popl %eax
    pushl %ecx
    popfl

    #If no CPUID functionality exists
    # %eax ^ %ecx is zero when the ID bit could not be changed.
    xorl %ecx, %eax
    jz NoCPUID
    # NOTE(review): when CPUID is supported the jz is not taken and this ret executes.
    # No call precedes it in the visible code, so it pops an arbitrary value as the
    # return address and nothing below is reached on that path. It looks like a
    # leftover from a subroutine version of this check - confirm and remove.
    ret

    #Long mode detection, part 1
    # Highest supported extended CPUID leaf must be at least 0x80000001.
    movl $0x80000000, %eax
    cpuid
    cmpl $0x80000001, %eax
    jb NoLongMode

    #Long mode detection, part 2
    # Extended leaf 0x80000001: EDX bit 29 is the long-mode (LM) feature flag.
    movl $0x80000001, %eax
    cpuid
    testl $(1<<29), %edx
    jz NoLongMode 

    #Temporarily disable paging until we've got it properly set up
    # Clears CR0 bit 31 (PG).
    movl %cr0, %eax
    andl $0b01111111111111111111111111111111, %eax
    movl %eax, %cr0

    #PAE
    # Sets CR4 bit 5 (PAE).
    movl %cr4, %eax
    orl $(1<<5), %eax
    movl %eax, %cr4

    #LM-bit
    # MSR 0xC0000080 is EFER; bit 8 is LME (long mode enable).
    movl $0xC0000080, %ecx
    rdmsr
    orl $(1<<8), %eax
    wrmsr

    #Reenable paging
    # Sets CR0 bit 31 (PG).
    # NOTE(review): CR3 is not loaded and the page tables are not filled in until
    # further down (after the longmode label), so paging is switched on here before
    # any valid tables exist. The review thread flags this ordering as a fatal error.
    movl %cr0, %eax
    orl $(1<<31), %eax
    movl %eax, %cr0

    #Clear all 32-bit registers to shut linker up
    # NOTE(review): these two stores have no effect on linking; both registers are
    # overwritten again before they are next read.
    movl $0, %eax
    movl $0, %ecx

    #GDT + LM jump
    # NOTE(review): gdt_len is an absolute assembler constant (table size minus one),
    # not the address of a descriptor-table pointer, so this reads the pointer from
    # that small absolute address. The review thread says this will not work.
    lgdt (gdt_len)
    # NOTE(review): this is a near jump, so CS is not reloaded with the 64-bit code
    # selector. Per the review thread a far jump is required; without it the CPU stays
    # in 32-bit compatibility mode while the code below is encoded as 64-bit.
    jmp longmode

    #Actually enter 64-bit mode for good
    # .code64 only changes how the assembler encodes the instructions that follow.
    .code64
longmode:       
    # Fill the boot page table. Symbol addresses come from the linker script (higher
    # half); subtracting 0xffff000000000000 converts them back to the low addresses
    # the image is actually running from at this point.
    # Register roles in the loop: %rdi = current table entry, %rsi = address being
    # mapped, %rcx = remaining iterations, %rdx = scratch for the entry value.
    #Physical address of first boot page table
    movabsq $(pml4_bpt0 - 0xffff000000000000), %rdi #Physical address of first boot page table
    movabsq $0, %rsi #First address to map

    #64-bit entries are double the size of 32-bit entries but table size is the same
    # 511 iterations; entry 511 of the table is written separately below.
    movabsq $511, %rcx

1:
    #Kernel mapping
    # Skip pages below _kernel_start; stop once _kernel_end is reached.
    # NOTE(review): jl/jge are signed comparisons applied to addresses.
    cmpq $(_kernel_start - 0xffff000000000000), %rsi
    jl 2f
    cmpq $(_kernel_end - 0xffff000000000000), %rsi
    jge 3f

    #Map physical address space as present+writable
    # Entry value = page address | 0x003 (bit 0 present, bit 1 writable).
    movq %rsi, %rdx
    orq $0x003, %rdx
    movq %rdx, (%rdi)

2:
    addq $4096, %rsi  #page size in bytes
    addq $8, %rdi     #size of page entries
    loop 1b           #loop if unfinished

3:
    #Video memory location
    # Last entry (index 511) of the boot page table -> 0xB8000, present + writable.
    movabsq $(0x00000000000B8000 | 0x003), %rax
    movq %rax, pml4_bpt0 - 0xffff000000000000 + 511 * 8

    # Each level below is hooked up twice: at entry 0 and at entry 384.
    # NOTE(review): confirm that index 384 at every level really corresponds to the
    # kernel's virtual base address; the review thread calls these mappings incorrect.

    #Map first kernel page to the first kernel PDT
    movabsq $(pml4_bpt0 - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_dir - 0xffff000000000000 + 0
    movabsq $(pml4_bpt0 - 0xffff000000000000 + 0x003), %rax
    movq    %rax, pml4_dir - 0xffff000000000000 + 384 * 8

    #Map first kernel PDT to first kernel PDPT
    movabsq $(pml4_dir - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_pdptr - 0xffff000000000000 + 0
    movabsq $(pml4_dir - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_pdptr - 0xffff000000000000 + 384 * 8

    #Map first kernel PDPT to the PML4T
    movabsq $(pml4_pdptr - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_root - 0xffff000000000000 + 0
    movabsq $(pml4_pdptr - 0xffff000000000000 + 0x003), %rax
    movq %rax, pml4_root - 0xffff000000000000 + 384 * 8

    #Set third control register to address of PML4T
    movabsq $(pml4_root - 0xffff000000000000), %rcx
    movq %rcx, %cr3

    #Jump to 64-bit higher half
    # The indirect jump exists to load RIP with the absolute link-time address of
    # label 4 instead of the low address currently being executed from.
    # NOTE(review): "leaq 4f, %rcx" encodes the address as a sign-extended 32-bit
    # displacement, which is the R_X86_64_32S relocation the linker reports as
    # truncated. The accepted answer recommends "movabsq $4f, %rcx"; a RIP-relative
    # "leaq 4f(%rip), %rcx" links but yields the current low address, defeating the jump.
    leaq 4f, %rcx
    jmpq *%rcx

4:
    #Reload PML4T along with all of its children, incl kernel pages
    # Writing CR3 back to itself flushes the non-global TLB entries.
    movq %cr3, %rcx
    movq %rcx, %cr3
    # Switch to the boot stack using its link-time address.
    movabsq $stack_top, %rsp

    #Self-explanatory
    callq kernel_main

    # kernel_main is not expected to return; if it does, halt forever.
    cli
5:  hlt
    jmp 5b

# Record the size of _start in the symbol table.
.size _start, . - _start

It had a lot of linker errors before I started using movabs, etc. which got the linker woes from about 20 down to just 1:

boot64.o: in function `longmode':
(.text+0x18b): relocation truncated to fit: R_X86_64_32S against `.text'
collect2: error: ld returned 1 exit status

This would be easy to solve if the linker actually specified line numbers to find errors on ― but it doesn't. So if anyone can help find the offending line, I'd appreciate it.

The linker script is identical to the one used in the tutorial with only one exception (the hardcoded address is 0xFFFF000000000000 instead of 0xC0000000), if that helps any.

realkstrawn93
  • 722
  • 1
  • 6
  • 13
  • use `objdump -S` on the binary to find which line the address `.text+0x18b` corresponds to. – fuz Nov 19 '19 at 20:02
  • Objdump only works on binaries that are fully linked if I’m not mistaken, but hold on… – realkstrawn93 Nov 19 '19 at 20:08
  • By the way, `.ascii` doesn't do what you think it does. It will not print anything it's just defining data and don't put that into execution path. – Jester Nov 19 '19 at 20:09
  • Okay, I ran objdump on the binary and it skips from 187 to 18e. There’s no 18b at all. – realkstrawn93 Nov 19 '19 at 20:10
  • The relocation is inside an instruction. So what is at `187`? Also use `objdump -dr` to see relocations. – Jester Nov 19 '19 at 20:12
  • `leaq 4f, %rcx`. It’s what’s supposed to jump to the higher half according to the tutorial. – realkstrawn93 Nov 19 '19 at 20:13
  • In the source file 0x187 corresponds to line 220. – realkstrawn93 Nov 19 '19 at 20:15
  • Replace with `leaq 4f(%rip), %rcx`. – Jester Nov 19 '19 at 20:15
  • That worked. I wonder why it requires the rip register despite the fact that it wasn’t touched though, that’s weird. Thanks anyway. – realkstrawn93 Nov 19 '19 at 20:18
  • What you're doing doesn't seem to be possible. You can't execute 32-bit code at address 0xFFFF000000000000. – Ross Ridge Nov 19 '19 at 20:19
  • 3
    @realkstrawn93 `4f` is an absolute memory reference whereas `4f(%rip)` is a rip-relative memory reference. The former only fits in the 32 bit available if you load the binary to a sufficiently low address. It won't ever fit in PIC. – fuz Nov 19 '19 at 20:20
  • Thanks for the explanation. Well at least it compiled — now the question is whether or not it will actually run. Time to start debugging. – realkstrawn93 Nov 19 '19 at 20:24
  • 1
    Alternatively `movabsq $4f, %rcx` could have worked too but really no reason to use that. – Jester Nov 19 '19 at 20:24
  • @Ross Ridge It’s 64-bit code with an integrated 32-bit bootstrap to allow GRUB to recognize it. – realkstrawn93 Nov 19 '19 at 20:25
  • Also `lgdt (gdt_len)` won't work given how you've defined `gdt_len`. You should be using `lgdt gdtp(%rip)` except what you defined after `gdtp` is incorrect. – Ross Ridge Nov 19 '19 at 20:25
  • GRUB can't load and execute 32-bit code at address 0xFFFF000000000000. – Ross Ridge Nov 19 '19 at 20:26
  • Again, it’s not 32-bit code. It’s 64-bit code that I’m defining a PML4 to map to lower addresses so that GRUB thinks it’s in a lower address space when it isn’t. – realkstrawn93 Nov 19 '19 at 20:29
  • Everything up to line 166 is bootstrap code. After that, it’s supposed to jump to 64-bit addresses. – realkstrawn93 Nov 19 '19 at 20:32
  • For the record: https://forum.osdev.org/viewtopic.php?f=1&t=32475 This post describes a similar issue. – realkstrawn93 Nov 19 '19 at 20:56
  • 1
    Just something beyond everything else mentioned here. Is there a reason you chose 0xFFFF000000000000 instead of FFFFFFFF80000000 for your higher half? – Michael Petch Nov 19 '19 at 22:54
  • Usually (unless you have a patched version of GRUB), Grub Multiboot v1 won't load 64-bit ELF files. Are you building 32-bit ELF files with 64-bit code? It is possible to do, but you have to jump through some extra hoops when linking (or using objcopy to do some conversions). – Michael Petch Nov 19 '19 at 23:21
  • I’m using `=sys-boot/grub-9999` on a Gentoo host — which would definitely be GRUB2, and the latest pre-release version from Git at that. I’ll know when I start testing this barebones code if it actually boots via KVM-accelerated QEMU, but if I’m not mistaken the 64-bit Linux sources on my machine should have the multiboot header in them — if they don’t, then my machine wouldn’t boot the host kernel either. I do also have a UEFI motherboard (MSI H310-F Pro) on the build machine — don’t know if that makes a difference or not. – realkstrawn93 Nov 20 '19 at 03:13
  • 2
    @Jester: Turns out `movabs $4f, %rcx` is important: at that point RIP doesn't match what the linker-script says, and the purpose of using an absolute address instead of just `jmp 4f` is to change RIP to the high-half address. – Peter Cordes Nov 20 '19 at 04:08
  • 1
    The reason I really asked about 0xFFFF000000000000 is that it isn't even a canonical address in long mode. With a 48-bit virtual address space, 0xFFFF000000000000 is not valid, which makes all your page tables very suspect even if you could assemble and boot. With 48-bit addressing, canonical (valid) addresses run from 0x0000000000000000 to 0x00007FFFFFFFFFFF and from 0xFFFF800000000000 to 0xFFFFFFFFFFFFFFFF. All addresses from 0x0000800000000000 to 0xFFFF7FFFFFFFFFFF are invalid. Bits 48 through 63 have to be the same value as bit 47. – Michael Petch Nov 20 '19 at 14:20

1 Answers1

2

The original 32-bit code is using lea 4f, %ecx / jmp *%ecx to set EIP to an absolute address that depends on the linker script, not the current EIP. (lea 4f, %ecx is an inefficient equivalent to mov $4f, %ecx, putting a 32-bit absolute address into a register)

lea 4f, %rcx can only work with an absolute address that fits in a 32-bit sign-extended disp32 addressing mode. (Because that's how x86-64 addressing modes work). That's what relocation truncated to fit: R_X86_64_32S against `.text' means: the 32S relocation in the object file metadata specifies that the correct absolute address should be encoded into a 32-bit sign-extended value. But since you presumably adjusted the linker script to put . = 0xFFFF000000000000 instead of . = 0xC0100000;, the label 4 has too many significant digits.


lea 4f(%rip), %rcx will assemble but defeats the entire purpose; you might as well jmp 4f or just nop or nothing. It calculates the address relative to the current RIP, not based on the linker script. If you had single-stepped and looked at RIP in a debugger, you would have seen that RIP wasn't what you wanted with this suggestion.

You want movabs $4f, %rcx which can use a 64-bit immediate to hold the full 64-bit address. The purpose of that indirect jump is to set RIP to a known absolute high address, so you must not calculate the address relative to the current RIP. You need to avoid position-independent methods here, despite the fact that x86-64 makes position-independent code easier.

Remember that before that jmp *%rcx, your code is executing from a RIP that doesn't match what you used in the linker script. You can see this if you single-step it in the debugger built into BOCHS, for example.


If you had put your kernel within 2GiB of the top of virtual address space, lea 4f, %rcx would have Just Worked. (But mov $4f, %rcx would still have been better.) 7-byte mov $sign_extended_imm32, %rcx is more efficient than 10-byte movabs $imm64, %rcx; all else being equal, smaller code-size is better.

High-half kernels are the rare case where mov $sign_extended_imm32, %r64 is a good option for putting a static address in a register; normally (outside of bootstrap / setup code like this) you want a RIP-relative LEA. Or mov $imm32, %r32 if your address is known to be in the low 2GiB of virtual address space, e.g. in user-space in a non-PIE Linux executable.

Having your kernel's static code/data within 2GiB of the top of virtual address space also means you can use addressing modes like array(%rdx), where array's address is encoded as a sign-extended disp32. So it's the same as a Linux non-PIE executable except only sign-extended works, not zero-extended.

I'd recommend doing like @MichaelPetch suggested and using 0xFFFFFFFF80000000 as your kernel base address.

BTW, if you know the absolute virtual address where your image will be running from before the jmp, you could use a direct relative jmp rel32 with a large negative displacement to wrap RIP around from small positive to within that 2GiB "high half". Not sure if there's a simple way to get the linker to calculate that for you, though, so it's certainly easier to mov $abs_address, %rcx / jmp *%rcx, and this startup code can be reclaimed once your kernel is up and running. So code-size here only matters for total size of the kernel image.


Other stuff

    #Clear all 32-bit registers to shut the linker up
    movl $0, %eax
    movl $0, %ecx

What? That makes no sense. Also, if you want to zero a register, xor %eax,%eax is the optimal way.

    #64-bit GDT must be loaded BEFORE the switch to actual 64-bit address space ― see https://wiki.osdev.org/Creating_a_64-bit_kernel for more details
    lgdt (gdtp)

GAS accepts that, but the standard syntax for a memory operand is just the bare symbol name. lgdt isn't special; it still uses a ModR/M addressing mode just like add gdtp, %eax. lgdt loads a pointer + length from its memory operand.

lgdt gdtp would be more standard syntax for using the absolute address of a symbol as the addressing mode. But if you like (symbol) as a reminder that it's a memory operand, that's ok.

Some of your other code looks inefficient; lots of absolute addresses being used instead of simple pointer increments or offsets.

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
  • When I asked about the address he was using, what I didn't mention originally was the one he chose isn't even canonical in 48-bit address space (I put a comment under the question this morning). Beyond everything else the choice of address and how he constructed the PML4, PDPT, and PD and did the mappings is incorrect. As well when he jumps into 64-bit mode he doesn't use a FAR jmp to set CS so he actually just remains in 32-bit compatibility mode (which is a sub mode of long mode). – Michael Petch Nov 20 '19 at 15:32
  • So even if he got this to assemble and link and be loaded by a Multiboot-compliant loader, it just wasn't going to work. – Michael Petch Nov 20 '19 at 15:42
  • @MichaelPetch: Oh right, I was thinking `0xFFFF000...` has 16 bits of ones, but bit 47 has to match the top 16, not bit 48 matching the top 15. I also wondered if I was missing something when looking at this code, glad to hear it's broken. Although I think most of my confusion was figuring out the `pml4_bpt0 - 0xffff000000000000` stuff: turns out that's just converting linker-script addresses back to low identity-mapped addresses. Which might be a plausible part of an inefficient way to do the init. – Peter Cordes Nov 20 '19 at 15:46
  • Yeah the reason he did all the calculations (subtracting the higher half VMA) was because the BSS section where all the page table structures would have been placed in the higher half by the linker script and he needed their physical address which was in the lower half in the area above address 0x100000 where a Multiboot loader would have loaded it. – Michael Petch Nov 20 '19 at 15:58
  • 1
    The other thing is those `movabs` instructions would have computed a physical address below 4 GiB, so the address should have been representable in 32 bits. The other glaring error was that he entered long mode without enabling paging first which would have failed. He tried to set up paging after. – Michael Petch Nov 20 '19 at 16:00