I'm trying to follow the OSDev "Higher Half x86 Bare Bones" tutorial (after having done it multiple times) and modify it to send the kernel to the higher half of the PML4 as opposed to the higher half of a 32-bit page table. The reason being because of mixed syntaxes in the tutorials: the BB ones use GNU syntax, while the only 64-bit tutorial on there uses MASM syntax which isn't compatible.
So far, I've got this 235-line mess:
# In 32-bit mode until we get to _start
.code32
# Declare constants for the multiboot header.
.set ALIGN, 1<<0 # align loaded modules on page boundaries
.set MEMINFO, 1<<1 # provide memory map
.set FLAGS, ALIGN | MEMINFO # this is the Multiboot 'flag' field
.set MAGIC, 0x1BADB002 # 'magic number' lets bootloader find the header
.set CHECKSUM, -(MAGIC + FLAGS) # checksum of above, to prove we are multiboot
# Declare a header as in the Multiboot Standard.
.section .multiboot
.align 4
.long MAGIC
.long FLAGS
.long CHECKSUM
.section .boot_stack, "aw", @nobits
.align 16
stack_bottom:
.skip 16384 # 16 KiB
stack_top:
.section .bss, "aw", @nobits
# 64-bit higher half page tables
.align 4096
.global pml4_root
pml4_root:
.skip 4096
.global pml4_pdptr
pml4_pdptr:
.skip 4096
.global pml4_dir
pml4_dir:
.skip 4096
.global pml4_bpt0
pml4_bpt0:
.skip 4096
#TODO: PML5
#64-bit kernel GDT
.section .gdt
gdt_start:
null:
.word 0xffff #Limit
.word 0 #Base (low)
.byte 0 #Base (middle)
.byte 0 #Access
.byte 1 #Granularity
.byte 0 #Base (high)
code:
.word 0 #Limit
.word 0 #Base (low)
.byte 0 #Base (middle)
.byte 0b10011010 #Access (755)
.byte 0b10101111 #Granularity
.byte 0 #Base (high)
data:
.word 0 #Limit
.word 0 #Base (low)
.byte 0 #Base (middle)
.byte 0b10010010 #Access (777)
.byte 0b00000000 #Granularity
.byte 0 #Base (high)
gdt_end:
.global gdtp
gdtp:
.align 8
.equ gdt_len, gdt_end - gdt_start - 1
.equ gdt_addr, $0xffff000000000000
# The kernel entry point.
.section .text
.global NoLongMode
NoLongMode:
.ascii "Error\: Long Mode not detected"
hlt
loop NoLongMode #Infinite loop because we've got nothing better to do
.global NoCPUID
NoCPUID:
.ascii "Error\: could not determine CPUID"
hlt
loop NoCPUID #Infinite loop because we've got nothing better to do
.global _start
.type _start, @function
_start:
setup_64:
#Block interrupts until we have the IDT
cli
#CPUID: flags
pushfl
popl %eax
#CPUID: compare
movl %eax, %ecx
#CPUID: ID bit
xorl $(1<<21), %eax
#FLAGS
pushl %eax
popfl
pushfl
popl %eax
pushl %ecx
popfl
#If no CPUID functionality exists
xorl %ecx, %eax
jz NoCPUID
ret
#Long mode detection, part 1
movl $0x80000000, %eax
cpuid
cmpl $0x80000001, %eax
jb NoLongMode
#Long mode detection, part 2
movl $0x80000001, %eax
cpuid
testl $(1<<29), %edx
jz NoLongMode
#Temporarily disable paging until we've got it properly set up
movl %cr0, %eax
andl $0b01111111111111111111111111111111, %eax
movl %eax, %cr0
#PAE
movl %cr4, %eax
orl $(1<<5), %eax
movl %eax, %cr4
#LM-bit
movl $0xC0000080, %ecx
rdmsr
orl $(1<<8), %eax
wrmsr
#Reenable paging
movl %cr0, %eax
orl $(1<<31), %eax
movl %eax, %cr0
#Clear all 32-bit registers to shut linker up
movl $0, %eax
movl $0, %ecx
#GDT + LM jump
lgdt (gdt_len)
jmp longmode
#Actually enter 64-bit mode for good
.code64
longmode:
#Physical address of first boot page table
movabsq $(pml4_bpt0 - 0xffff000000000000), %rdi #Physical address of first boot page table
movabsq $0, %rsi #First address to map
#64-bit entries are double the size of 32-bit entries but table size is the same
movabsq $511, %rcx
1:
#Kernel mapping
cmpq $(_kernel_start - 0xffff000000000000), %rsi
jl 2f
cmpq $(_kernel_end - 0xffff000000000000), %rsi
jge 3f
#Map physical address space as present+writable
movq %rsi, %rdx
orq $0x003, %rdx
movq %rdx, (%rdi)
2:
addq $4096, %rsi #page size in bytes
addq $8, %rdi #size of page entries
loop 1b #loop if unfinished
3:
#Video memory location
movabsq $(0x00000000000B8000 | 0x003), %rax
movq %rax, pml4_bpt0 - 0xffff000000000000 + 511 * 8
#Map first kernel page to the first kernel PDT
movabsq $(pml4_bpt0 - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_dir - 0xffff000000000000 + 0
movabsq $(pml4_bpt0 - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_dir - 0xffff000000000000 + 384 * 8
#Map first kernel PDT to first kernel PDPT
movabsq $(pml4_dir - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_pdptr - 0xffff000000000000 + 0
movabsq $(pml4_dir - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_pdptr - 0xffff000000000000 + 384 * 8
#Map first kernel PDPT to the PML4T
movabsq $(pml4_pdptr - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_root - 0xffff000000000000 + 0
movabsq $(pml4_pdptr - 0xffff000000000000 + 0x003), %rax
movq %rax, pml4_root - 0xffff000000000000 + 384 * 8
#Set third control register to address of PML4T
movabsq $(pml4_root - 0xffff000000000000), %rcx
movq %rcx, %cr3
#Jump to 64-bit higher half
leaq 4f, %rcx
jmpq *%rcx
4:
#Reload PML4T along with all of its children, incl kernel pages
movq %cr3, %rcx
movq %rcx, %cr3
movabsq $stack_top, %rsp
#Self-explanatory
callq kernel_main
cli
5: hlt
jmp 5b
.size _start, . - _start
It had a lot of linker errors before I started using movabs, etc. which got the linker woes from about 20 down to just 1:
boot64.o: in function `longmode':
(.text+0x18b): relocation truncated to fit: R_X86_64_32S against `.text'
collect2: error: ld returned 1 exit status
This would be easy to solve if the linker actually specified line numbers to find errors on ― but it doesn't. So if anyone can help find the offending line, I'd appreciate it.
The linker script is identical to the one used in the tutorial with only one exception (the hardcoded address is 0xFFFF000000000000 instead of 0xC0000000), if that helps any.