Note: I'm running on FreeBSD, but I've also included Linux as a tag since the problem is somewhat general and Linux-specific solutions are of interest to me.
Edit: just to confirm that the issue was not FreeBSD-specific, I ported the module to Linux, and indeed got exactly the same behavior. The code for the Linux version of the module is given below; it is essentially the same, the only major difference being that the IDT is evidently given read-only protection in Linux, and so I had to clear the Write Protect bit in `cr0` for the code to work.
I'm learning a bit about kernel development on an x86-64 architecture, and at present have been reading about interrupt handling in the Intel developer's manual. As practice I'm trying to write a small kernel module that hooks entries in the IDT, but have been running into a problem. My general question is this: how do you ensure that the code for your hook (or the data for your new IDT table if you're using `lidt` to change the entire `idtr` rather than just overwrite individual entries of the IDT) is always present in RAM? The issue I've been running into is that I'll change an IDT entry, trigger the corresponding interrupt, and then get a double fault because the code for my hook isn't mapped into RAM. In general, are there ways of avoiding this problem?
For specifics of my situation, the following is code for a FreeBSD LKM I've written that simply overwrites the listed address in the IDT entry for handling zero-divisor faults and replaces it with the address of `asm_hook`, which at present just unconditionally `jmp`s back into the original interrupt handler. (In the future I'll of course add more functionality.)
#include <sys/types.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/module.h>
#include <sys/sysent.h>
#include <sys/kernel.h>
#include <sys/syscall.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
/*
 * One IDT entry (16 bytes when packed).  The handler address is stored
 * split across offset_0_15, offset_16_31 and offset_32_63.
 */
struct idte_t {
	unsigned short	offset_0_15;
	unsigned short	segment_selector;
	unsigned char	ist;		/* interrupt stack table */
	unsigned char	type:4;
	unsigned char	zero_12:1;
	unsigned char	dpl:2;		/* descriptor privilege level */
	unsigned char	p:1;		/* present flag */
	unsigned short	offset_16_31;
	unsigned int	offset_32_63;
	unsigned int	rsv;
} __attribute__((packed));

/* Index of the IDT entry this module hooks. */
#define ZD_INT 0x00

/* Set by init() to point at entry ZD_INT of the live IDT. */
struct idte_t *zd_idte;

/* Absolute address of the original interrupt handler. */
unsigned long idte_offset;
/*
 * Memory image of the IDT register as stored by sidt: a 16-bit limit
 * followed directly by the table's base address, hence the packing.
 */
struct idtr_t {
	unsigned short	 lim_val;
	struct idte_t	*addr;
} __attribute__((packed));

/* Filled in by the sidt instruction during module initialisation. */
struct idtr_t idtr;
/*
 * asm_hook: replacement interrupt entry point.  It does nothing except jump
 * to the address stored in idte_offset (the original handler).
 *
 * The code is placed in .text with .pushsection/.popsection rather than a
 * bare ".text" directive, so the assembler's current section is restored
 * afterwards and whatever the compiler emits next is not affected.
 *
 * NOTE(review): the accompanying write-up reports that this address is not
 * mapped when the fault is taken.  If the kernel runs user mode on separate
 * page tables (page-table isolation), module text would presumably be absent
 * from them -- confirm before relying on this hook.
 */
extern void asm_hook(void);

__asm__(
	".pushsection .text\n\t"
	".global asm_hook\n"
	"asm_hook:\n\t"
	"jmp *(idte_offset)\n\t"
	".popsection\n");
static int
init() {
__asm__ __volatile__ (
"cli;"
"sidt %0;"
"sti;"
:: "m"(idtr));
uprintf("[*] idtr dump\n"
"[**] address:\t%p\n"
"[**] lim val:\t0x%x\n"
"[*] end dump\n\n",
idtr.addr, idtr.lim_val);
zd_idte=(idtr.addr)+ZD_INT;
idte_offset=(long)(zd_idte->offset_0_15)|((long)(zd_idte->offset_16_31)<<16)|((long)(zd_idte->offset_32_63)<<32);
uprintf("[*] old idt entry %d:\n"
"[**] addr:\t%p\n"
"[**] segment:\t0x%x\n"
"[**] ist:\t%d\n"
"[**] type:\t%d\n"
"[**] dpl:\t%d\n"
"[**] p:\t\t%d\n"
"[*] end dump\n\n",
ZD_INT, (void *)idte_offset, zd_idte->segment_selector,
zd_idte->ist, zd_idte->type, zd_idte->dpl, zd_idte->p);
if(!zd_idte->p) {
uprintf("[*] fatal: handler segment not present\n");
return ENOSYS; }
__asm__ __volatile__("cli");
zd_idte->offset_0_15=((unsigned long)(&asm_hook))&0xffff;
zd_idte->offset_16_31=((unsigned long)(&asm_hook)>>16)&0xffff;
zd_idte->offset_32_63=((unsigned long)(&asm_hook)>>32)&0xffffffff;
__asm__ __volatile__("sti");
uprintf("[*] new idt entry %d:\n"
"[**] addr:\t%p\n"
"[**] segment:\t0x%x\n"
"[**] ist:\t%d\n"
"[**] type:\t%d\n"
"[**] dpl:\t%d\n"
"[**] p:\t\t%d\n"
"[*] end dump\n\n",
ZD_INT, (void *)(\
(long)zd_idte->offset_0_15|((long)zd_idte->offset_16_31<<16)|((long)zd_idte->offset_32_63<<32)),
zd_idte->segment_selector, zd_idte->ist, zd_idte->type, zd_idte->dpl, zd_idte->p);
return 0; }
/*
 * Module unload routine: write the original handler address (saved in
 * idte_offset by init()) back into the hooked IDT entry.
 *
 * The "memory" clobbers turn cli/sti into compiler barriers so the stores
 * to the entry stay inside the interrupts-off window.
 */
static void
fini(void)
{
	__asm__ __volatile__("cli" ::: "memory");
	zd_idte->offset_0_15 = idte_offset & 0xffff;
	zd_idte->offset_16_31 = (idte_offset >> 16) & 0xffff;
	zd_idte->offset_32_63 = (idte_offset >> 32) & 0xffffffff;
	__asm__ __volatile__("sti" ::: "memory");
}
/*
 * Module event handler.
 *
 * MOD_LOAD   -> runs init() and returns its result.
 * MOD_UNLOAD -> runs fini() and returns 0.
 * Any other event is rejected with EOPNOTSUPP.
 *
 * The module and arg parameters are not used.
 */
static int
load(struct module *module, int cmd, void *arg)
{
	if (cmd == MOD_LOAD)
		return (init());

	if (cmd == MOD_UNLOAD) {
		fini();
		return (0);
	}

	return (EOPNOTSUPP);
}
static moduledata_t idt_hook_mod = {
"idt_hook",
load,
NULL };
DECLARE_MODULE(idt_hook, idt_hook_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
(I have also written another LKM that creates an entire new IDT table using `malloc(9)` and uses `lidt` to load that table into `idtr`, but that seems to me an inferior approach as it will only alter the IDT on the particular CPU core it's running on, and hence won't work reliably in multiprocessor systems. Unless there's something I'm missing, is this an accurate assessment?)
Anyway, compiling the code and loading the kernel module cause no issues:
# kldload ./idt_hook.ko
[*] idtr dump
[**] address: 0xffffffff81fb2c40
[**] lim val: 0xfff
[*] end dump
[*] old idt entry 0:
[**] addr: 0xffffffff81080f90
[**] segment: 0x20
[**] ist: 0
[**] type: 14
[**] dpl: 0
[**] p: 1
[*] end dump
[*] new idt entry 0:
[**] addr: 0xffffffff8281d000
[**] segment: 0x20
[**] ist: 0
[**] type: 14
[**] dpl: 0
[**] p: 1
[*] end dump
However, when I test the hook with the following, the kernel hangs:
#include <stdio.h>
/*
 * Userland trigger for the hook: performs an integer division by zero so
 * that the CPU raises the zero-divisor fault.
 */
int
main(void)
{
	/*
	 * volatile forces the division to be carried out at run time; with
	 * plain constants the compiler may fold or discard 1/0 (undefined
	 * behaviour) and the fault would never be raised.
	 */
	volatile int x = 1, y = 0;

	printf("x/y=%d\n", x / y);
	return 0;
}
To understand what's going on I spun up the VirtualBox built-in debugger, and set a breakpoint on the IDT's double fault exception handler (entry 8). Debugging showed that my LKM alters the IDT correctly, but running the zero-divisor code above triggers a double fault. I realized the reason for this when I tried to access the memory at `0xffffffff8281d000` (the address of my `asm_hook` code), which triggered a `VERR_PAGE_TABLE_NOT_PRESENT` error in the VirtualBox debugger. So, unless I'm misunderstanding something, evidently the issue is indeed that my `asm_hook` gets removed from memory at some point. Any ideas on how to address this problem? For instance, is there a way to tell the FreeBSD kernel that a particular page should never be unmapped from RAM?
Edit: Nate Eldredge in the comments below helped me find some errors in my code (now corrected), but unfortunately the problem still persists. To give greater debugging detail: first I load the kernel module, and then I set a breakpoint on the listed address of my `asm_hook` code (`0xffffffff8281d000`) in the VirtualBox debugger. I've confirmed by disassembling memory at that address that it does indeed contain the code of `asm_hook`. (Although, as Nate points out, it's slightly odd that it's placed exactly on a page boundary – anyone have any ideas why this might be?)
In any case, when I trigger the zero-divisor interrupt, the breakpoint is unfortunately never hit, and, once I'm inside the double fault interrupt handler, when I try to access the memory at `0xffffffff8281d000` the `VERR_PAGE_TABLE_NOT_PRESENT` error still flags up.
It is true that it would be an unusual (?) feature of FreeBSD's design to swap out/unmap portions of its kernel from RAM, so maybe a better question is "what's causing this page fault?"
Edit: Here is a version of the module ported to Linux:
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <asm/io.h>
/* Module metadata declared through the kernel's MODULE_* macros. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Hooks the zero divisor IDT entry");
MODULE_VERSION("0.01");
/*
 * One IDT entry (16 bytes when packed).  The handler address is stored
 * split across offset_0_15, offset_16_31 and offset_32_63.
 */
struct idte_t {
	unsigned short	offset_0_15;
	unsigned short	segment_selector;
	unsigned char	ist;		/* interrupt stack table */
	unsigned char	type:4;
	unsigned char	zero_12:1;
	unsigned char	dpl:2;		/* descriptor privilege level */
	unsigned char	p:1;		/* present flag */
	unsigned short	offset_16_31;
	unsigned int	offset_32_63;
	unsigned int	rsv;
} __attribute__((packed));

/* Index of the IDT entry this module hooks. */
#define ZD_INT 0x00

/* Set by idt_init() to point at entry ZD_INT of the live IDT. */
struct idte_t *zd_idte;

/* Absolute address of the original interrupt handler. */
unsigned long idte_offset;
/*
 * Memory image of the IDT register as stored by sidt: a 16-bit limit
 * followed directly by the table's base address, hence the packing.
 */
struct idtr_t {
	unsigned short	 lim_val;
	struct idte_t	*addr;
} __attribute__((packed));

/* Filled in by the sidt instruction during module initialisation. */
struct idtr_t idtr;
/*
 * asm_hook: replacement interrupt entry point.  It does nothing except jump
 * to the address stored in idte_offset (the original handler).
 *
 * The code is placed in .text with .pushsection/.popsection rather than a
 * bare ".text" directive, so the assembler's current section is restored
 * afterwards and whatever the compiler emits next is not affected.
 *
 * NOTE(review): the accompanying write-up reports that this address is not
 * mapped when the fault is taken.  If the kernel runs user mode on separate
 * page tables (page-table isolation), module text would presumably be absent
 * from them -- confirm before relying on this hook.
 */
extern void asm_hook(void);

__asm__(
	".pushsection .text\n\t"
	".global asm_hook\n"
	"asm_hook:\n\t"
	"jmp *(idte_offset)\n\t"
	".popsection\n");
static int __init
idt_init(void) {
__asm__ __volatile__ (
"cli;"
"sidt %0;"
"sti;"
:: "m"(idtr));
printk("[*] idtr dump\n"
"[**] address:\t%px\n"
"[**] lim val:\t0x%x\n"
"[*] end dump\n\n",
idtr.addr, idtr.lim_val);
zd_idte=(idtr.addr)+ZD_INT;
idte_offset=(long)(zd_idte->offset_0_15)|((long)(zd_idte->offset_16_31)<<16)|((long)(zd_idte->offset_32_63)<<32);
printk("[*] old idt entry %d:\n"
"[**] addr:\t%px\n"
"[**] segment:\t0x%x\n"
"[**] ist:\t%d\n"
"[**] type:\t%d\n"
"[**] dpl:\t%d\n"
"[**] p:\t\t%d\n"
"[*] end dump\n\n",
ZD_INT, (void *)idte_offset, zd_idte->segment_selector,
zd_idte->ist, zd_idte->type, zd_idte->dpl, zd_idte->p);
if(!zd_idte->p) {
printk("[*] fatal: handler segment not present\n");
return ENOSYS; }
unsigned long cr0;
__asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0));
cr0 &= ~(long)0x10000;
__asm__ __volatile__("mov %0, %%cr0" :: "r"(cr0));
__asm__ __volatile__("cli");
zd_idte->offset_0_15=((unsigned long)(&asm_hook))&0xffff;
zd_idte->offset_16_31=((unsigned long)(&asm_hook)>>16)&0xffff;
zd_idte->offset_32_63=((unsigned long)(&asm_hook)>>32)&0xffffffff;
__asm__ __volatile__("sti");
cr0 |= 0x10000;
__asm__ __volatile__("mov %0, %%cr0" :: "r"(cr0));
printk("[*] new idt entry %d:\n"
"[**] addr:\t%px\n"
"[**] segment:\t0x%x\n"
"[**] ist:\t%d\n"
"[**] type:\t%d\n"
"[**] dpl:\t%d\n"
"[**] p:\t\t%d\n"
"[*] end dump\n\n",
ZD_INT, (void *)(\
(long)zd_idte->offset_0_15|((long)zd_idte->offset_16_31<<16)|((long)zd_idte->offset_32_63<<32)),
zd_idte->segment_selector, zd_idte->ist, zd_idte->type, zd_idte->dpl, zd_idte->p);
return 0; }
/*
 * Module exit: write the original handler address (saved in idte_offset by
 * idt_init()) back into the hooked IDT entry.
 *
 * Interrupts are disabled before CR0.WP is cleared and re-enabled only
 * after it is set again, so this CPU never runs other code with write
 * protection off.  The "memory" clobbers keep the IDT stores inside that
 * window.
 */
static void __exit
idt_fini(void)
{
	const unsigned long cr0_wp = 0x10000UL;	/* CR0 write-protect bit */
	unsigned long cr0;

	__asm__ __volatile__("cli" ::: "memory");
	__asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0));
	cr0 &= ~cr0_wp;
	__asm__ __volatile__("mov %0, %%cr0" :: "r"(cr0) : "memory");

	zd_idte->offset_0_15 = idte_offset & 0xffff;
	zd_idte->offset_16_31 = (idte_offset >> 16) & 0xffff;
	zd_idte->offset_32_63 = (idte_offset >> 32) & 0xffffffff;

	cr0 |= cr0_wp;
	__asm__ __volatile__("mov %0, %%cr0" :: "r"(cr0) : "memory");
	__asm__ __volatile__("sti" ::: "memory");
}
/* Register idt_init as the module's init routine and idt_fini as its exit routine. */
module_init(idt_init);
module_exit(idt_fini);