4

I am writing a small kernel module for measuring the time that a network packet takes to exit a node. This module is a hook in the netfilter library.

For each packet it receives it calculates an hash, gets the tstamp from skbuff and the actual timestamp, and saves all this data in a linked list. To pass this data to userspace I've created a proc device, and when the user reads from the device I send one of the entries of the linked list.

To make changes to the list (read and write) I have a spinlock. The problem is that sometimes when I read from the proc device while I am processing packets the system crash.

I think that the problem is in the function "dump_data_to_proc", more specifically when try to acquire the spinlock. I've made some tests and it only crashes(softlockup) when running in a tplink router. When I run the module in a "normal" pc(single core) it don't crash,

#include <linux/module.h>    /* Needed by all modules */ 
#include <linux/kernel.h>   /* Needed for KERN_INFO */ 
#include <linux/init.h>   /* Needed for the macros */ 
#include <linux/skbuff.h> 
#include <linux/netfilter.h> 
#include <linux/netfilter_ipv4.h> 
#include <linux/ip.h> 
#include <linux/spinlock.h> 

#include <net/ipv6.h>

#include <linux/proc_fs.h>  /* Necessary because of proc fs */
#include <asm/uaccess.h>    /* for copy_from_user */

#include "kmodule_measure_process_time.h" 
#include "hash.c" 

//DEBUG >=5 is very slow in the tplink
#define DEBUG 2 
#define PROCFS_MAX_SIZE     64
#define PROCFS_NAME         "measures"
#define MAXIMUM_SAMPLES     10000


static struct nf_hook_ops nfho;
unsigned int total_packets_processed= 0;
unsigned int total_packets_discarded=0;
int temp_counter=0;

struct values_list *HEAD;

spinlock_t list_lock  ;


static int hello_proc(struct seq_file *m, void *v) {
  seq_printf(m, " stats Mod initialized.\n");
  return 0;
}

static int proc_open(struct inode *inode, struct  file *file) {
  return single_open(file, hello_proc, NULL);
}



ssize_t dump_data_to_proc(struct file *filp, char  *buffer, size_t length, loff_t *offset){

  int bytesRead = 0;
  struct values_list *temp=NULL;
  int bytesError=0;
  char buff[PROCFS_MAX_SIZE];

  spin_lock(&list_lock);
  temp=HEAD;
  if(temp!=NULL){
    HEAD = temp->next;
}
    spin_unlock(&list_lock);


if(temp!=NULL){
    bytesRead = snprintf(buff, PROCFS_MAX_SIZE ,"%u|%llu|%llu\n", temp->hash,temp->arrival_timestap, temp->exit_timestap);
    length = length - bytesRead+1;
    kfree(temp);
    temp_counter--;
}

bytesError= copy_to_user(buffer, buff, bytesRead);

if(bytesError!=0){
#if DEBUG >0
  printk(KERN_INFO "Error: failed to copy to user");
#endif
}
return bytesRead;
}


static const struct file_operations proc_fops = {
  .owner = THIS_MODULE,
  .open = proc_open,
  .read = dump_data_to_proc,
  .llseek = seq_lseek,
  .release = single_release,
};


static unsigned int hook_func(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
{   

    uint32_t hash=0;
    ktime_t now_timeval;
    struct timespec now;
    u64 timestamp_arrival_time=0;
    u64 timestamp_now=0;
    struct ipv6hdr * ipheader;
    struct values_list *node;
    int number_of_samples=0;

    spin_lock(&list_lock);   
    number_of_samples=temp_counter;        
    spin_unlock(&list_lock);           

    if(number_of_samples > MAXIMUM_SAMPLES){
        #if DEBUG > 5
        printk(KERN_INFO "Discarded one sample because the list is full.\n");
        #endif
        total_packets_discarded++; // probably this should be inside a spinlock
        return NF_ACCEPT;
    }

    //calculate arrival time and actual time in ns
    timestamp_arrival_time =  ktime_to_ns(skb->tstamp);
    getnstimeofday(&now);
    now_timeval = timespec_to_ktime(now);
    timestamp_now =  ktime_to_ns(now_timeval);

    //get Ipv6 addresses
    ipheader = (struct ipv6hdr *)skb_network_header(skb);

    hash=simple_hash((char *)&ipheader->saddr,sizeof(struct in6_addr)*2,hash);
    total_packets_processed++;


    node = (struct values_list *) kmalloc(sizeof(struct values_list),GFP_ATOMIC);
    if(!node){
        #if DEBUG >0
        printk(KERN_INFO "Error cannot malloc\n");
        #endif
        return NF_ACCEPT;
    }

    node->hash=hash;
    node->arrival_timestap=timestamp_arrival_time;
    node->exit_timestap=timestamp_now;

    spin_lock(&list_lock);           
    node->next=HEAD;
    HEAD=node;
    temp_counter++;
    spin_unlock(&list_lock); 

    return NF_ACCEPT;

}

static int __init init_main(void)
{
    nfho.hook = hook_func;
    nfho.hooknum = NF_INET_POST_ROUTING;
    nfho.pf = PF_INET6;
    nfho.priority = NF_IP_PRI_FIRST;
    nf_register_hook(&nfho);
#if DEBUG >0
    printk(KERN_INFO " kernel module: Successfully inserted protocol module into kernel.\n");
#endif 

    proc_create(PROCFS_NAME, 0, NULL, &proc_fops);

    spin_lock_init(&list_lock);

    //Some distros/devices disable timestamping of packets
    net_enable_timestamp(); 

    return 0;

}


static void __exit cleanup_main(void)
{

   struct values_list *temp;

    nf_unregister_hook(&nfho);
#if DEBUG >0
    printk(KERN_INFO " kernel module: Successfully unloaded protocol module.\n");
    printk(KERN_INFO "Number of packets processed:%d\n",total_packets_processed);
    printk(KERN_INFO "Number of packets discarded:%d\n",total_packets_discarded);
#endif

    remove_proc_entry(PROCFS_NAME, NULL);

    while(HEAD!=NULL){
        temp=HEAD;
        HEAD= HEAD->next;
        kfree(temp);
    }


}


module_init(init_main);
module_exit(cleanup_main);
/* *    Declaring code as GPL. */ 
MODULE_LICENSE("GPLv3");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
Hugo Alves
  • 188
  • 1
  • 10
  • In `dump_data_to_proc()` the `snprintf()` returns the number of characters required to write the full string, **not** counting the null terminating character. You're not checking that the return is < PROCFS_MAX_SIZE... presumably because you are sure the buffer is big enough -- but, if it were to be too short, then `copy_to_user()` will overrun the end of `buff`. I cannot see the `length` being used for anything... but I am suspicious about `length = length - bytesRead+1;` I note that the `hook_func()` is adding things to the front of the `HEAD` list, which are processed last-in-first-out. –  Jul 17 '14 at 22:42
  • Thanks. I've added a verification to check if the copy_to_user buffer is big enough. – Hugo Alves Jul 18 '14 at 11:13

1 Answers1

2

There are 2 problems with your code:

  1. Use Linux kernel macro for your code. http://makelinux.com/ldd3/chp-11-sect-5 . Just add struct list_head as element to your struct values_list and use list_entry, list_add and other

  2. Netfilter hools are run in softirq context, so you must use spin_lock_irqsave and spin_unlock_irqrestore. This is most likely reason why your system crashes with softlockup. Read carefully http://makelinux.com/ldd3/chp-5-sect-5

Alexander Dzyoba
  • 4,009
  • 1
  • 24
  • 29
  • Thanks. Problem solved! 1. I didn't know that there was a generic list implementation in kernel. This is pretty usefull 2. Now I am using spin_lock_irqsave and everything works fine. Thanks for your help – Hugo Alves Jul 18 '14 at 11:06