I've created the following network topology in Mininet to run an algorithm I've implemented using the Linux kernel eXpress Data Path.
The objective is to sample packets on the incoming link s1-eth1
on Switch 1
using XDP and store metadata in a shared BPF map. The execution is successful when run on multiple VMs (instead of using Mininet to create an emulation).
However, when using XDP on Mininet (to listen on the emulated network interface), packets aren't recorded.
To further diagnose the cause, I ran Wireshark to listen on the s1-eth1
interface, which does record packets hitting the interface, but for some reason these same packets aren't being registered through the XDP pipeline.
#define KBUILD_MODNAME "foo"
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
//BPF_TABLE("percpu_array", uint32_t, long, dropcnt, 256);
BPF_HASH(proto_map, uint32_t, uint32_t, 256);
//Packet Counter to keep track of number of packets flowing through XDP
BPF_ARRAY(pkt_count, uint64_t, 1);
//Map to keep track of the current EPOCH SIZE
BPF_ARRAY(epoch_size_map, uint64_t, 1);
static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
__be32 *src, __be32 *dest)
{
struct iphdr *iph = data + nh_off;
if (iph + 1 > data_end)
return 0;
*src = iph->saddr;
*dest = iph->daddr;
return iph->protocol;
}
static inline int bitXor(int* x, int* y)
{
int a = *x & *y;
int b = ~*x & ~*y;
int z = ~a & ~b;
return z;
}
int xdp_dsa(struct CTXTYPE *ctx) {
void* data_end = (void*)(long)ctx->data_end;
void* data = (void*)(long)ctx->data;
struct ethhdr *eth = data;
// drop packets
int rc = RETURNCODE; // let pass XDP_PASS or redirect to tx via XDP_TX
uint32_t *value;
uint32_t *counter_value;
uint32_t *epoch_size;
uint16_t h_proto;
uint64_t nh_off = 0;
uint32_t ipproto;
uint64_t magic_value = 12345678;
uint32_t packet = 0;
__be32 src_ip = 0, dest_ip = 0;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
pkt_count.increment(packet);
return rc;
h_proto = eth->h_proto;
if (h_proto == htons(ETH_P_IP))
ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip);
/*
else if (h_proto == htons(ETH_P_IPV6))
index = parse_ipv6(data, nh_off, data_end);
*/
else
ipproto = 0; //i.e. unknown protocol
/*XOR the srcIP, destIP, and ipproto to encode, then hash*/
int xor_src_dest = bitXor(&src_ip, &dest_ip);
int xor_srcdst_ipproto = bitXor(&xor_src_dest, &ipproto);
uint32_t zero = 0;
//Predecided initial epoch size
uint32_t init_epoch_size = 10;
//Variable to store the current epoch size (to check end of epoch)
uint32_t cur_epoch_size;
//Lookup epoch size from shared map (to check whether intialized else read)
epoch_size = epoch_size_map.lookup(&zero);
// Start condition (epoch size map is initialized with zero), then set to initial epoch size
// Else read the current epoch size into a variable
if(epoch_size)
{
if(*epoch_size == 0)
{
*epoch_size = init_epoch_size;
}
else
{
cur_epoch_size = *epoch_size;
}
}
counter_value = pkt_count.lookup(&packet);
if (counter_value)
{
if (*counter_value < cur_epoch_size)
{
value = proto_map.lookup_or_init(&xor_srcdst_ipproto, &zero);
if (value)
{
pkt_count.increment(packet);
*value += 1;
}
}
else if (*counter_value == cur_epoch_size)
{
pkt_count.update(&packet, &magic_value);
}
else if(*counter_value == magic_value)
{
return rc;
}
}
return rc;
}
Any ideas?