I am trying to communicate between two Linux-based devices, a PowerPC (big-endian, 32-bit) and an ARM (little-endian, 32-bit), connected over PCIe. One of the devices acts as the Root Complex and the other as the Endpoint. The devices successfully negotiate a PCIe x1 Gen-1 link. To communicate between the devices, I have written userspace C applications that use mmap() to read/write data in the PCIe memory space.
The issue I am facing is that the average data transfer rate is around 5Mb/s, which is nowhere near the theoretical PCIe 1.1 x1 speed (250MB/s). I have not yet been able to find the data-rate bottleneck and would appreciate some suggestions. Can someone give me an idea of what the reason might be?
Please find below the C code for both the Root Complex (RC) side and the Endpoint (EP) side:
RC:
#include <endian.h>
#include <stdio.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <signal.h>
#include <stdint.h>
#include <errno.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <asm/ioctl.h>
/*
 * Layout of the shared PCIe BAR window.
 *
 * The window is split in two halves: the first half is written by the
 * Endpoint, the second half by the Root Complex. Each half starts with a
 * 32-bit "data ready" word, followed by a 32-bit payload length, followed
 * by the payload itself.
 *
 * Every derived offset is parenthesized so it can be used safely inside
 * larger expressions (e.g. MEM_MAP_LEN - RC_DATA_OFFSET).
 */
#define MEM_MAP_LEN 0x1000000 // Map 16-MB of PCIe BAR space
#define EP_WR_START 0x0 // EP write region from start of BAR space
#define EP_SET_DATA_READY (EP_WR_START + 0x0)
#define EP_DATA_LEN_OFFSET (EP_WR_START + 0x4)
#define EP_DATA_OFFSET (EP_WR_START + 0x8)
#define RC_WR_START 0x800000 // RC write region starts from mid of BAR space
#define RC_SET_DATA_READY (RC_WR_START + 0x0)
#define RC_DATA_LEN_OFFSET (RC_WR_START + 0x4)
#define RC_DATA_OFFSET (RC_WR_START + 0x8)
uint32_t sod_id = 0xFFFFAAAA; // Start of Data ID
/* Arguments handed to the TAP-reader thread at creation time. */
typedef struct {
    int   tap_fd;    /* descriptor of the TAP interface */
    void *pcie_ptr;  /* start of the mapped PCIe window  */
} THREAD_ARGS;
/*
 * Map MEM_MAP_LEN bytes of the PCI device's "resource2" sysfs file into
 * this process.
 *
 * Returns the mapped address, or NULL if the file cannot be opened or the
 * mapping fails. The file descriptor is closed before returning; the
 * mapping stays valid until munmap().
 */
void * init_pcie()
{
    const char *pcie_file = "/sys/bus/pci/devices/0000:03:00.0/resource2";
    void *res_ptr = NULL;
    int fd = open(pcie_file, O_RDWR | O_SYNC);

    /* Without this check a failed open() would be passed on to mmap()
     * and close() as fd == -1. */
    if (fd < 0)
    {
        printf("Error: Failed to open %s: %s\n", pcie_file, strerror(errno));
        return NULL;
    }

    res_ptr = mmap(0, MEM_MAP_LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    close(fd);
    if (res_ptr == MAP_FAILED)
    {
        printf("Error: Failed to map memory to PCIe resource\n");
        return NULL;
    }
    return res_ptr;
}
int write_pcie(volatile void * ptr, unsigned char * buffer, unsigned int buffer_len)
{
uint32_t tmp_flag = htobe32(sod_id);
uint32_t tmp_len = htobe32(buffer_len);
uint32_t tmp_val;
memcpy(&tmp_val, ((uint8_t *)ptr + RC_SET_DATA_READY), sizeof(tmp_val));
if(tmp_val != htobe32(sod_id)) // check if there's already pending data
{
memcpy(((uint8_t *)ptr + RC_DATA_LEN_OFFSET), &tmp_len, sizeof(tmp_len));
memcpy((uint8_t *)((uint8_t *)ptr + RC_DATA_OFFSET), buffer, buffer_len);
memcpy(((uint8_t *)ptr + RC_SET_DATA_READY), &tmp_flag, sizeof(tmp_flag));
return 0;
}
else
return -1;
}
/*
 * Fetch one frame from the EP write region of the mapped window, if one
 * is pending.
 *
 * ptr    - start of the mapped window; must be 4-byte aligned (an mmap()
 *          result is).
 * buffer - destination; must hold at least 1800 bytes, which is the size
 *          of the receive buffers used by the callers in this file.
 *
 * The ready word and length are interpreted as big-endian. After the
 * payload has been copied out, the ready word is cleared so the peer can
 * publish the next frame.
 *
 * Returns the payload length, or -1 when no frame is pending, when an
 * argument is NULL, or when the advertised length is larger than the
 * destination buffer (the frame is then discarded).
 */
int read_pcie(volatile void * ptr, unsigned char * buffer)
{
    /* Upper bound for a single frame; equals the callers' buffer size. */
    enum { MAX_FRAME_LEN = 1800 };

    if (ptr == NULL || buffer == NULL)
    {
        return -1;
    }

    volatile uint8_t *base = (volatile uint8_t *)ptr;
    volatile uint32_t *ready_word = (volatile uint32_t *)(base + (EP_SET_DATA_READY));
    volatile uint32_t *len_word = (volatile uint32_t *)(base + (EP_DATA_LEN_OFFSET));

    if (*ready_word != htobe32(sod_id))
    {
        return -1;
    }
    /* Do not read length/payload ahead of the ready flag. */
    __sync_synchronize();

    uint32_t data_len = be32toh(*len_word);
    if (data_len > MAX_FRAME_LEN)
    {
        /* The length comes from shared memory: never trust it as a copy
         * size. Drop the frame and release the slot. */
        *ready_word = 0;
        return -1;
    }

    memcpy(buffer, (uint8_t *)ptr + (EP_DATA_OFFSET), data_len);
    /* Finish reading the payload before handing the slot back. */
    __sync_synchronize();
    *ready_word = 0;
    return (int)data_len;
}
/*
 * Open /dev/net/tun and attach to (or create) a TUN/TAP interface.
 *
 * dev   - in: requested interface name, or an empty string / NULL to let
 *         the kernel choose one. out: when non-NULL, receives the actual
 *         interface name, so the buffer must hold at least IFNAMSIZ bytes.
 * flags - interface flags passed through in ifr_flags (e.g. IFF_TAP).
 *
 * Returns the open descriptor on success, or a negative value on failure
 * with errno describing the failing open()/ioctl().
 */
int tun_alloc(char *dev, int flags) {
    struct ifreq ifr;
    int fd, err;
    const char *clonedev = "/dev/net/tun";

    if ( (fd = open(clonedev, O_RDWR)) < 0 ) {
        return fd;
    }

    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = flags;
    if (dev != NULL && *dev) {
        /* Copy at most IFNAMSIZ - 1 bytes: ifr was zeroed above, so the
         * name stays NUL-terminated even when dev is too long. */
        strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
    }

    if ( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
        /* Keep the ioctl() errno for the caller's perror(); close() may
         * overwrite it. */
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return err;
    }

    if (dev != NULL) {
        strcpy(dev, ifr.ifr_name);
    }
    return fd;
}
/*
 * Thread body: forward every frame read from the TAP interface to the
 * peer through write_pcie().
 *
 * thread_args - pointer to a THREAD_ARGS; it is copied on entry.
 *
 * The loop never terminates. A frame is dropped when write_pcie() reports
 * that it could not be published.
 */
void* ethernet_read_task(void * thread_args)
{
    THREAD_ARGS args = *(THREAD_ARGS *)thread_args;
    int tap_fd = args.tap_fd;
    void * pcie_ptr = args.pcie_ptr;
    unsigned char tx_buffer[1800] = {0};

    while (1)
    {
        /* read() returns a signed count; keeping it signed is what makes
         * the error branch reachable and stops -1 from being forwarded as
         * a huge length. */
        ssize_t nread = read(tap_fd, tx_buffer, sizeof(tx_buffer));
        if (nread < 0) {
            perror("Reading from interface");
        }
        else if (nread > 0)
        {
            /* Best effort: the frame is dropped if the slot is busy. */
            (void)write_pcie(pcie_ptr, tx_buffer, (unsigned int)nread);
        }
    }
    return NULL;
}
int main()
{
unsigned char tap_name[32] = {0};
strcpy(tap_name, "virt0");
int tap_fd = tun_alloc(tap_name, IFF_TAP); /* tap interface */
if (tap_fd < 0) {
perror("Allocating interface");
exit(1);
}
unsigned char rootcomplex_ip[16] = "192.168.1.1";
unsigned char ipconfig_cmd[128] = {0};
sprintf(ipconfig_cmd, "/sbin/ifconfig %s %s", tap_name, rootcomplex_ip);
system(ipconfig_cmd);
void *pcie_ptr = init_pcie();
if(pcie_ptr == NULL)
{
printf("Failed to initialize PCIe mapping. Exiting...\n");
}
THREAD_ARGS thread_args;
thread_args.tap_fd = tap_fd;
thread_args.pcie_ptr = pcie_ptr;
pthread_t ethernet_read_thread;
int err = pthread_create(ðernet_read_thread, NULL, ðernet_read_task, &thread_args);
if (err != 0)
{
printf("Can't start Ethernet handler thread :[%s]", strerror(err));
return 1;
}
char * buffer;
int data_len = 0;
int32_t ret = 0;
unsigned char rx_buffer[1800] = {0};
while(1)
{
ret = read_pcie(pcie_ptr, rx_buffer);
if(ret > 0)
{
ret = write(tap_fd, rx_buffer, ret);
memset(rx_buffer, 0, sizeof(rx_buffer));
}
usleep(1);
}
munmap(pcie_ptr, MEM_MAP_LEN);
return 0;
}
EP:
#include <sys/socket.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <fcntl.h>
#include <asm/ioctl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <errno.h>
/*
 * Layout of the shared PCIe window as seen from the Endpoint.
 *
 * PCIE_MAP is the physical address handed to mmap() on /dev/mem and
 * MAP_SIZE the length of the mapping. The first half of the window is
 * written by the Endpoint, the second half by the Root Complex. Each half
 * starts with a 32-bit "data ready" word, followed by a 32-bit payload
 * length, followed by the payload itself.
 *
 * Every derived offset is parenthesized so it can be used safely inside
 * larger expressions (e.g. RC_WR_START - EP_DATA_OFFSET).
 */
#define PCIE_MAP 0x7f000000
#define MAP_SIZE 0x1000000 // Map 16-MB of PCIe BAR space
#define EP_WR_START 0x0 // EP write region from start of BAR space
#define EP_SET_DATA_READY (EP_WR_START + 0x0)
#define EP_DATA_LEN_OFFSET (EP_WR_START + 0x4)
#define EP_DATA_OFFSET (EP_WR_START + 0x8)
#define RC_WR_START 0x800000 // RC write region starts from mid of BAR space
#define RC_SET_DATA_READY (RC_WR_START + 0x0)
#define RC_DATA_LEN_OFFSET (RC_WR_START + 0x4)
#define RC_DATA_OFFSET (RC_WR_START + 0x8)
uint32_t sod_id = 0xFFFFAAAA; // Start of Data ID
/* Arguments handed to the TAP-reader thread at creation time. */
typedef struct {
    int   tap_fd;    /* descriptor of the TAP interface */
    void *pcie_ptr;  /* start of the mapped PCIe window  */
} THREAD_ARGS;
/*
 * Open /dev/net/tun and attach to (or create) a TUN/TAP interface.
 *
 * dev   - in: requested interface name, or an empty string / NULL to let
 *         the kernel choose one. out: when non-NULL, receives the actual
 *         interface name, so the buffer must hold at least IFNAMSIZ bytes.
 * flags - interface flags passed through in ifr_flags (e.g. IFF_TAP).
 *
 * Returns the open descriptor on success, or a negative value on failure
 * with errno describing the failing open()/ioctl().
 */
int tun_alloc(char *dev, int flags) {
    struct ifreq ifr;
    int fd, err;
    const char *clonedev = "/dev/net/tun";

    if ( (fd = open(clonedev, O_RDWR)) < 0 ) {
        return fd;
    }

    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = flags;
    if (dev != NULL && *dev) {
        /* Copy at most IFNAMSIZ - 1 bytes: ifr was zeroed above, so the
         * name stays NUL-terminated even when dev is too long. */
        strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
    }

    if ( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
        /* Keep the ioctl() errno for the caller's perror(); close() may
         * overwrite it. */
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return err;
    }

    if (dev != NULL) {
        strcpy(dev, ifr.ifr_name);
    }
    return fd;
}
/*
 * Map MAP_SIZE bytes of physical memory starting at PCIE_MAP into this
 * process through /dev/mem.
 *
 * Returns the mapped address, or NULL if /dev/mem cannot be opened or the
 * mapping fails. The file descriptor is closed before returning; the
 * mapping stays valid until munmap().
 */
void * init_pcie()
{
    int fd = open("/dev/mem", O_RDWR);

    /* Without this check a failed open() would be passed on to mmap()
     * and close() as fd == -1. */
    if (fd < 0)
    {
        printf("Failure opening /dev/mem: %s\n", strerror(errno));
        return NULL;
    }

    /* Map physical memory address specified with PCIE_MAP to virtual address and set it in ptr */
    void* ptr = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, PCIE_MAP);
    /* close() may overwrite errno, so capture the mmap() error first. */
    int map_errno = errno;
    close(fd);
    if (ptr == MAP_FAILED)
    {
        printf("Failure mapping: %s\n", strerror(map_errno));
        return NULL;
    }
    return ptr;
}
/* Overwrite the whole mapped window (MAP_SIZE bytes from ptr) with 0xff. */
void reset_pcie_space(void * ptr)
{
    uint8_t *window = ptr;

    memset(window, 0xff, MAP_SIZE);
}
int write_pcie(volatile void * ptr, unsigned char * buffer, unsigned int buffer_len)
{
uint32_t tmp_val;
memcpy(&tmp_val, ((uint8_t *)ptr + EP_SET_DATA_READY), sizeof(tmp_val));
if ( tmp_val != sod_id)
{
memcpy(((uint8_t *)ptr + EP_DATA_LEN_OFFSET), &buffer_len, sizeof(buffer_len));
memcpy((uint8_t *)((uint8_t *)ptr + EP_DATA_OFFSET), buffer, buffer_len);
memcpy(((uint8_t *)ptr + EP_SET_DATA_READY), &sod_id, sizeof(sod_id));
return 0;
}
else
return -1;
}
/*
 * Fetch one frame from the RC write region of the mapped window, if one
 * is pending.
 *
 * ptr    - start of the mapped window; must be 4-byte aligned (an mmap()
 *          result is).
 * buffer - destination; must hold at least 1800 bytes, which is the size
 *          of the receive buffer used by the caller in this file.
 *
 * The ready word and length are interpreted in host byte order. After the
 * payload has been copied out, the ready word is cleared so the peer can
 * publish the next frame.
 *
 * Returns the payload length, or -1 when no frame is pending, when an
 * argument is NULL, or when the advertised length is larger than the
 * destination buffer (the frame is then discarded).
 */
int read_pcie(volatile void * ptr, unsigned char * buffer)
{
    /* Upper bound for a single frame; equals the caller's buffer size. */
    enum { MAX_FRAME_LEN = 1800 };

    if (ptr == NULL || buffer == NULL)
    {
        return -1;
    }

    volatile uint8_t *base = (volatile uint8_t *)ptr;
    volatile uint32_t *ready_word = (volatile uint32_t *)(base + (RC_SET_DATA_READY));
    volatile uint32_t *len_word = (volatile uint32_t *)(base + (RC_DATA_LEN_OFFSET));

    if (*ready_word != sod_id)
    {
        return -1;
    }
    /* Do not read length/payload ahead of the ready flag. */
    __sync_synchronize();

    uint32_t data_len = *len_word;
    if (data_len > MAX_FRAME_LEN)
    {
        /* The length comes from shared memory: never trust it as a copy
         * size. Drop the frame and release the slot. */
        *ready_word = 0;
        return -1;
    }

    memcpy(buffer, (uint8_t *)ptr + (RC_DATA_OFFSET), data_len);
    /* Finish reading the payload before handing the slot back. */
    __sync_synchronize();
    *ready_word = 0;
    return (int)data_len;
}
/*
 * Thread body: forward every frame read from the TAP interface to the
 * peer through write_pcie().
 *
 * thread_args - pointer to a THREAD_ARGS; it is copied on entry.
 *
 * The loop never terminates. A frame is dropped when write_pcie() reports
 * that it could not be published.
 */
void* ethernet_read_task(void * thread_args)
{
    THREAD_ARGS args = *(THREAD_ARGS *)thread_args;
    int tap_fd = args.tap_fd;
    void * pcie_ptr = args.pcie_ptr;
    unsigned char tx_buffer[1800] = {0};

    while(1)
    {
        /* read() returns a signed count; keeping it signed is what makes
         * the error branch reachable and stops -1 from being forwarded as
         * a huge length. */
        ssize_t nread = read(tap_fd, tx_buffer, sizeof(tx_buffer));
        if (nread < 0) {
            perror("Reading from interface");
        }
        else if(nread > 0)
        {
            /* Best effort: the frame is dropped if the slot is busy. */
            (void)write_pcie(pcie_ptr, tx_buffer, (unsigned int)nread);
        }
    }
    return NULL;
}
int main()
{
char tap_name[32];
int nread = 0;
unsigned char rx_buffer[1800] = {0};
unsigned int rx_len = 0;
void * pcie_ptr = NULL;
strcpy(tap_name, "virt0");
int tap_fd = tun_alloc(tap_name, IFF_TAP);
if (tap_fd < 0) {
perror("Allocating interface");
exit(1);
}
unsigned char endpoint_ip[16] = "192.168.1.100";
unsigned char ipconfig_cmd[128] = {0};
sprintf(ipconfig_cmd, "/sbin/ifconfig %s %s", tap_name, endpoint_ip);
system(ipconfig_cmd);
pcie_ptr = init_pcie();
if(pcie_ptr == NULL)
{
printf("Unable to initiazlize PCIe memory map. Exiting...\n");
exit(1);
}
THREAD_ARGS thread_args;
thread_args.tap_fd = tap_fd;
thread_args.pcie_ptr = pcie_ptr;
pthread_t ethernet_read_thread;
int err = pthread_create(ðernet_read_thread, NULL, ðernet_read_task, &thread_args);
if (err != 0)
{
printf("Can't start Ethernet handler thread :[%s]", strerror(err));
return 1;
}
int ret = 0;
while (1) {
ret = read_pcie(pcie_ptr, rx_buffer);
if(ret > 0)
{
write(tap_fd, rx_buffer, ret);
memset(rx_buffer, 0, sizeof(rx_buffer));
}
usleep(1);
}
}