1

I am trying to communicate between two Linux based devices (i.e. PowerPC (Big-Endian 32-bit) & ARM (Little-Endian 32-bit)) connected over PCIe. One of the devices acts as Root Complex and the other as Endpoint. The devices successfully negotiate to create a PCIe x1 gen-1 link. To communicate between the devices, I have written userspace C applications that make use of mmap() to read/write data to PCIe memory space.

The issue I am facing is that the average data transfer rate is around 5Mb/s, which is nowhere near the theoretical PCIe 1.1 x1 speed (250MB/s). I have not been able to find the data rate bottleneck yet and would like some suggestions. Can someone give an idea of what might be the reason behind this?

Please find below C code for both Root Complex (RC) side and Endpoint (EP) side:

RC:

#include <endian.h>
#include <stdio.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <signal.h>
#include <stdint.h>
#include <errno.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <asm/ioctl.h>

/*
 * Layout of the shared PCIe BAR window.
 *
 * The window is split in two halves, one per writer. Each half starts with a
 * 32-bit "data ready" flag, followed by a 32-bit payload length and then the
 * payload itself.
 *
 * Every derived offset is parenthesized so that it expands correctly inside a
 * larger expression (e.g. MEM_MAP_LEN - RC_DATA_OFFSET).
 */
#define MEM_MAP_LEN             0x1000000       //  Map 16-MB of PCIe BAR space
#define EP_WR_START             0x0             //  EP write region from start of BAR space
#define EP_SET_DATA_READY       (EP_WR_START + 0x0)
#define EP_DATA_LEN_OFFSET      (EP_WR_START + 0x4)
#define EP_DATA_OFFSET          (EP_WR_START + 0x8)
#define RC_WR_START             0x800000        //  RC write region starts from mid of BAR space
#define RC_SET_DATA_READY       (RC_WR_START + 0x0)
#define RC_DATA_LEN_OFFSET      (RC_WR_START + 0x4)
#define RC_DATA_OFFSET          (RC_WR_START + 0x8)


/* Value stored in a region's "data ready" word while a frame is pending. */
uint32_t sod_id = 0xFFFFAAAA;               // Start of Data ID

/*
 * Bundle of values handed to the TAP reader thread: main() fills it in and
 * passes its address to pthread_create().
 */
typedef struct
{
    int   tap_fd;       /* descriptor of the TAP interface the thread reads from */
    void *pcie_ptr;     /* base address of the mapped PCIe window */
} THREAD_ARGS;

/*
 * Map the PCIe BAR exposed through the PCI sysfs resource file into this
 * process.
 *
 * Returns the base address of a MEM_MAP_LEN-byte shared read/write mapping,
 * or NULL when the resource file cannot be opened or mapped.
 */
void * init_pcie()
{
    /* A string literal is a char array, so the path is held as const char *. */
    const char * pcie_file = "/sys/bus/pci/devices/0000:03:00.0/resource2";

    int fd = open(pcie_file, O_RDWR | O_SYNC);
    if(fd < 0)
    {
        /* Report the real cause here instead of letting mmap() fail on fd -1. */
        printf("Error: Failed to open PCIe resource %s: %s\n", pcie_file, strerror(errno));
        return NULL;
    }

    void * res_ptr = mmap(0, MEM_MAP_LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

    /* The mapping stays valid after the descriptor is closed. */
    close(fd);

    if(res_ptr == MAP_FAILED)
    {
        printf("Error: Failed to map memory to PCIe resource\n");
        return NULL;
    }

    return res_ptr;
}

int write_pcie(volatile void * ptr, unsigned char * buffer, unsigned int buffer_len)
{
    uint32_t tmp_flag = htobe32(sod_id);
    uint32_t tmp_len = htobe32(buffer_len);
    uint32_t tmp_val;

    memcpy(&tmp_val, ((uint8_t *)ptr + RC_SET_DATA_READY), sizeof(tmp_val));
    if(tmp_val != htobe32(sod_id))  // check if there's already pending data
    {
        memcpy(((uint8_t *)ptr + RC_DATA_LEN_OFFSET), &tmp_len, sizeof(tmp_len));
        memcpy((uint8_t *)((uint8_t *)ptr + RC_DATA_OFFSET), buffer, buffer_len);
        memcpy(((uint8_t *)ptr + RC_SET_DATA_READY), &tmp_flag, sizeof(tmp_flag));
        return 0;
    }
    else
        return -1;
}

/*
 * Fetch one pending frame from the EP write region of the mapped BAR.
 *
 * ptr    - base address of the mapped BAR
 * buffer - destination for the payload; must hold at least 1800 bytes, which
 *          is the size of the rx_buffer that main() passes in
 *
 * The ready flag and the length word are read as big-endian values.
 *
 * Returns the payload length when a frame was copied. Returns -1 when no
 * frame is pending, or when the advertised length exceeds the destination
 * capacity; in that case the frame is discarded and the flag is still cleared
 * so the channel does not stall.
 */
int read_pcie(volatile void * ptr, unsigned char * buffer)
{
    /* Capacity of the caller's buffer; keep in sync with rx_buffer in main(). */
    enum { RX_BUFFER_CAPACITY = 1800 };

    uint8_t * base = (uint8_t *)ptr;
    uint32_t tmp_val;
    uint32_t data_len;
    int result = -1;

    memcpy(&tmp_val, base + EP_SET_DATA_READY, sizeof(tmp_val));
    if(tmp_val != htobe32(sod_id))
    {
        return -1;
    }

    memcpy(&data_len, base + EP_DATA_LEN_OFFSET, sizeof(data_len));
    data_len = be32toh(data_len);

    /* The length comes from the peer: never trust it beyond the buffer size. */
    if(data_len <= RX_BUFFER_CAPACITY)
    {
        memcpy(buffer, base + EP_DATA_OFFSET, data_len);
        result = (int)data_len;
    }

    /* Clearing the flag hands the region back to the writer. */
    memset(base + EP_SET_DATA_READY, 0x0, sizeof(uint32_t));
    return result;
}

/*
 * Create (or attach to) a TUN/TAP interface through /dev/net/tun.
 *
 * dev   - in: requested interface name, or an empty string to let the kernel
 *         pick one; out: the name actually assigned. The buffer must hold at
 *         least IFNAMSIZ bytes.
 * flags - interface flags stored in ifr_flags (main() passes IFF_TAP)
 *
 * Returns the open descriptor on success or a negative value on failure,
 * with errno describing the failing call.
 *
 * NOTE(review): ioctl() is declared in <sys/ioctl.h>; this file only includes
 * <asm/ioctl.h>.
 */
int tun_alloc(char *dev, int flags) {

    struct ifreq ifr;
    int fd, err;
    const char *clonedev = "/dev/net/tun";

    if ( (fd = open(clonedev, O_RDWR)) < 0 ) {
        return fd;
    }

    memset(&ifr, 0, sizeof(ifr));

    ifr.ifr_flags = flags;

    if (*dev) {
        /* ifr is zero-filled, so copying at most IFNAMSIZ - 1 bytes keeps the
           name NUL-terminated even when dev is too long. */
        strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
    }

    if ( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
        /* close() may overwrite errno; keep the ioctl error for the caller. */
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return err;
    }

    strcpy(dev, ifr.ifr_name);

    return fd;
}

/*
 * Thread entry point: forwards every frame read from the TAP interface to the
 * peer through write_pcie().
 *
 * thread_args - pointer to a THREAD_ARGS holding the TAP descriptor and the
 *               mapped PCIe base address
 *
 * Never returns.
 */
void* ethernet_read_task(void * thread_args)
{
    THREAD_ARGS args = *(THREAD_ARGS *)thread_args;
    int tap_fd = args.tap_fd;
    void * pcie_ptr = args.pcie_ptr;
    unsigned char tx_buffer[1800] = {0};

    while (1)
    {
        /* read() returns a signed count. Keeping it signed lets the error
           branch fire instead of treating -1 as a huge frame length. */
        ssize_t nread = read(tap_fd, tx_buffer, sizeof(tx_buffer));
        if (nread < 0) {
            perror("Reading from interface");
        }
        else if (nread > 0)
        {
            /* write_pcie() returns -1 while the previous frame is still
               pending; the current frame is then dropped. */
            (void)write_pcie(pcie_ptr, tx_buffer, (unsigned int)nread);
        }
    }

}

/*
 * Root Complex side entry point.
 *
 * Creates the "virt0" TAP interface, assigns it an IP address, maps the PCIe
 * window, starts the thread that forwards TAP frames to the peer, and then
 * polls the window for frames coming from the peer and writes them to the TAP
 * interface. The polling loop never exits.
 *
 * NOTE(review): pthread_t / pthread_create() need <pthread.h>, which this
 * file does not include.
 */
int main()
{
    char tap_name[32] = {0};
    strcpy(tap_name, "virt0");
    int tap_fd = tun_alloc(tap_name, IFF_TAP);  /* tap interface */

    if (tap_fd < 0) {
        perror("Allocating interface");
        exit(1);
    }

    const char rootcomplex_ip[16] = "192.168.1.1";
    char ipconfig_cmd[128] = {0};
    snprintf(ipconfig_cmd, sizeof(ipconfig_cmd), "/sbin/ifconfig %s %s", tap_name, rootcomplex_ip);
    system(ipconfig_cmd);

    void *pcie_ptr = init_pcie();

    if(pcie_ptr == NULL)
    {
        printf("Failed to initialize PCIe mapping. Exiting...\n");
        /* Without a mapping every later access would dereference NULL. */
        exit(1);
    }

    THREAD_ARGS thread_args;
    thread_args.tap_fd = tap_fd;
    thread_args.pcie_ptr = pcie_ptr;

    pthread_t ethernet_read_thread;
    int err = pthread_create(&ethernet_read_thread, NULL, &ethernet_read_task, &thread_args);
    if (err != 0)
    {
        printf("Can't start Ethernet handler thread :[%s]", strerror(err));
        return 1;
    }

    unsigned char rx_buffer[1800] = {0};

    while(1)
    {
        int ret = read_pcie(pcie_ptr, rx_buffer);

        if(ret > 0)
        {
            if(write(tap_fd, rx_buffer, ret) < 0)
            {
                perror("Writing to interface");
            }
            memset(rx_buffer, 0, sizeof(rx_buffer));
            /* A frame was just delivered: poll again right away so that
               back-to-back frames are not each delayed by a sleep. */
            continue;
        }

        /* Nothing pending: yield the CPU briefly before polling again. */
        usleep(1);
    }
    /* Not reached: the loop above has no exit, so no munmap() is needed. */
}

EP:

#include <sys/socket.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <fcntl.h>
#include <asm/ioctl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <errno.h>


/*
 * Layout of the shared PCIe window as seen from the endpoint.
 *
 * The window is split in two halves, one per writer. Each half starts with a
 * 32-bit "data ready" flag, followed by a 32-bit payload length and then the
 * payload itself.
 *
 * Every derived offset is parenthesized so that it expands correctly inside a
 * larger expression (e.g. MAP_SIZE - RC_DATA_OFFSET).
 */
#define PCIE_MAP                0x7f000000      //  Physical address mapped through /dev/mem
#define MAP_SIZE                0x1000000       //  Map 16-MB of PCIe BAR space
#define EP_WR_START             0x0             //  EP write region from start of BAR space
#define EP_SET_DATA_READY       (EP_WR_START + 0x0)
#define EP_DATA_LEN_OFFSET      (EP_WR_START + 0x4)
#define EP_DATA_OFFSET          (EP_WR_START + 0x8)
#define RC_WR_START             0x800000        //  RC write region starts from mid of BAR space
#define RC_SET_DATA_READY       (RC_WR_START + 0x0)
#define RC_DATA_LEN_OFFSET      (RC_WR_START + 0x4)
#define RC_DATA_OFFSET          (RC_WR_START + 0x8)


/* Value stored in a region's "data ready" word while a frame is pending. */
uint32_t sod_id = 0xFFFFAAAA;

/*
 * Bundle of values handed to the TAP reader thread: main() fills it in and
 * passes its address to pthread_create().
 */
typedef struct
{
    int   tap_fd;       /* descriptor of the TAP interface the thread reads from */
    void *pcie_ptr;     /* base address of the mapped PCIe window */
} THREAD_ARGS;


/*
 * Create (or attach to) a TUN/TAP interface through /dev/net/tun.
 *
 * dev   - in: requested interface name, or an empty string to let the kernel
 *         pick one; out: the name actually assigned. The buffer must hold at
 *         least IFNAMSIZ bytes.
 * flags - interface flags stored in ifr_flags (main() passes IFF_TAP)
 *
 * Returns the open descriptor on success or a negative value on failure,
 * with errno describing the failing call.
 *
 * NOTE(review): ioctl() is declared in <sys/ioctl.h> and close() in
 * <unistd.h>; this file includes neither.
 */
int tun_alloc(char *dev, int flags) {

    struct ifreq ifr;
    int fd, err;
    const char *clonedev = "/dev/net/tun";

    if ( (fd = open(clonedev, O_RDWR)) < 0 ) {
        return fd;
    }

    memset(&ifr, 0, sizeof(ifr));

    ifr.ifr_flags = flags;

    if (*dev) {
        /* ifr is zero-filled, so copying at most IFNAMSIZ - 1 bytes keeps the
           name NUL-terminated even when dev is too long. */
        strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
    }

    if ( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) {
        /* close() may overwrite errno; keep the ioctl error for the caller. */
        int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return err;
    }

    strcpy(dev, ifr.ifr_name);

    return fd;
}

/*
 * Map MAP_SIZE bytes of physical memory starting at PCIE_MAP into this
 * process through /dev/mem.
 *
 * Returns the base address of the shared read/write mapping, or NULL when
 * /dev/mem cannot be opened or the region cannot be mapped.
 *
 * NOTE(review): /dev/mem is opened without O_SYNC. Confirm the resulting
 * mapping is uncached; otherwise data written by the PCIe peer may be read
 * stale.
 */
void * init_pcie()
{
    int fd = open("/dev/mem", O_RDWR);
    if (fd < 0)
    {
        /* Report the real cause here instead of letting mmap() fail on fd -1. */
        printf("Failure opening /dev/mem: %s\n", strerror(errno));
        return NULL;
    }

    /* Map physical memory address specified with PCIE_MAP to virtual address and set it in ptr */

    void* ptr = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, PCIE_MAP);

    /* Capture errno before close(), which is allowed to overwrite it. */
    int map_errno = errno;
    close(fd);

    if (ptr == MAP_FAILED)
    {
        printf("Failure mapping: %s\n", strerror(map_errno));
        return NULL;
    }

    return ptr;
}

/*
 * Overwrite the whole mapped window (MAP_SIZE bytes starting at ptr) with
 * 0xff bytes.
 */
void reset_pcie_space(void * ptr)
{
    uint8_t * window = ptr;

    memset(window, 0xff, MAP_SIZE);
}

int write_pcie(volatile void * ptr, unsigned char * buffer, unsigned int buffer_len)
{
    uint32_t tmp_val;
    memcpy(&tmp_val, ((uint8_t *)ptr + EP_SET_DATA_READY), sizeof(tmp_val));

    if ( tmp_val != sod_id)
    {
        memcpy(((uint8_t *)ptr + EP_DATA_LEN_OFFSET), &buffer_len, sizeof(buffer_len));
        memcpy((uint8_t *)((uint8_t *)ptr + EP_DATA_OFFSET), buffer, buffer_len);
        memcpy(((uint8_t *)ptr + EP_SET_DATA_READY), &sod_id, sizeof(sod_id));
        return 0;
    }
    else
        return -1;
}

/*
 * Fetch one pending frame from the RC write region of the mapped window.
 *
 * ptr    - base address of the mapped window
 * buffer - destination for the payload; must hold at least 1800 bytes, which
 *          is the size of the rx_buffer that main() passes in
 *
 * The ready flag and the length word are read in host byte order.
 *
 * Returns the payload length when a frame was copied. Returns -1 when no
 * frame is pending, or when the advertised length exceeds the destination
 * capacity; in that case the frame is discarded and the flag is still cleared
 * so the channel does not stall.
 */
int read_pcie(volatile void * ptr, unsigned char * buffer)
{
    /* Capacity of the caller's buffer; keep in sync with rx_buffer in main(). */
    enum { RX_BUFFER_CAPACITY = 1800 };

    uint8_t * base = (uint8_t *)ptr;
    uint32_t tmp_val;
    uint32_t data_len;
    int result = -1;

    memcpy(&tmp_val, base + RC_SET_DATA_READY, sizeof(tmp_val));
    if (tmp_val != sod_id)
    {
        return -1;
    }

    memcpy(&data_len, base + RC_DATA_LEN_OFFSET, sizeof(data_len));

    /* The length comes from the peer: never trust it beyond the buffer size. */
    if (data_len <= RX_BUFFER_CAPACITY)
    {
        memcpy(buffer, base + RC_DATA_OFFSET, data_len);
        result = (int)data_len;
    }

    /* Clearing the flag hands the region back to the writer. */
    memset(base + RC_SET_DATA_READY, 0x0, sizeof(uint32_t));
    return result;
}

/*
 * Thread entry point: forwards every frame read from the TAP interface to the
 * peer through write_pcie().
 *
 * thread_args - pointer to a THREAD_ARGS holding the TAP descriptor and the
 *               mapped PCIe base address
 *
 * Never returns.
 *
 * NOTE(review): read() is declared in <unistd.h>, which this file does not
 * include.
 */
void* ethernet_read_task(void * thread_args)
{
    THREAD_ARGS args = *(THREAD_ARGS *)thread_args;
    int tap_fd = args.tap_fd;
    void * pcie_ptr = args.pcie_ptr;
    unsigned char tx_buffer[1800] = {0};

    while(1)
    {
        /* read() returns a signed count. Keeping it signed lets the error
           branch fire instead of treating -1 as a huge frame length. */
        ssize_t nread = read(tap_fd, tx_buffer, sizeof(tx_buffer));
        if (nread < 0) {
            perror("Reading from interface");
        }
        else if(nread > 0)
        {
            /* write_pcie() returns -1 while the previous frame is still
               pending; the current frame is then dropped. */
            (void)write_pcie(pcie_ptr, tx_buffer, (unsigned int)nread);
        }
    }

}

/*
 * Endpoint side entry point.
 *
 * Creates the "virt0" TAP interface, assigns it an IP address, maps the PCIe
 * window, starts the thread that forwards TAP frames to the peer, and then
 * polls the window for frames coming from the peer and writes them to the TAP
 * interface. The polling loop never exits.
 *
 * NOTE(review): pthread_t / pthread_create() need <pthread.h>, and write() /
 * usleep() need <unistd.h>; this file includes neither.
 */
int main()
{
    char tap_name[32];
    unsigned char rx_buffer[1800] = {0};

    void * pcie_ptr = NULL;

    strcpy(tap_name, "virt0");
    int tap_fd = tun_alloc(tap_name, IFF_TAP);

    if (tap_fd < 0) {
        perror("Allocating interface");
        exit(1);
    }

    const char endpoint_ip[16] = "192.168.1.100";
    char ipconfig_cmd[128] = {0};
    snprintf(ipconfig_cmd, sizeof(ipconfig_cmd), "/sbin/ifconfig %s %s", tap_name, endpoint_ip);
    system(ipconfig_cmd);

    pcie_ptr = init_pcie();

    if(pcie_ptr == NULL)
    {
        printf("Unable to initiazlize PCIe memory map. Exiting...\n");
        exit(1);
    }

    THREAD_ARGS thread_args;
    thread_args.tap_fd = tap_fd;
    thread_args.pcie_ptr = pcie_ptr;

    pthread_t ethernet_read_thread;
    int err = pthread_create(&ethernet_read_thread, NULL, &ethernet_read_task, &thread_args);
    if (err != 0)
    {
        printf("Can't start Ethernet handler thread :[%s]", strerror(err));
        return 1;
    }

    while (1) {

        int ret = read_pcie(pcie_ptr, rx_buffer);
        if(ret > 0)
        {
            if(write(tap_fd, rx_buffer, ret) < 0)
            {
                perror("Writing to interface");
            }
            memset(rx_buffer, 0, sizeof(rx_buffer));
            /* A frame was just delivered: poll again right away so that
               back-to-back frames are not each delayed by a sleep. */
            continue;
        }

        /* Nothing pending: yield the CPU briefly before polling again. */
        usleep(1);
    }
}

Biffen
  • 6,249
  • 6
  • 28
  • 36
Arshan
  • 736
  • 6
  • 19
  • @jww please check my edit, I have added source code for both the RC and EP sides. The exact problem is: why am I unable to achieve a high data rate using the above code while the link supports up to 250MB/s? – Arshan Aug 29 '19 at 05:34
  • 1
    After running these processes on both sides, I use `iperf` to test link speed. – Arshan Aug 29 '19 at 05:36

0 Answers0