0

I construct DPDK packets with following code:

#define PKG_GEN_COUNT 1
#define EIU_HEADER_LEN 42
#define ETHERNET_HEADER_LEN 14
#define IP_DEFTTL 64 /* from RFC 1340. */
#define IP_VERSION 0x40
#define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)

#define MEGA_JOB_GET 0x2
#define MEGA_JOB_SET 0x3
#define MEGA_END_MARK_LEN 2
#define PROTOCOL_TYPE_LEN 2U
#define KEY_LEN 8
#define VAL_LEN 8
#define PROTOCOL_KEYLEN_LEN 2U
#define PROTOCOL_VALLEN_LEN 4U
#define PROTOCOL_HEADER_LEN 8U

struct rte_mbuf *tx_bufs_pt[PKG_GEN_COUNT];
struct rte_ether_hdr *ethh;
struct rte_ipv4_hdr *ip_hdr;
struct rte_udp_hdr *udph;
for (int i = 0; i < PKG_GEN_COUNT; i++) {
    struct rte_mbuf *pkt = (struct rte_mbuf *)rte_pktmbuf_alloc(
        (struct rte_mempool *)send_mbuf_pool);
    if (pkt == NULL)
        rte_exit(EXIT_FAILURE,
                    "Cannot alloc storage memory in  port %" PRIu16 "\n",
                    port);
    pkt->data_len = 1484;
    pkt->nb_segs = 1;  // nb_segs
    pkt->pkt_len = pkt->data_len;
    pkt->ol_flags = PKT_TX_IPV4;  // ol_flags
    pkt->vlan_tci = 0;            // vlan_tci
    pkt->vlan_tci_outer = 0;      // vlan_tci_outer
    pkt->l2_len = sizeof(struct rte_ether_hdr);
    pkt->l3_len = sizeof(struct rte_ipv4_hdr);

    ethh = (struct rte_ether_hdr *)rte_pktmbuf_mtod(pkt, unsigned char *);
    ethh->s_addr = S_Addr;
    ethh->d_addr = D_Addr;
    ethh->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);

    ip_hdr = (struct rte_ipv4_hdr *)((unsigned char *)ethh +
                                        sizeof(struct rte_ether_hdr));
    ip_hdr->version_ihl = IP_VHL_DEF;
    ip_hdr->type_of_service = 0;
    ip_hdr->fragment_offset = 0;
    ip_hdr->time_to_live = IP_DEFTTL;
    ip_hdr->next_proto_id = IPPROTO_UDP;
    ip_hdr->packet_id = 0;
    ip_hdr->total_length = rte_cpu_to_be_16(pktlen);
    ip_hdr->src_addr = rte_cpu_to_be_32(IP_SRC_ADDR);
    ip_hdr->dst_addr = rte_cpu_to_be_32(IP_DST_ADDR);
    ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);

    udph = (struct rte_udp_hdr *)((unsigned char *)ip_hdr +
                                    sizeof(struct rte_ipv4_hdr));
    udph->src_port = 123;
    udph->dst_port = 123;
    udph->dgram_len =
        rte_cpu_to_be_16((uint16_t)(pktlen - sizeof(struct rte_ether_hdr) -
                                    sizeof(struct rte_ipv4_hdr)));
    tx_bufs_pt[i] = pkt;
}
char *ptr = NULL;
uint64_t set_key = 1;
while (1) {
    for (i = 0; i < PKG_GEN_COUNT; i++) {
        /* Load request */
        ptr = (char *)((char *)rte_pktmbuf_mtod(tx_bufs_pt[i], char *) +
                        EIU_HEADER_LEN);
        tx_pkt_load(ptr, &set_key);
    }
    int nb_tx = rte_eth_tx_burst(port, queue_id, tx_bufs_pt, PKG_GEN_COUNT);
}

The tx_pkt_load function fills IP packets' content.

static void tx_pkt_load(char *ptr, uint64_t *start_set_key) {
    uint64_t k, get_key, set_key = *start_set_key;
    for (k = 0; k < number_packet_set[WORKLOAD_ID]; k++) {
        *(uint16_t *)ptr = MEGA_JOB_SET;
        ptr += sizeof(uint16_t);
        *(uint16_t *)ptr = KEY_LEN;
        ptr += sizeof(uint16_t);
        *(uint32_t *)ptr = VALUE_LEN;
        ptr += sizeof(uint32_t);

        set_key++;

        *(uint64_t *)(ptr) = set_key;
        ptr += KEY_LEN;
        *(uint64_t *)(ptr) = set_key + 1;
        ptr += VALUE_LEN;

        *(uint16_t *)ptr = MEGA_JOB_GET;
        ptr += sizeof(uint16_t);
        *(uint16_t *)ptr = KEY_LEN;
        ptr += sizeof(uint16_t);

        get_key = set_key;

        *(uint64_t *)(ptr) = get_key;
        ptr += KEY_LEN;
    }
    *start_set_key = set_key;
    /* pkt ending mark */
    *(uint16_t *)ptr = 0xFFFF;
}

Before I call rte_eth_tx_burst, I use the show_pkt function to dump the IP pakcets' content.

void show_pkt(struct rte_mbuf *pkt) {
    int pktlen = pkt->data_len - EIU_HEADER_LEN;
    uint8_t *ptr = (uint8_t *)((uint8_t *)rte_pktmbuf_mtod(pkt, uint8_t *) +
                               EIU_HEADER_LEN);
    while (*(uint16_t *)ptr != 0xFFFF) {
        uint32_t key_len = *(uint16_t *)(ptr + PROTOCOL_TYPE_LEN);
        if (*(uint16_t *)ptr == MEGA_JOB_GET) {
            fprintf(
                fp[sched_getcpu()], "GET\t%lu\n",
                *(uint64_t *)(ptr + PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN));
            ptr += PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN + key_len;
        } else if (*(uint16_t *)ptr == MEGA_JOB_SET) {
            uint32_t val_len =
                *(uint16_t *)(ptr + PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN);
            fprintf(fp[sched_getcpu()], "SET\t%lu\t%lu\n",
                    *(uint64_t *)(ptr + PROTOCOL_HEADER_LEN),
                    *(uint64_t *)(ptr + PROTOCOL_HEADER_LEN + key_len));
            ptr += PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN +
                   PROTOCOL_VALLEN_LEN + key_len + val_len;
        }
    }
    fprintf(fp[sched_getcpu()], "END_MARK: %04x \n", *(uint16_t *)ptr);
    fprintf(fp[sched_getcpu()], "\n");
    fflush(fp[sched_getcpu()]);
}

The generated file shows the expected packet content. Each GET has the same argument of last SET's first argument and GET's argument should be incremental. The SET's sencond argument equals to its first argument plus one and SET's arguments should be also be incremental respectively.

SET     82      83
GET     82
SET     83      84
GET     83
SET     84      85
GET     84
SET     85      86
GET     85
SET     86      87
GET     86
SET     87      88
GET     87
SET     88      89
GET     88
SET     89      90
GET     89
SET     90      91
GET     90
SET     91      92
GET     91
SET     92      93
GET     92
SET     93      94
GET     93
SET     94      95
GET     94
SET     95      96
GET     95
SET     96      97
GET     96
SET     97      98
GET     97
SET     98      99
GET     98
SET     99      100
GET     99
SET     100     101
GET     100
SET     101     102
GET     101
SET     102     103
GET     102
SET     103     104
GET     103
SET     104     105
GET     104
SET     105     106
GET     105
SET     106     107
GET     106
SET     107     108
GET     107
SET     108     109
GET     108
SET     109     110
GET     109
SET     110     111
GET     110
SET     111     112
GET     111
SET     112     113
GET     112
SET     113     114
GET     113
SET     114     115
GET     114
SET     115     116
GET     115
SET     116     117
GET     116
SET     117     118
GET     117
SET     118     119
GET     118
SET     119     120
GET     119
SET     120     121
GET     120
SET     121     122
GET     121
END_MARK: ffff

However, when I use tcpdump to capture received pakcets on the target machine, the packets captured do not contain expected content. I also tried to use rte_eth_rx_burst to receive packets and dump packets' content through the same function show_pkt. It shows the same result as the following. It's so odd.

SET     82      83
GET     82
SET     83      84
GET     83
SET     84      85
GET     84
SET     85      86
GET     85
SET     86      87
GET     86
SET     87      88
GET     87
SET     88      89
GET     88
SET     89      90
GET     89
SET     90      91
GET     90
SET     91      92
GET     91
SET     92      93
GET     92
SET     93      94
GET     93
SET     94      95
GET     94
SET     95      96
GET     95
SET     96      97
GET     96
SET     97      98
GET     97
SET     98      99
GET     98
SET     99      100
GET     99
SET     100     101
GET     100
SET     101     102
GET     101
SET     102     103
GET     102
SET     103     104
GET     103
SET     104     105
GET     104
SET     105     106
GET     105
SET     106     107
GET     106
SET     107     108
GET     107
SET     108     109
GET     108
SET     109     110
GET     109
SET     110     111
GET     110
SET     111     112
GET     111
SET     112     113
GET     112
SET     73      74
GET     73
SET     74      75
GET     74
SET     75      76
GET     75
SET     76      77
GET     76
SET     77      78
GET     77
SET     78      79
GET     78
SET     79      80
GET     79
SET     80      81
GET     80
SET     81      82
GET     81
END_MARK: ffff

[UPDATE] The packets dumped through rte_pktmbuf_dump contain excepted content. And the packets captured by tcpdump is odd. The packets' content has the following pattern.

uint16_t (0x03)
uint16_t (0x08)
uint32_t (0x08)
uint64_t (x)
uint64_t (x + 1)
uint16_t (0x02)
uint16_t (0x8)
uint64_t (x)

The x should be monotonically increasing through all packets. The second packet captured by tcpdump failed to conform to this law. The starting x is 82 and at the end of packet, the x is 81.

[UPDATE]

Part of the second packet dumped by rte_pktmbuf_dump:

00000030: 00 00 2A 00 00 00 00 00 00 00 2B 00 00 00 00 00
...
000005C0: 08 00 51 00 00 00 00 00 00 00 FF FF

Part of the second packet captured through tcpdump:

0x0020:  0800 0000 5200 0000 0000 0000 5300 0000
...
0x05b0:  0200 0800 5100 0000 0000 0000 ffff 

Compared to the 0x32-th byte of the packet dumped by rte_pktmbuf_dump, the 0x24-th byte of the packet captured by tcpdump should be 2a. Because the last 12 bytes of the two packets are same, which means the two packets should be same.

Hovin
  • 39
  • 8
  • It looks like you are running the code on a little endian machine. Is this true? Can you temporarily comment `show_pkt` function code and use `rte_pktmbuf_dump` to dump the packet. Can you please compare & share the content against `tcpdump -exi [nic]`? – Vipin Varghese Oct 23 '20 at 14:41
  • @VipinVarghese Yes. I am running the code on a little endian machine. I tried what you suggested and updated my question. – Hovin Oct 24 '20 at 04:26
  • please update with the packet dump and tcpdump result. The one which you have shared is not the desired one. – Vipin Varghese Oct 24 '20 at 05:40
  • I am not able to understand from your current edit what is `0x03 0x08 0x08 x x+1 0x02`, is that header or payload. so please upload with data – Vipin Varghese Oct 24 '20 at 06:05
  • @VipinVarghese I use the `Ubuntu paste` to share the packets content. Hyper link on the word `packets`. The pattern is for easing you to check the packets content. What I want to say is that I fill the IP packets' payload with this pattern. If you still feel confused about this, pls forget this and check my code to figure out the packets' content. – Hovin Oct 24 '20 at 07:14
  • thanks for the update I have checked the content. with DPDK pkt_mbuf_dump content is `0300 0800 0800 0000 ` (little endian) and tcpdump `0300 0800 0800 0000` (big endian). Is this what you are mentioning as `change in content`? If not can you tell exact byte position I have to check for you? I can be available for you for live debug too. – Vipin Varghese Oct 24 '20 at 08:56
  • @VipinVarghese Sorry for my late response. I sent ten packets through `dpdk` and dumped by `rte_pktmbuf_dump` and captured those packets on the target machine by `tcpdump`. The 0x32-th byte and 0x5c2-th byte in packets dumped by `rte_pktmbuf_dump` and the 0x24-th byte 0x5b4-th byte can distinguish packets from each other. I updated my question to explain the packets' difference. – Hovin Oct 25 '20 at 06:44

1 Answers1

0

DPDK API with the given option and logic does not modify the packet content before the NIC transmit. To ensure the same, I have tested the logic with tcpdump to capture the packet on the Linux side.

Note: Due to the absence of the exact code or snippet, have edited the code to meet the requirement. I am able to send and receive the packet without any issues.

  1. DPDK test app cmd: sudo LD_LIBRARY_PATH=[path to shared dpdk library] ./a.out --no-pci --vdev=net_tap0 -l 10 -- -p 0x1
  2. tcpdump cmd: sudo tcpdump -exxxi dtap0 -Q in
  3. Code: https://paste.ubuntu.com/p/zHP5q89yMz/

pktmbuf_dump:

01 02 03 04 05 06 01 02 03 04 05 06 08 00 45 00
05 A2 00 00 00 00 40 11 5B 2E 01 02 03 04 0A 0B
0C 0D 7B 00 7B 00 64 00 00 00 03 00 02 00 02 00
00 00 02 00 03 00 02 00 02 00 02 00 FF FF 00 00

tcpdump:

        0x0000:  0102 0304 0506 0102 0304 0506 0800 4500
        0x0010:  05a2 0000 0000 4011 5b2e 0102 0304 0a0b
        0x0020:  0c0d 7b00 7b00 6400 0000 0300 0200 0200
        0x0030:  0000 0200 0300 0200 0200 0200 ffff 0000

Vipin Varghese
  • 4,540
  • 2
  • 9
  • 25
  • Sorry for the absence of some snippets. The repeat time of line 124-148 is 40, and the `KEY_LEN` and `VALUE_LEN` are both defined as `8`, which is the value of `sizeof(uint64_t)`. I think the code should have the same problem. Let me try it and feedback result in a minute. – Hovin Oct 25 '20 at 06:55
  • @Hovin it looks like there is problem in the code. Since there is no problem in DPDK API please accept and upvote the answer. – Vipin Varghese Oct 25 '20 at 08:15
  • I tested many times and figured out the reason. The code gets one `struct rte_mbuf` instance through `rte_pktmbuf_alloc` and inits its header. Then it calls `pkt_load` to fill the packet's content and sends it out. Again, it fills the `struct rte_mbuf` instance with other data. The received packet on the target machine dosen't contain expected content. The first half part of the packet should be the first half part of next received one. And the second half part is as expected. So I think we shouldn't change the `struct rte_mbuf` instances' content immediately after we call `rte_eth_tx_burst`. – Hovin Oct 26 '20 at 01:34
  • @Hovin, so the porblem is not with DPDK API. Problem is in the code where you send the data then modify the buffer (rte_mbuf) sends it again. So this is a program logic and not DPDK. As shared in the answer with limited code snippet I show case there is nothing wrong happening in the DPDK API. Hence if my answer has helped you please accept and upvote to close the same. – Vipin Varghese Oct 26 '20 at 03:11
  • yeah, it helps. Before close this question, I want to talk about the code you write. I seems it inits multiple thread run `l2fwd_launch_one_core`. Each `l2fwd_launch_one_core` only sends packets once. If I want each `l2fwd_launch_one_core` to send packets constantly, how should I do? Free `struct rte_mbuf` instance after calling `rte_eth_tx_burst` and then get new one through `rte_pktmbuf_alloc` again? – Hovin Oct 26 '20 at 06:03
  • @Hovin the current question in comment is not related to the original question. Hence I humbly request you open a new question with `your` code snippet (dummy function) to reproduce. will wait for your new ticket, please accept and upvote . – Vipin Varghese Oct 26 '20 at 07:22