0

I have a program running on a QEMU VM. The program running inside this VM gets notified by a program on the host via interrupts and using QEMU ivshmem. The program on the host creates an eventfd and sends this file descriptor to QEMU when the VM starts. The program in the guest then opens a VFIO group device and sets an interrupt request fd on this device. We can then add the interrupt fd to epoll and epoll_wait to wait for notifications from the host.

The thing is that I want a 1-1 matching between the number of times the host writes to the eventfd and the number of events that are signaled in epoll_wait. For this I decided to use EFD_SEMAPHORE for the eventfds on the host and the guest. From my understanding, every time I write an 8-byte integer with value 1, the eventfd counter is incremented by 1. Then every time the eventfd is read, the counter is decremented by 1 (different from a regular eventfd, where each read clears the whole counter). For some reason, I am not getting the desired behaviour, so I was wondering whether eventfds with the EFD_SEMAPHORE flag are not properly supported by VFIO or by QEMU's ivshmem.

Below is a simplified version of the parts I think are relevant and of how I set up the notification system. I hope the code below is not too verbose. I tried to remove the irrelevant parts (there is too much other code in the middle that is not particularly relevant to the problem), but I am not 100% sure what might be relevant and what might not.

Code host uses to signal guest

/*
 * Send one 64-bit integer over the socket `fd` as a single message
 * carrying no ancillary data.
 *
 * Returns the number of bytes sent (sizeof(int64_t)) on success, or -1
 * when sendmsg() fails or transfers a different number of bytes.
 */
int ivshmem_uxsocket_send_int(int fd, int64_t i)
{
    struct iovec payload = { .iov_base = &i, .iov_len = sizeof(i) };

    /* Members not named here (name, control, flags) are zero/NULL. */
    struct msghdr hdr = { .msg_iov = &payload, .msg_iovlen = 1 };

    ssize_t sent = sendmsg(fd, &hdr, 0);

    if (sent == (ssize_t) sizeof(int64_t))
    {
        return (int) sent;
    }

    return -1;
}

/*
 * Pass the file descriptor `fd` over the socket `uxfd` as SCM_RIGHTS
 * ancillary data, with the 64-bit integer `i` as the regular payload
 * (control data cannot be sent without at least one byte of data).
 *
 * Returns the number of payload bytes sent (sizeof(i)) on success, or
 * -1 when sendmsg() fails or transfers a different number of bytes.
 */
int ivshmem_uxsocket_sendfd(int uxfd, int fd, int64_t i)
{
    /* The cmsghdr member exists only to give the byte buffer the
       alignment that the CMSG_* macros require. */
    union {
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(fd))];
    } control;

    struct iovec payload = { .iov_base = &i, .iov_len = sizeof(i) };

    struct msghdr hdr = {
        .msg_iov = &payload,
        .msg_iovlen = 1,
        .msg_control = &control,
        .msg_controllen = sizeof(control),
    };

    struct cmsghdr *rights;
    ssize_t sent;

    memset(&control, 0, sizeof(control));

    /* Describe the single control message that carries the descriptor. */
    rights = CMSG_FIRSTHDR(&hdr);
    rights->cmsg_len = CMSG_LEN(sizeof(int));
    rights->cmsg_level = SOL_SOCKET;
    rights->cmsg_type = SCM_RIGHTS;
    memcpy(CMSG_DATA(rights), &fd, sizeof(fd));

    sent = sendmsg(uxfd, &hdr, 0);

    return (sent == (ssize_t) sizeof(i)) ? (int) sent : -1;
}

/* SETUP IVSHMEM WITH QEMU AND PASS THE EVENTFD USED TO 
   NOTIFY THE GUEST */
int ivshmem_uxsocket_accept()
{   
    int ret;
    int cfd, ifd, nfd;

    int64_t version = IVSHMEM_PROTOCOL_VERSION;
    uint64_t hostid = HOST_PEERID;
    int vmid = 0

    /* Accept connection from qemu ivshmem */
    if ((cfd = accept(uxfd, NULL, NULL)) < 0)
    {
        return -1;
    }

    /* Send protocol version as required by qemu ivshmem */
    ret = ivshmem_uxsocket_send_int(cfd, version);
    if (ret < 0)
    {
      return -1;
    }

    /* Send vm id to qemu */
    ret = ivshmem_uxsocket_send_int(cfd, vmid);
    if (ret < 0)
    {
      return -1;
    }

    /* Send shared memory fd to qemu */
    ret = ivshmem_uxsocket_sendfd(cfd, shm_fd, -1);
    if (ret < 0)
    {
      return -1;
    }

    /* Eventfd used by guest to notify host */
    if ((nfd = eventfd(0, EFD_SEMAPHORE | EFD_NONBLOCK)) < 0)
    {
      return -1;
    }

    /* Ivshmem protocol requires to send host id
       with the notify fd */
    ret = ivshmem_uxsocket_sendfd(cfd, nfd, hostid);
    if (ret < 0)
    {
      return -1;
    }

    /* THIS IS THE EVENTFD OF INTEREST TO US: USED BY HOST
       TO NOTIFY GUEST */
    if ((ifd = eventfd(0, EFD_SEMAPHORE | EFD_NONBLOCK)) < 0)
    {
      return -1;
    }

    ret = ivshmem_uxsocket_sendfd(cfd, ifd, vmid);
    if (ret < 0)
    {
      return -1;
    }

    if (epoll_ctl(epfd, EPOLL_CTL_ADD, cfd, &ev) < 0)
    {
      return -1;
    }

    return 0;
}

/*
 * Notify the guest: write the 64-bit value 1 to `fd`.
 *
 * Returns 0 when all 8 bytes were written, -1 otherwise (including
 * when write() itself fails).
 */
int notify_guest(int fd)
{
  const uint64_t increment = 1;
  ssize_t written;

  written = write(fd, &increment, sizeof(increment));

  /* Compare in the signed domain. Comparing the result against a bare
     sizeof() would convert -1 to a huge unsigned value and make a
     failed write look like success. */
  if (written != (ssize_t) sizeof(increment))
  {
    return -1;
  }

  return 0;
}

Code guest uses to receive notifications from host

/*
 * Create a non-blocking, semaphore-mode eventfd and register it with
 * the VFIO device `dev` as the trigger for one interrupt (index 2,
 * sub-index 0) via VFIO_DEVICE_SET_IRQS.
 *
 * NOTE(review): index 2 is presumably VFIO_PCI_MSIX_IRQ_INDEX from
 * <linux/vfio.h>; consider using the named constant -- confirm.
 *
 * Returns the new eventfd on success. On failure returns -1 and closes
 * the eventfd if it had been created.
 */
int vfio_set_irq(int dev)
{
  int fd;
  struct vfio_irq_set *irq_set;

  /* The request is the fixed header followed by one int (the eventfd).
     The union gives the byte buffer the header's alignment; a plain
     char array cast to the struct type is not guaranteed to be
     aligned. */
  union {
    struct vfio_irq_set hdr;
    char raw[sizeof(struct vfio_irq_set) + sizeof(int)];
  } req;

  if ((fd = eventfd(0, EFD_SEMAPHORE | EFD_NONBLOCK)) < 0)
  {
    return -1;
  }

  memset(&req, 0, sizeof(req));

  irq_set = &req.hdr;
  irq_set->argsz = sizeof(req.raw);
  irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
  irq_set->index = 2;
  irq_set->start = 0;
  irq_set->count = 1;
  memcpy(irq_set->data, &fd, sizeof(fd));

  if (ioctl(dev, VFIO_DEVICE_SET_IRQS, irq_set) < 0)
  {
    close(fd);
    return -1;
  }

  /* Return the descriptor created above (the original returned an
     unrelated identifier, irq_fd). */
  return fd;
}

/*
 * Guest-side setup: open a VFIO container and group, attach the group
 * to the container with the no-IOMMU model, obtain the device fd,
 * install the interrupt eventfd with vfio_set_irq() and add that
 * eventfd to the epoll instance `epfd` for EPOLLIN.
 *
 * `dev`, `epfd`, VFIO_GROUP, VFIO_PCI_DEV and vfio_subscribe_irq() are
 * not declared in this function; they are expected to be defined
 * elsewhere in the file.
 *
 * Returns 0 on success. On failure returns -1 and closes the container,
 * group and interrupt descriptors opened by this call. `dev` is left
 * untouched on failure because it is owned outside this function.
 *
 * NOTE(review): irq_fd is a local here, while notify_poll() reads an
 * identifier named irq_fd that must come from an outer scope. If that
 * is a global, this local shadows it and the global is never assigned
 * -- confirm against the full program.
 */
int vfio_init()
{
  int cont = -1, group = -1, irq_fd = -1;
  struct epoll_event ev = { 0 };
  struct vfio_group_status g_status = { .argsz = sizeof(g_status) };
  struct vfio_device_info device_info = { .argsz = sizeof(device_info) };

  /* Create vfio container */
  if ((cont = open("/dev/vfio/vfio", O_RDWR)) < 0)
  {
    goto fail;
  }

  /* Check API version of container */
  if (ioctl(cont, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
  {
    goto fail;
  }

  /* Require the no-IOMMU extension. A failing ioctl returns -1, which
     must not be mistaken for "supported", hence <= 0. */
  if (ioctl(cont, VFIO_CHECK_EXTENSION, VFIO_NOIOMMU_IOMMU) <= 0)
  {
    goto fail;
  }

  /* Open the vfio group */
  if ((group = open(VFIO_GROUP, O_RDWR)) < 0)
  {
    goto fail;
  }

  /* Test if group is viable and available; the flags are only
     meaningful if the ioctl itself succeeded. */
  if (ioctl(group, VFIO_GROUP_GET_STATUS, &g_status) < 0)
  {
    goto fail;
  }
  if (!(g_status.flags & VFIO_GROUP_FLAGS_VIABLE))
  {
    goto fail;
  }

  /* Add group to container */
  if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &cont) < 0)
  {
    goto fail;
  }

  /* Enable desired IOMMU model */
  if (ioctl(cont, VFIO_SET_IOMMU, VFIO_NOIOMMU_IOMMU) < 0)
  {
    goto fail;
  }

  /* Get file descriptor for device */
  if ((dev = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, VFIO_PCI_DEV)) < 0)
  {
    goto fail;
  }

  /* Get device info */
  if (ioctl(dev, VFIO_DEVICE_GET_INFO, &device_info) < 0)
  {
    goto fail;
  }

  /* Set interrupt request fd */
  if ((irq_fd = vfio_set_irq(dev)) < 0)
  {
    goto fail;
  }

  /* Add interrupt request fd to interest list */
  if (vfio_subscribe_irq() < 0)
  {
    goto fail;
  }

  /* Do other shm setup stuff not related to the interrupt
     request */

  /* epoll_event.data is a union, so only one member can be stored.
     The original assigned data.ptr (EP_NOTIFY) and then data.fd, and
     the second write overwrote the first; only the fd is kept here. */
  ev.events = EPOLLIN;
  ev.data.fd = irq_fd;
  if (epoll_ctl(epfd, EPOLL_CTL_ADD, irq_fd, &ev) != 0)
  {
    goto fail;
  }

  return 0;

fail:
  /* -1 marks a descriptor that was never opened. */
  if (irq_fd >= 0)
  {
    close(irq_fd);
  }
  if (group >= 0)
  {
    close(group);
  }
  if (cont >= 0)
  {
    close(cont);
  }
  return -1;
}

/*
 * Perform a single 8-byte read() on `fd` and discard the value read.
 *
 * Returns the number of bytes read on success. Returns -1 when read()
 * fails or when it reads zero bytes.
 */
int ivshmem_drain_evfd(int fd)
{
  uint64_t discarded;
  const int nread = read(fd, &discarded, sizeof(discarded));

  /* A zero-byte read is folded into the same -1 that a failed read()
     already produces. */
  return (nread == 0) ? -1 : nread;
}

/*
 * Poll `epfd` without blocking and run handle() once for every
 * notification that can be read from `irq_fd`.
 *
 * epoll reports readiness, not a count: one epoll_wait() call yields at
 * most one EPOLLIN event per ready descriptor, however many
 * notifications are pending on it. The eventfd is therefore read
 * repeatedly until the read fails, and handle() is called once per
 * successful read. This relies on irq_fd being non-blocking, as created
 * in vfio_set_irq(); on a blocking descriptor the final read would
 * block.
 *
 * `epfd`, `irq_fd` and handle() are not declared in this function; they
 * are expected to be defined elsewhere in the file.
 *
 * Returns the number of notifications handled, or -1 if epoll_wait()
 * fails.
 *
 * NOTE(review): the host-side eventfd is consumed by QEMU/KVM rather
 * than by this code, so writes made by the host may be merged before
 * they ever reach the guest eventfd; a strict 1-1 count may not be
 * achievable from this side alone -- confirm.
 */
int notify_poll()
{
  int i, n;
  int handled = 0;
  struct epoll_event evs[32];

  n = epoll_wait(epfd, evs, 32, 0);
  if (n < 0)
  {
    return -1;
  }

  for (i = 0; i < n; i++)
  {
    if (evs[i].events & EPOLLIN)
    {
      /* One successful read == one notification. Stop at the first
         failed read (normally EAGAIN once nothing is pending). */
      while (ivshmem_drain_evfd(irq_fd) > 0)
      {
        /* Handle notification ... */
        handle();
        handled++;
      }
    }
  }

  return handled;
}
marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
MUAS
  • 519
  • 1
  • 7
  • 20

0 Answers0