On the following page, https://lwn.net/Articles/810414/, it says:
IORING_OP_READ_FIXED IORING_OP_WRITE_FIXED These opcodes also submit I/O operations, but they use "registered" buffers that are already mapped into the kernel, reducing the amount of total overhead.
However, I could not find a single example online of how to use it. The io_uring_enter man page says:
EFAULT IORING_OP_READ_FIXED or IORING_OP_WRITE_FIXED was specified in the opcode field of the submission queue entry, but either buffers were not registered for this io_uring instance, or the address range described by addr and len does not fit within the buffer registered at buf_index.
It seems to me that I should pick a memory address and a block of memory for it to use, but using an address like 0x555555500000 with a len of 4096 gets me the same error.
How does IORING_OP_READ_FIXED work? Below is a working example of IORING_OP_READ:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <linux/io_uring.h>
// NOTE(review): these are compiler-only fences (empty asm with a "memory"
// clobber); they stop the compiler from reordering accesses to the shared
// ring indices but emit no CPU barrier instruction.  That is sufficient on
// strongly-ordered x86; weakly-ordered architectures (ARM, POWER) would
// need real hardware barriers (e.g. C11/C++11 atomics or __sync_synchronize).
#define read_barrier() __asm__ __volatile__("":::"memory")
#define write_barrier() __asm__ __volatile__("":::"memory")
/*
 * Minimal io_uring example using raw syscalls (no liburing): submits a
 * single IORING_OP_READ for the file named by argv[1], waits for its
 * completion, and prints the file contents to stdout.
 *
 * Returns 0 on success, non-zero on any setup or I/O failure.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }

    struct io_uring_params uring;
    memset(&uring, 0, sizeof(uring));

    // Create the ring; the kernel rounds the entry count up to a power of
    // two and fills `uring` with the offsets of the ring structures.
    auto queue_size = 5;
    int ring_fd = (int)syscall(__NR_io_uring_setup, queue_size, &uring);
    if (ring_fd < 0) {
        perror("io_uring_setup");
        return 1;
    }

    // This code accesses both SQ and CQ fields through one mapping, which
    // relies on the kernel placing both rings in the same region
    // (IORING_FEAT_SINGLE_MMAP, kernel >= 5.4 -- TODO confirm at runtime).
    // Map the LARGER of the two ring sizes: the original mapped only the SQ
    // part, so reads through cq_off.cqes could run past the mapping.
    size_t sring_sz = uring.sq_off.array + uring.sq_entries * sizeof(unsigned);
    size_t cring_sz = uring.cq_off.cqes + uring.cq_entries * sizeof(struct io_uring_cqe);
    size_t ring_sz = sring_sz > cring_sz ? sring_sz : cring_sz;

    auto *ring_ptr = (char*)mmap(0, ring_sz, PROT_READ | PROT_WRITE,
                                 MAP_SHARED | MAP_POPULATE, ring_fd, IORING_OFF_SQ_RING);
    if (ring_ptr == MAP_FAILED) {
        perror("mmap sq ring");
        return 1;
    }
    auto *submit_entries = (io_uring_sqe*)mmap(0, uring.sq_entries * sizeof(struct io_uring_sqe),
                                 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                                 ring_fd, IORING_OFF_SQES);
    if (submit_entries == MAP_FAILED) {
        perror("mmap sqes");
        return 1;
    }

    // Ring indices live in the mapped memory and are shared with the kernel.
    unsigned &sqTail = *(unsigned*)(ring_ptr + uring.sq_off.tail);
    unsigned &sqMask = *(unsigned*)(ring_ptr + uring.sq_off.ring_mask);
    unsigned *sqArray = (unsigned*)(ring_ptr + uring.sq_off.array);
    unsigned &cqHead = *(unsigned*)(ring_ptr + uring.cq_off.head);
    unsigned &cqTail = *(unsigned*)(ring_ptr + uring.cq_off.tail);
    unsigned &cqMask = *(unsigned*)(ring_ptr + uring.cq_off.ring_mask);
    io_uring_cqe *cqes = (io_uring_cqe*)(ring_ptr + uring.cq_off.cqes);

    int fd[2];
    fd[0] = open(argv[1], O_RDONLY);
    if (fd[0] < 0) {
        perror("open");
        return 1;
    }
    struct stat st;                      // renamed: `stat` shadowed the type
    if (fstat(fd[0], &st) < 0) {
        perror("fstat");
        return 1;
    }

    // Round the file size up to a multiple of 64 bytes.
    int size_aligned = (int)((st.st_size + 63) & ~(off_t)63);

    // calloc (not malloc) so the buffer is zero-filled: the read may be
    // shorter than size_aligned and puts() below needs a NUL terminator.
    // The +1 covers a file whose size is an exact multiple of 64.
    auto *fileBuf = (unsigned char*)calloc(2 * (size_t)size_aligned + 1, 1);
    if (!fileBuf) {
        perror("calloc");
        return 1;
    }

    unsigned tail = sqTail;
    for (int i = 0; i < 1; i++) {
        io_uring_sqe &sqe = submit_entries[tail & sqMask];
        // Zero the whole SQE first: stale off/ioprio/buf_index/rw_flags
        // values would otherwise be interpreted by the kernel (-EINVAL or
        // reads at a garbage offset).
        memset(&sqe, 0, sizeof(sqe));
        sqe.fd = fd[i];
        sqe.opcode = IORING_OP_READ;
        sqe.addr = (unsigned long long)(fileBuf + i * size_aligned);
        sqe.len = size_aligned;
        // Stash the buffer address so the completion handler can find it.
        sqe.user_data = (unsigned long long)(fileBuf + i * size_aligned);
        sqArray[tail & sqMask] = tail & sqMask;
        tail++;
    }
    write_barrier();   // SQE/array contents must be visible before the tail
    sqTail = tail;     // publish: kernel now owns these entries
    write_barrier();

    // Submit 1 SQE and block until at least 1 completion is available.
    int ret = (int)syscall(__NR_io_uring_enter, ring_fd, 1, 1, IORING_ENTER_GETEVENTS, 0);
    if (ret < 0) {
        perror("io_uring_enter");
        return 1;
    }

    read_barrier();    // see the kernel's CQE writes before reading them
    while (cqHead != cqTail) {
        io_uring_cqe &cqe = cqes[cqHead & cqMask];
        if (cqe.res < 0) {
            // res carries a negated errno on failure.
            fprintf(stderr, "read failed: %s\n", strerror(-cqe.res));
        } else {
            // res is the byte count actually read; terminate there so puts()
            // never walks past the data (fixes UB on the unterminated buffer).
            char *buf = (char*)cqe.user_data;
            buf[cqe.res] = '\0';
            puts(buf);
        }
        cqHead++;
        write_barrier();   // make the consumed head visible to the kernel
    }

    free(fileBuf);
    close(fd[0]);
    munmap(submit_entries, uring.sq_entries * sizeof(struct io_uring_sqe));
    munmap(ring_ptr, ring_sz);
    close(ring_fd);
    return 0;
}