I have an assignment which asks for implementing some files related syscalls as well as the structures needed to make them work (i.e. file table and file handle).
This is my implementation of the structures:
// File Handle -------------------------
struct filehandle {
int flags;
off_t offset;
unsigned int refcount;
struct vnode *vnode_ptr;
struct lock *offset_lock;
struct spinlock refcount_spinlock;
// flags:
// bit 0: O_RDONLY or O_WRONLY
// bit 1: O_RDWR
// bit 2: is_seekable
};
struct filehandle *filehandle_create(void);
void filehandle_destroy(struct filehandle *);
void filehandle_incref(struct filehandle *);
void filehandle_decref(struct filehandle *);
// File Table --------------------------
struct filetable {
struct filehandle **table;
struct lock **fh_locks;
struct rwlock *table_lock;
int search_start_index;
int capacity;
};
struct filetable *filetable_create(void);
void filetable_destroy(struct filetable *);
void filetable_incref_all(struct filetable *);
void filetable_decref_all(struct filetable *);
int get_min_available_fd(struct filetable *, int *);
void reclaim_fd(struct filetable *, int);
void filetable_lock_handle(struct filetable *, int);
void filetable_unlock_handle(struct filetable *, int);
void filetable_lock(struct filetable *);
void filetable_unlock(struct filetable *);
In the struct filehandle
, I have a spinlock for changing the refcount. But I believe there is something wrong here.
Here is the implementation of the sys_close
syscall:
void sys_close_nocheck(int fd)
{
struct filehandle **handles = curproc->ft->table;
filehandle_decref(handles[fd]);
// TODO accessing the refcount
// without acquiring the lock.
if (handles[fd]->refcount == 0) {
filehandle_destroy(handles[fd]);
}
handles[fd] = NULL;
}
int sys_close(int fd)
{
// when to return EIO?..
struct filetable *ft = curproc->ft;
if (fd < 0 || fd >= ft->capacity) {
return EBADF;
}
filetable_lock_handle(ft, fd);
if (ft->table[fd] == NULL) {
filetable_unlock_handle(ft, fd);
return EBADF;
}
sys_close_nocheck(fd);
filetable_unlock_handle(ft, fd);
return 0;
}
And here is the implementation of the sys_dup2
function:
int sys_dup2(int oldfd, int newfd, int *ret_fd)
{
// when to return EMFILE or ENFILE?...
struct filetable *ft = curproc->ft;
if (oldfd < 0 || newfd < 0 ||
oldfd >= ft->capacity || newfd >= ft->capacity) {
return EBADF;
}
filetable_lock_handle(ft, oldfd);
if (ft->table[oldfd] == NULL) {
filetable_unlock_handle(ft, oldfd);
return EBADF;
}
*ret_fd = newfd;
if (oldfd == newfd) {
filetable_unlock_handle(ft, oldfd);
return 0;
}
filetable_lock_handle(ft, newfd);
if (ft->table[newfd] != NULL) {
sys_close_nocheck(newfd);
}
struct filehandle *fh = ft->table[oldfd];
filehandle_incref(fh);
ft->table[newfd] = fh;
filetable_unlock_handle(ft, oldfd);
filetable_unlock_handle(ft, newfd);
return 0;
}
I'm incrementing the refcount
in the sys_dup2
function and decrementing it in sys_close
. But notice that in sys_close
, I'm checking whether the refcount
is zero or not, and if zero, then destroy the filehandle. But this is done without acquiring the spinlock, which means that it's possible that just after the condition of the refcount evaluates as true, the thread may be descheduled and another sys_dup2
call can continue, copy the filehandle
and after that, the thread with sys_close
gets scheduled, and destroys the filehandle
, which makes this code unreliable.
I can't keep holding the spinlock before the check for two reasons:
1 - since it's a part of the filehandle and it can't be acquired before the destruction.
2 - If the condition is true and the function filehandle_destroy
gets called, that's inefficient and a sleep lock should be used instead of a spinlock. But I believe I also shouldn't use a sleep lock since most of the time, the lock would only protect a single line of code (e.g. filehandle->refcount++
).
What should be done with this design so that it becomes solid and not susceptible for some bad behaviour?
In case someone needs to look at the implementation of the functions, here are they:
struct filehandle *filehandle_create()
{
struct filehandle *fh;
fh = kmalloc(sizeof(*fh));
if (fh == NULL) {
return NULL;
}
fh->offset_lock = lock_create("offset_lock");
if (fh->offset_lock == NULL) {
kfree(fh);
return NULL;
}
spinlock_init(&(fh->refcount_spinlock));
fh->refcount = 1;
fh->offset = 0;
fh->vnode_ptr = NULL;
fh->flags = 0;
return fh;
}
void filehandle_destroy(struct filehandle *fh)
{
//TODO check the cleaning and freeing.
KASSERT(fh != NULL);
KASSERT(fh->offset_lock != NULL);
if (fh->vnode_ptr != NULL) {
vfs_close(fh->vnode_ptr);
}
lock_destroy(fh->offset_lock);
spinlock_cleanup(&(fh->refcount_spinlock));
kfree(fh);
}
void filehandle_incref(struct filehandle *fh)
{
spinlock_acquire(&(fh->refcount_spinlock));
fh->refcount++;
spinlock_release(&(fh->refcount_spinlock));
}
void filehandle_decref(struct filehandle *fh)
{
spinlock_acquire(&(fh->refcount_spinlock));
KASSERT(fh->refcount > 0);
fh->refcount--;
spinlock_release(&(fh->refcount_spinlock));
}
struct filetable *filetable_create()
{
struct filetable *ft;
ft = kmalloc(sizeof(*ft));
if (ft == NULL) {
return NULL;
}
ft->capacity = FILETABLE_SIZE;
ft->table = kmalloc(ft->capacity * sizeof(struct filehandle *));
if (ft->table == NULL) {
kfree(ft);
return NULL;
}
ft->fh_locks = kmalloc(ft->capacity * sizeof(struct lock *));
if (ft->table == NULL) {
kfree(ft->table);
kfree(ft);
return NULL;
}
// TODO use custom names for easier debugging.
ft->table_lock = rwlock_create("table_lock");
if (ft->table_lock == NULL) {
kfree(ft->table);
kfree(ft->fh_locks);
kfree(ft);
return NULL;
}
for (int i = 0; i < ft->capacity; i++) {
// TODO use custom names for easier debugging.
ft->fh_locks[i] = lock_create("fh_lock");
if (ft->fh_locks[i] == NULL) {
for (int j = 0; j < i; j++) {
lock_destroy(ft->fh_locks[i]);
}
rwlock_destroy(ft->table_lock);
kfree(ft->table);
kfree(ft->fh_locks);
kfree(ft);
return NULL;
}
}
//TODO may use a function here (i.e. bzero)
for (int i = 0; i < ft->capacity; i++) {
ft->table[i] = NULL;
}
ft->search_start_index = 0;
return ft;
}
void filetable_destroy(struct filetable *ft)
{
KASSERT(ft != NULL);
if (ft->table != NULL) {
kfree(ft->table);
}
rwlock_destroy(ft->table_lock);
if (ft->fh_locks != NULL) {
for (int i = 0; i < ft->capacity; i++) {
lock_destroy(ft->fh_locks[i]);
}
kfree(ft->fh_locks);
}
kfree(ft);
}
void filetable_incref_all(struct filetable *ft)
{
for (int i = 0; i < ft->capacity; i++) {
if (ft->table[i] != NULL) {
ft->table[i]->refcount++;
}
}
}
void filetable_decref_all(struct filetable *ft)
{
for (int i = 0; i < ft->capacity; i++) {
if (ft->table[i] != NULL) {
KASSERT(ft->table[i]->refcount > 0);
ft->table[i]->refcount--;
}
}
}
int get_min_available_fd(struct filetable *ft, int *fd)
{
for (int i = ft->search_start_index; i < ft->capacity; i++) {
if (ft->table[i] == NULL) {
ft->search_start_index = i + 1;
*fd = i;
return 0;
}
}
return EMFILE;
}
void reclaim_fd(struct filetable *ft, int fd)
{
KASSERT(fd >= 0);
if (fd < ft->search_start_index) {
ft->search_start_index = fd;
}
}
void filetable_lock_handle(struct filetable *ft, int fd)
{
// no check for the fd since this is called from
// a trusted source.
// it's fine if the thread is descheduled after only
// acquiring the rwlock and not the sleep lock since
// the rwlock does nothing except for indicating that
// there are threads using the table currently. if
// this thread is trying to lock a fh that is already
// locked, it'll sleep, but no one will be able to
// acquire the global rw lock until all locks are
// released (including this one that is being waited
// for). that means that the order in which the
// locks are acquired matters.
rwlock_acquire_read(ft->table_lock);
lock_acquire(ft->fh_locks[fd]);
}
void filetable_unlock_handle(struct filetable *ft, int fd)
{
// does the order matter here?
lock_release(ft->fh_locks[fd]);
rwlock_release_read(ft->table_lock);
}
void filetable_lock(struct filetable *ft)
{
rwlock_acquire_write(ft->table_lock);
}
void filetable_unlock(struct filetable *ft)
{
rwlock_release_write(ft->table_lock);
}
Edit:
after the help of @Tsyvarev, I was able to get to a point which I believe is probably well synchronized.
sys_close
:
void sys_close_nocheck(int fd)
{
struct filehandle **handles = curproc->ft->table;
// will return 0 if the refcount is
// 0 after decrementing it.
if (!filehandle_decref(handles[fd])) {
filehandle_destroy(handles[fd]);
}
handles[fd] = NULL;
}
int sys_close(int fd)
{
// when to return EIO?..
struct filetable *ft = curproc->ft;
if (fd < 0 || fd >= ft->capacity) {
return EBADF;
}
filetable_lock_handle(ft, fd);
if (ft->table[fd] == NULL) {
filetable_unlock_handle(ft, fd);
return EBADF;
}
sys_close_nocheck(fd);
filetable_unlock_handle(ft, fd);
return 0;
}
sys_dup2
:
int sys_dup2(int oldfd, int newfd, int *ret_fd)
{
// when to return EMFILE or ENFILE?...
struct filetable *ft = curproc->ft;
if (oldfd < 0 || newfd < 0 ||
oldfd >= ft->capacity || newfd >= ft->capacity) {
return EBADF;
}
bool are_the_same = false;
if (oldfd < newfd) {
filetable_lock_handle(ft, oldfd);
filetable_lock_handle(ft, newfd);
} else if (newfd < oldfd) {
filetable_lock_handle(ft, newfd);
filetable_lock_handle(ft, oldfd);
} else {
filetable_lock_handle(ft, oldfd);
are_the_same = true;
}
struct filehandle *fh = ft->table[oldfd];
// filehandle_incref would return 0 in
// case the refcount was 0 when trying
// to increment it. if that's the case,
// then this filehandle is in the middle
// of being destroyed. Equivelent for
// having a NULL in this slot.
if (fh == NULL || !filehandle_incref(fh)) {
filetable_unlock_handle(ft, oldfd);
if (!are_the_same) {
filetable_unlock_handle(ft, newfd);
}
return EBADF;
}
*ret_fd = newfd;
if (are_the_same) {
filetable_unlock_handle(ft, oldfd);
return 0;
}
if (ft->table[newfd] != NULL) {
sys_close_nocheck(newfd);
}
ft->table[newfd] = fh;
filetable_unlock_handle(ft, oldfd);
filetable_unlock_handle(ft, newfd);
return 0;
}
filehandle_incref
and filehandle_decref
:
bool filehandle_incref(struct filehandle *fh)
{
spinlock_acquire(&(fh->refcount_spinlock));
if (fh->refcount == 0) {
spinlock_release(&(fh->refcount_spinlock));
return false;
}
fh->refcount++;
spinlock_release(&(fh->refcount_spinlock));
return true;
}
bool filehandle_decref(struct filehandle *fh)
{
spinlock_acquire(&(fh->refcount_spinlock));
KASSERT(fh->refcount > 0);
fh->refcount--;
bool
result = (bool)fh->refcount;
spinlock_release(&(fh->refcount_spinlock));
return result;
}
I'm not sure if that's the case. Is it well synchronized now?