I'm programming in C, using Berkeley DB 4.3 (/usr/lib64/libdb-4.3.so) on RHEL 5.6 with kernel 2.6.18-238_xen_AMD64.
In my tests (writing 1,000,000 key/value pairs), if a process exits abnormally (Ctrl+C, kill, or a failed assert) while an operation on the database is in progress, any later attempt to open that database blocks. strace shows the process stuck in a futex(ptr_to_something, FUTEX_WAIT, 2, NULL) call after opening the __db.00x files (e.g. __db.001, __db.002, __db.003).
The only way I know to clear the lock is to remove the __db.00x files, and subsequent tests showed that the database is not damaged. This meets my requirements, but I'm wondering whether there is a better (or more elegant) way to solve this problem.
Below are part of the strace output (stderr) and the code that operates on the database, which may help.
Part of the strace output (stderr):
...
open("__db.001", O_RDWR) = 3
fcntl(3, F_SETFD, FD_CLOEXEC) = 0
fstat(3, {st_mode=S_IFREG|0640, st_size=24576, ...}) = 0
close(3) = 0
open("__db.001", O_RDWR) = 3
fcntl(3, F_SETFD, FD_CLOEXEC) = 0
mmap(NULL, 24576, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = 0x2afcc4149000
close(3) = 0
futex(0x2afcc4149000, FUTEX_WAIT, 2, NULL **[[stuck here]]**
Code that operates on the database:
/* Handle type used by the bdb_* wrappers below; it is simply a pointer to a Berkeley DB `DB`. */
typedef DB* db_handle;
/* Print "<what>:<code>, <message>" on stderr and terminate with EXIT_FAILURE. */
static void bdb_die(const char *what, int ret)
{
    fprintf(stderr, "%s:%d, %s\n", what, ret, db_strerror(ret));
    exit(EXIT_FAILURE);
}

/*
 * Create and open a Berkeley DB environment (no explicit home directory is
 * given, NULL is passed), then open -- creating it if necessary -- the B-tree
 * database `filename` inside that environment.
 *
 * filename       database file name handed to DB->open.
 * cache_size_mb  cache size in megabytes; it is split into whole gigabytes
 *                plus remaining bytes, as DB_ENV->set_cachesize expects.
 *
 * Returns the opened database handle. Every failure is reported on stderr and
 * ends the process with EXIT_FAILURE, so the function never returns an error.
 *
 * NOTE(review): the DB_ENV handle is neither returned nor stored here, so
 * callers of this function cannot close the environment themselves.
 */
db_handle bdb_open(const char *filename, u_int32_t cache_size_mb)
{
    /*
     * DB_ENV->open and DB->open accept different flag sets.  The DB_INIT_*
     * subsystem flags belong to the environment only; they must not be
     * forwarded to DB->open, where they are not documented flags and their
     * bit values may alias unrelated DB->open options.
     */
    const u_int32_t env_flags = DB_CREATE | DB_THREAD | DB_INIT_LOCK | DB_INIT_MPOOL;
    const u_int32_t db_flags = DB_CREATE | DB_THREAD;
    const u_int32_t gb = cache_size_mb / 1024;
    const u_int32_t mb = cache_size_mb % 1024;
    DB_ENV *env;
    db_handle dbp;
    int ret;

    ret = db_env_create(&env, 0);
    if (ret != 0)
        bdb_die("db_env_create", ret);

    /* Lock timeout; the value is expressed in microseconds (3 seconds here). */
    ret = env->set_timeout(env, 3 * 1000000, DB_SET_LOCK_TIMEOUT);
    if (ret != 0)
        bdb_die("env->set_timeout", ret);

    /* Run the deadlock detector with the default policy on lock conflicts. */
    ret = env->set_lk_detect(env, DB_LOCK_DEFAULT);
    if (ret != 0)
        bdb_die("env->set_lk_detect", ret);

    /* mb < 1024, so the byte count below always fits in 32 bits. */
    ret = env->set_cachesize(env, gb, mb * 1024 * 1024, 0);
    if (ret != 0)
        bdb_die("env->set_cachesize", ret);

    ret = env->open(env, NULL, env_flags, 0);
    if (ret != 0)
        bdb_die("db_env_open", ret);

    ret = db_create(&dbp, env, 0);
    if (ret != 0)
        bdb_die("db_create", ret);

    ret = dbp->open(dbp, NULL, filename, NULL, DB_BTREE, db_flags, 0664);
    if (ret != 0)
        bdb_die("dbp->open", ret);

    return dbp;
}
/*
 * Store one key/value pair in the database behind `db`.
 *
 * key / keylen   pointer to the key bytes and their length.
 * val / vallen   pointer to the value bytes and their length.
 *
 * Returns the result of db->put unchanged.
 */
int bdb_put(db_handle db, void* key, u_int32_t keylen, void* val, u_int32_t vallen)
{
    DBT key_dbt;
    DBT val_dbt;
    int status;

    /* DBTs are fully cleared before the used fields are filled in. */
    bzero(&key_dbt, sizeof(key_dbt));
    key_dbt.data = key;
    key_dbt.size = keylen;

    bzero(&val_dbt, sizeof(val_dbt));
    val_dbt.data = val;
    val_dbt.size = vallen;

    /* NULL second argument and zero flags, exactly as passed through to db->put. */
    status = db->put(db, NULL, &key_dbt, &val_dbt, 0);
    return status;
}
/*
 * Look up `key` in the database behind `db` and copy its value into the
 * caller-supplied buffer.
 *
 * key / keylen   pointer to the key bytes and their length.
 * buf / buflen   caller-owned destination buffer and its capacity; the DBT is
 *                flagged DB_DBT_USERMEM so the library writes into `buf`.
 * nwrite         optional (may be NULL); on success receives the value's size.
 *
 * Returns the result of db->get unchanged. `*nwrite` is written only when
 * that result is 0.
 */
int bdb_get(db_handle db, void* key, const u_int32_t keylen,
void* buf, u_int32_t buflen, u_int32_t* nwrite)
{
    DBT key_dbt;
    DBT val_dbt;
    int status;

    bzero(&key_dbt, sizeof(key_dbt));
    key_dbt.data = key;
    key_dbt.size = keylen;

    bzero(&val_dbt, sizeof(val_dbt));
    val_dbt.data = buf;
    val_dbt.ulen = buflen;
    val_dbt.flags = DB_DBT_USERMEM;

    status = db->get(db, NULL, &key_dbt, &val_dbt, 0);
    if (status != 0)
        return status;

    if (nwrite != NULL)
        *nwrite = val_dbt.size;
    return 0;
}