So I am writing a server in C#. It has a LiteDB database to store messages in. When I start up the server, I also launch an async task to delete expired messages from the DB (messages that are older than some predefined threshold). Lets call this task the GC (garbage collector).
Every time the server receives a message, it stores the message in the DB, and sends a copy to the proper destination.
Since LiteDB is supposed to be thread-safe, I don't use locks to synchronize reads/writes to the DB.
I've made a test (on a WPF app) that connects 100 clients, each in a separate task. Each client sends a message to all other clients and expects to receive a message from all other clients. I then make sure that all messages were sent/received as expected. During this test, the GC runs every 1 second (in the "real world" it will run once a week or something like that).
I observe some weird behavior in the test that I would love some help with.
In the first 1-3 seconds, things seem fine. The GC does a pass once a second, and between passes, many tasks (to send/receive messages) are running. Then, after these 1-3 seconds have passed, the GC does a pass, and then only one message is sent once a second (instead of many messages all at once). The GC task does not do a pass again.
Eventually, after 1 minute of this, I get the following exception:
Exception thrown: 'LiteDB.LiteException' in LiteDB.dll
Database lock timeout when entering in transaction mode after 00:01:00
I assume this means that I have a deadlock somewhere. I've added a simplified version of my server's code.
public class Worker : BackgroundService
{
private static readonly TimeSpan _collectionInterval = TimeSpan.FromSeconds(1);
private static readonly TimeSpan _expirationThreshold = TimeSpan.FromMinutes(2);
private readonly ConcurrentDictionary<string, Client> Clients = new();
private readonly DBHandler _dbHandler = DBHandler.GetInstance(AppDomain.CurrentDomain.BaseDirectory);
protected override async Task ExecuteAsync(CancellationToken cancellationToken)
{
// Create a linked CTSource so that the garbage collection task could cancel
// the entire operation in case of a fatal exception
CancellationTokenSource cancellationTokenSource =
CancellationTokenSource
.CreateLinkedTokenSource(cancellationToken);
CancellationToken token = cancellationTokenSource.Token;
List<Task> GCAndConnectionsTasks = new();
try
{
listener.Start();
// Launch the garbage collection task to remove expired messages from the DB
// Runs throughout the server's lifetime every (TimeSpan)_collectionInterval
GCAndConnectionsTasks.Add(
CollectExpiredMessagesAsync(
collectionInterval: _collectionInterval,
expirationThreshold: _expirationThreshold,
cancellationTokenSource // Allow task to cancel server
)
);
while (!token.IsCancellationRequested)
{
TcpClient tcpClient = await listener.AcceptTcpClientAsync(token);
// Handle the client connection in a separate task
GCAndConnectionsTasks.Add(
HandleClientConnectionAsync(tcpClient, token)
);
}
cancellationTokenSource.Cancel();
await Task.WhenAll(GCAndConnectionsTasks);
}
finally
{
_dbHandler.Dispose();
listener.Stop();
cancellationTokenSource.Dispose();
token.ThrowIfCancellationRequested();
}
}
private async Task HandleClientConnectionAsync(TcpClient tcpClient, CancellationToken token)
{
Client? client = null;
try
{
client = new(tcpClient);
client.EnableKeepAlive();
await SendMessageAsync(new WelcomeMessage(), client, token);
DisconnectReason? reason = null;
Message? message = await ReceiveMessageAsync(client, token);
if (Clients.ContainsKey(message.FromStationID))
{
Clients.TryAdd(message.FromStationID, client);
await ReceiveMessagesAsync(client, token);
}
}
finally
{
// Clean up closed connection
if (client is not null)
{
if (client.StationID is not null)
{
Clients.TryRemove(client.StationID, out _);
}
client.CloseTcpAndStream();
}
}
}
private async Task ReceiveMessagesAsync(Client client, CancellationToken token)
{
while (client.IsConnected && !token.IsCancellationRequested)
{
Message? message = await ReceiveMessageAsync(client, token);
if (token.IsCancellationRequested || message is null)
{
break;
}
await ProcessMessageByTypeAsync(message, token);
}
}
private async Task ProcessMessageByTypeAsync(Message message, CancellationToken token)
{
if (message is null)
{
return;
}
else if (message is AckMessage ackMessage)
{
await ProcessAckMessageAsync(ackMessage, token);
}
else if (message is ComponentsMessage || message is FeedbackMessage)
{
await ProcessDataMessageAsync(message, token);
}
// Ignore other messages
}
private async Task ProcessDataMessageAsync(Message message, CancellationToken token)
{
if (message is null)
{
return;
}
if (message.ToStationID != null)
{
_dbHandler.Insert(message);
Client client;
if (Clients.TryGetValue(message.ToStationID, out client))
{
await SendMessageAsync(message, client, token);
}
}
}
private async Task ProcessAckMessageAsync(AckMessage ackMessage, CancellationToken token)
{
_dbHandler.DeleteMessageByID(ackMessage.AckedMessageID);
}
private static async Task SendMessageAsync(Message message, Client client, CancellationToken token)
{
//use message.Serialize(), then add 4 byte header to create byte[] buffer
await client.WriteAsync(buffer, token);
}
private static async Task<Message?> ReceiveMessageAsync(Client client, CancellationToken token)
{
// use await client.ReadAsync to receive 4 byte header
// use await client.ReadAsync to receive the message bytes into byte[] buffer
return
buffer is null ?
null :
Message.Deserialize(Encoding.UTF8.GetString(buffer, 0, buffer.Length));
}
private async Task CollectExpiredMessagesAsync(
TimeSpan collectionInterval,
TimeSpan expirationThreshold,
CancellationTokenSource source)
{
while (!source.IsCancellationRequested)
{
// Make sure GC has exclusive rights to the Messages of the DB.
try
{
_dbHandler.DeleteExpiredMessages(expirationThreshold);
}
// Fatal error, cancel the entire operation (the server).
catch (Exception ex)
{
source.Cancel();
throw;
}
if (!source.IsCancellationRequested)
{
// Delay before the next garbage collection iteration
await Task.Delay(collectionInterval, source.Token);
}
}
}
}
And the relevant DB code:
private ILiteCollection<Message> GetCollection(Message message)
{
var msgCollection = _dataBase
.GetCollection<Message>(DB.MESSAGES);
msgCollection.EnsureIndex((Message m) => m.MessageID);
return msgCollection;
}
private ILiteCollection<Message> GetMessages()
{
return GetCollection((Message)null);
}
public BsonValue Insert(Message message)
{
message.MessageID = 0;
message.TimeStamp = DateTime.UtcNow;
return GetMessages()
.Insert(message);
}
public bool DeleteMessageByID(BsonValue messageID)
{
return GetMessages()
.Delete(messageID);
}
public IEnumerable<Message> GetMessagesForStationID(string stationID)
{
return GetMessages()
.Find(message => message.ToStationID == stationID);
}
public int DeleteExpiredMessages(TimeSpan expirationThreshold)
{
var cutoffTime = DateTime.UtcNow - expirationThreshold;
return GetMessages()
.DeleteMany((Message m) => m.TimeStamp < cutoffTime);
}