If you are only interested in the items of the array that you want to deserialize as an IAsyncEnumerable,
and you want to ignore all the other data, then there is a way to achieve this: you can write a custom JsonConverter
that smuggles out the already deserialized items while deserialization is still in progress.
Here's an example:
/// <summary>
/// Exposes the elements of a single JSON array, located by a dot-separated property path inside a
/// larger JSON document, as an <see cref="IAsyncEnumerable{T}"/>.
/// </summary>
/// <remarks>
/// NOTE(review): System.Text.Json may buffer a complete JSON value before handing it to a custom
/// converter; confirm the memory behaviour for very large documents before relying on it.
/// </remarks>
public static class Deserializer
{
    /// <summary>
    /// Starts deserializing <paramref name="stream"/> without waiting for it to finish and returns the
    /// elements of the array found at <paramref name="propertyPath"/>.
    /// </summary>
    /// <typeparam name="TDataItem">Type each array element is deserialized to.</typeparam>
    /// <param name="stream">Stream containing the JSON document.</param>
    /// <param name="propertyPath">Dot-separated property names leading from the root object to the array, e.g. <c>"DataBox.ItemsBox.Items"</c>.</param>
    /// <param name="cancellationToken">Cancels both the background deserialization and the returned enumeration.</param>
    /// <returns>
    /// The array elements. The sequence is empty when the path does not exist or the property is JSON null.
    /// A failure that occurs before the array has been read completely is thrown from the enumeration.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="propertyPath"/> is null or empty.</exception>
    public static IAsyncEnumerable<TDataItem> DeserializeAsyncEnumerable<TDataItem>(
        Stream stream,
        string propertyPath,
        System.Threading.CancellationToken cancellationToken = default)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        if (string.IsNullOrEmpty(propertyPath))
        {
            throw new ArgumentException("A property path is required.", nameof(propertyPath));
        }

        var converter = new AsyncEnumerableConverter<TDataItem>(propertyPath);
        var options = new JsonSerializerOptions
        {
            Converters = { converter }
        };

        // Deliberately not awaited: items are handed to the caller through the channel while
        // deserialization is still running. PumpAsync never throws, so discarding the task is safe.
        _ = PumpAsync(stream, options, converter.OutputChannel.Writer, cancellationToken);

        return converter.OutputChannel.Reader.ReadAllAsync(cancellationToken);
    }

    /// <summary>
    /// Runs the deserialization and guarantees that the output channel ends up completed,
    /// so that a consumer can never wait forever.
    /// </summary>
    private static async System.Threading.Tasks.Task PumpAsync<TDataItem>(
        Stream stream,
        JsonSerializerOptions options,
        ChannelWriter<TDataItem> output,
        System.Threading.CancellationToken cancellationToken)
    {
        Exception failure = null;
        try
        {
            await JsonSerializer.DeserializeAsync<object>(stream, options, cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            failure = ex;
        }

        // If the converter already completed the channel, every item has been delivered and this call
        // is a no-op; an error raised after that point (the converter stops reading at the end of the
        // array instead of at the end of the root value) cannot be delivered and is intentionally dropped.
        // Otherwise (invalid JSON, cancellation, the converter was never invoked, ...) this is what
        // unblocks the consumer and surfaces the failure.
        output.TryComplete(failure);
    }

    /// <summary>
    /// Converter registered for <see cref="object"/> that navigates to the configured array and
    /// writes each deserialized element to <see cref="OutputChannel"/> instead of building a result.
    /// </summary>
    private class AsyncEnumerableConverter<TDataItem> : JsonConverter<object>
    {
        private readonly string _propertyPath;
        private readonly string[] _propertyNames;

        public AsyncEnumerableConverter(string propertyPath)
        {
            _propertyPath = propertyPath;
            _propertyNames = propertyPath.Split('.');
        }

        /// <summary>Channel through which the deserialized array elements are handed out.</summary>
        public Channel<TDataItem> OutputChannel { get; } = Channel.CreateUnbounded<TDataItem>(new()
        {
            SingleReader = true,
            SingleWriter = true
        });

        /// <summary>
        /// Moves to the configured array, pushes its elements to <see cref="OutputChannel"/> and
        /// completes the channel. Always returns null; the items are the only output.
        /// </summary>
        /// <exception cref="JsonException">The property at the configured path is neither an array nor null.</exception>
        public override object Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
        {
            if (TryMoveToItemsProperty(ref reader) && reader.TokenType != JsonTokenType.Null)
            {
                if (reader.TokenType != JsonTokenType.StartArray)
                {
                    throw new JsonException($"Property {_propertyPath} is not JSON Array");
                }

                ReadItems(ref reader);
            }

            OutputChannel.Writer.Complete();
            return null;
        }

        public override void Write(Utf8JsonWriter writer, object value, JsonSerializerOptions options) =>
            throw new NotSupportedException();

        /// <summary>
        /// Walks the path one object level at a time. On success the reader is positioned on the
        /// value of the last path segment.
        /// </summary>
        /// <returns>false when the root is not an object or a path segment is missing or not an object.</returns>
        private bool TryMoveToItemsProperty(ref Utf8JsonReader reader)
        {
            if (reader.TokenType != JsonTokenType.StartObject)
            {
                return false;
            }

            var level = 0;
            while (reader.Read())
            {
                if (reader.TokenType == JsonTokenType.EndObject)
                {
                    // The object on the path ended without containing the next segment.
                    return false;
                }

                if (reader.TokenType != JsonTokenType.PropertyName)
                {
                    break;
                }

                var isMatch = reader.ValueTextEquals(_propertyNames[level]);
                if (!reader.Read())
                {
                    break;
                }

                if (!isMatch)
                {
                    // Jump over the whole value so that neither its EndObject nor property names
                    // nested inside it are mistaken for part of the path. TrySkip is used because
                    // Skip is documented to throw when the reader was given partial data.
                    if (!reader.TrySkip())
                    {
                        break;
                    }

                    continue;
                }

                level++;
                if (level == _propertyNames.Length)
                {
                    return true;
                }

                if (reader.TokenType != JsonTokenType.StartObject)
                {
                    // An intermediate segment that is null or a scalar cannot contain the rest of the path.
                    return false;
                }
            }

            throw new JsonException("Invalid JSON");
        }

        /// <summary>
        /// Deserializes every element of the array the reader is positioned on and writes it to the channel.
        /// On return the reader is positioned on the closing EndArray token.
        /// </summary>
        private void ReadItems(ref Utf8JsonReader reader)
        {
            // Step from StartArray to the first element (or directly to EndArray).
            while (reader.Read() && reader.TokenType != JsonTokenType.EndArray)
            {
                // Default options on purpose: the options this converter is registered in would route
                // any object-typed member of an item back into this converter, which would complete
                // the channel early. Apart from the converter they are the defaults anyway.
                var item = JsonSerializer.Deserialize<TDataItem>(ref reader, options: null);
                OutputChannel.Writer.TryWrite(item);
            }

            if (reader.TokenType != JsonTokenType.EndArray)
            {
                // Read() reported the end of the data before the array was closed.
                throw new JsonException("Invalid JSON");
            }
        }
    }
}
The converter skips all the data you are not interested in and follows the given property path to the array. Once it has deserialized all the elements and written them to the channel, it does not continue reading. (Of course, it would be possible to deserialize the complete document and return it at the end, but in this context that doesn't make sense to me.)
The DeserializeAsyncEnumerable method creates an instance of the converter and starts deserializing with it,
but it doesn't wait for the deserialization to finish; it directly returns the IAsyncEnumerable of the converter's output channel.
Also, the DeserializeAsync(Stream) method is used internally to ensure the required memory efficiency.
Consider the following data models:
/// <summary>
/// Sample model for the top-level JSON object: a string value plus a nested <see cref="DataBox"/>.
/// </summary>
public class Root
{
/// <summary>Sample string value on the root object.</summary>
public string Property { get; set; }
/// <summary>Nested object; the first segment of the usage path "DataBox.ItemsBox.Items".</summary>
public DataBox DataBox { get; set; }
}
/// <summary>
/// Sample model for the intermediate object: a string value plus a nested <see cref="ItemsBox"/>.
/// </summary>
public class DataBox
{
/// <summary>Sample string value on this object.</summary>
public string Property { get; set; }
/// <summary>Nested object; the second segment of the usage path "DataBox.ItemsBox.Items".</summary>
public ItemsBox ItemsBox { get; set; }
}
/// <summary>
/// Sample model for the object that holds the list of <see cref="Item"/> instances.
/// </summary>
public class ItemsBox
{
/// <summary>Sample string value on this object.</summary>
public string Property { get; set; }
/// <summary>The list addressed by the last segment of the usage path "DataBox.ItemsBox.Items".</summary>
public List<Item> Items { get; set; }
}
/// <summary>
/// Sample element type of <c>ItemsBox.Items</c>; the type argument used in the usage example.
/// </summary>
public class Item
{
/// <summary>Identifier of the item.</summary>
public Guid Id { get; set; }
/// <summary>Sample string value on the item.</summary>
public string Property { get; set; }
}
Then the usage will be as follows:
// The path lists the property names from the root object down to the array, separated by dots.
// `stream` is not defined in this snippet; presumably a readable Stream containing the JSON document.
var asyncEnumerable = Deserializer.DeserializeAsyncEnumerable<Item>(stream, "DataBox.ItemsBox.Items");
// Enumerate the Item instances read from the converter's output channel.
await foreach (var dataItem in asyncEnumerable)
{
// Placeholder: process dataItem here.
...
}