I have a batch block in tpl dataflow and have several target blocks linked to the batch block. However, the number of target blocks changes dynamically and thus the size of the batches. Problem is that the batch size must be supplied at the initialization of the batchblock and I dont see a way to adjust it later on. Any ideas how to get around this? Is the only way to unlink (dispose all links to batchblock and from batchblock), re-initialize the batch block with a new batch size and then link again? I could do that but how to ensure that old batches and new batches are not get all mixed up?
For example if I had 2 transform blocks stream to batch block and now have an additional transform block and want to increase batch size to 3, how do I make sure that all previous batches prior to the increase were processed to ensure synched behavior? Point is that all transform blocks get the exact identical item and the outputs of those transform blocks should be batched in the way that only those outputs are batched that match identical inputs.
Here a sample how I want it to be:
Constant stream of ints to transform blocks: 1,2,3, [point where batch size is increased],4,5,...
Let transform blocks output what they got in like 1 => 1
So batchblock should output like this : [1,1], [2,2], [3,3], [change of batch size], [4,4,4], [5,5,5],...
Here my current code:
public class Test
{
private Stopwatch watch;
private BroadcastBlock<List<InputObject>> tempBCB;
private BatchBlock<Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>> batchBlock;
private TransformBlock<Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>[], List<FinalObject>> transformBlock;
private ActionBlock<List<FinalObject>> justToFlushTransformBlock;
private CoreLogic core1;
private CoreLogic core2;
public Test()
{
tempBCB = new BroadcastBlock<List<InputObject>>(input => input);
//here batch size = 2
batchBlock = new BatchBlock<Tuple<List<InputObject>,Dictionary<int,IntermediateObject>>>(2, new GroupingDataflowBlockOptions { Greedy = false });
transformBlock = new TransformBlock<Tuple<List<InputObject>,Dictionary<int,IntermediateObject>>[],List<FinalObject>>(array =>
{
List<InputObject> inputObjects = array[0].Item1;
List<FinalObject> ret = inputObjects.ConvertAll(x => new FinalObject(x));
foreach (var tuple in array)
{
//iterate over each individual object
foreach (var dictionary in tuple.Item2)
{
ret[dictionary.Key].outputList.Add(dictionary.Value);
}
}
return ret;
}, new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = DataflowBlockOptions.Unbounded });
justToFlushTransformBlock = new ActionBlock<List<FinalObject>>(list =>
{
//just in order to accept items from the transformBlock output queue
});
//Generate 2 CoreLogic objects
core1 = new CoreLogic();
core2 = new CoreLogic();
//linking
tempBCB.LinkTo(core1.transformBlock, new DataflowLinkOptions { PropagateCompletion = true });
tempBCB.LinkTo(core2.transformBlock, new DataflowLinkOptions { PropagateCompletion = true });
core1.transformBlock.LinkTo(batchBlock);
core2.transformBlock.LinkTo(batchBlock);
batchBlock.LinkTo(transformBlock, new DataflowLinkOptions { PropagateCompletion = true });
transformBlock.LinkTo(justToFlushTransformBlock, new DataflowLinkOptions { PropagateCompletion = true });
}
public void Start()
{
const int numberChunks = 30;
watch = new Stopwatch();
watch.Start();
for (int j = 1; j <= numberChunks; j++)
{
int collectionSize = 10000 * j;
List<InputObject> collection = new List<InputObject>(collectionSize);
for (int i = 0; i < collectionSize; i++)
{
collection.Add(new InputObject(i));
}
tempBCB.Post(collection);
}
tempBCB.Complete();
Task.WhenAll(core1.transformBlock.Completion, core2.transformBlock.Completion).ContinueWith(_ =>
{
batchBlock.Complete();
});
transformBlock.Completion.Wait();
watch.Stop();
Console.WriteLine("Elapsed time (in milliseconds): " + watch.ElapsedMilliseconds);
Console.ReadLine();
}
}
public class CoreLogic
{
private Random rand;
public TransformBlock<List<InputObject>, Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>> transformBlock;
public CoreLogic()
{
const int numberIntermediateObjects = 10000;
transformBlock = new TransformBlock<List<InputObject>, Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>>(input =>
{
//please ignore the fact that `input` is not utilized here, the point is to generate a collection of IntermediateObject and return
Dictionary<int, IntermediateObject> ret = new Dictionary<int, IntermediateObject>();
for (int i = 0; i < numberIntermediateObjects; i++)
{
IntermediateObject value = new IntermediateObject(i);
ret.Add(i, value);
}
var tuple = new Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>(input, ret);
return tuple;
});
}
}
public class InputObject : ICloneable
{
public int value1 { get; private set; }
public InputObject(int value)
{
this.value1 = value;
}
object ICloneable.Clone()
{
return Clone();
}
public InputObject Clone()
{
return (InputObject)this.MemberwiseClone();
}
}
public class IntermediateObject
{
public int value1 { get; private set; }
public IntermediateObject(int value)
{
this.value1 = value;
}
}
public class FinalObject
{
public InputObject input { get; private set; }
public List<IntermediateObject> outputList;
public FinalObject(InputObject input)
{
this.input = input;
this.outputList = new List<IntermediateObject>();
}
}
public static class Cloning
{
public static List<TValue> CloneListCloneValues<TValue>(List<TValue> original) where TValue : ICloneable
{
List<TValue> ret = new List<TValue>(original.Count);
foreach (TValue entry in original)
{
ret.Add((TValue)entry.Clone());
}
return ret;
}
}