I am using a data flow task to connect to oledb source and run sql code which produces millions of rows. Each row is an xml content there is only one column (nvarchar(max)). I connect this to a script component destination, the reason being when the rowcount reaches 10k i want to stop and write to another file.
It works fine when I use a breakpoint, so if there are 100 rows and I want 10 rows in each file when I use a breakpoint it produces 10 files. But, when I run the pacakge without breakpoints it creates random number of files. For ex: it creates 6 files and 3 of those contains 10 rows and the remaining 70 are split in the other files.
I read through the docs and blogs to see if the input buffer is synchronous, and it seems like it is. i.e each row is processed one at a time at least this is what my understanding is, so I am not sure what is the root cause to this problem? Can someone please shed some light. Below is the code I am using to split files in Script Componenet
[Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute]
public class ScriptMain : UserComponent
{
StreamWriter writer;
int rowCount = 0;
int rowLimit = 50;
String RunFileName = "C:\\Users\\Desktop\\" + DateTime.Now.ToString("yyyyMMddHHmmss") + ".txt";
public override void PreExecute()
{
base.PreExecute();
writer = new System.IO.StreamWriter(RunFileName, true);
rowCount = 0;
}
public override void PostExecute()
{
base.PostExecute();
writer.Dispose();
}
public override void Input0_ProcessInputRow(Input0Buffer Row)
{
if (rowCount == rowLimit)
{
writer.Dispose();
rowCount = 0;
RunFileName = "C:\\Users\\Desktop\\" + DateTime.Now.ToString("yyyyMMddHHmmss") + ".txt";
writer = new System.IO.StreamWriter(RunFileName, true);
}
var data = System.Text.Encoding.Unicode.GetString(Row.XMLInnerText.GetBlobData(0, Convert.ToInt32(Row.XMLInnerText.Length)));
writer.WriteLine(data);
rowCount++;
}
}