I'm using HDF5DotNet in C# and I have a very large array (several GB) that I want to write to an HDF5 file. It's too big to store the whole thing in memory, so I'm generating regions of it at a time and want to write them out, but still have it look like one big array when it's read back out. I know this is possible with HDF5 but the documentation for the .NET API is somewhat sparse.
I wrote some short example code with a 5 x 3 array filled with values 1..15:
const int ROWS = 5;
const int COLS = 3;
static void Main(string[] args)
{
WriteWholeArray();
WriteArrayByRows();
ushort[,] array = ReadWholeArray();
}
static void WriteWholeArray()
{
H5FileId h5 = H5F.create(Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "test.h5"), H5F.CreateMode.ACC_TRUNC);
H5DataSpaceId dsi = H5S.create_simple(2, new long[] { ROWS, COLS });
H5DataSetId dataset = H5D.create(h5, "array", new H5DataTypeId(H5T.H5Type.NATIVE_USHORT), dsi);
ushort[,] array = new ushort[ROWS, COLS];
ushort value = 1;
for(int i = 0; i < array.GetLength(0); i++)
{
for (int j = 0; j < array.GetLength(1); j++)
{
array[i, j] = value++;
}
}
H5D.write<ushort>(dataset, new H5DataTypeId(H5T.H5Type.NATIVE_USHORT), new H5Array<ushort>(array));
H5D.close(dataset);
H5F.close(h5);
}
static void WriteArrayByRows()
{
H5FileId h5 = H5F.create(Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "test.h5"), H5F.CreateMode.ACC_TRUNC);
H5DataSpaceId dsi = H5S.create_simple(2, new long[] { ROWS, COLS });
H5DataSetId dataset = H5D.create(h5, "array", new H5DataTypeId(H5T.H5Type.NATIVE_USHORT), dsi);
ushort[,] array = new ushort[ROWS, COLS];
ushort value = 1;
for (int i = 0; i < array.GetLength(0); i++)
{
for (int j = 0; j < array.GetLength(1); j++)
{
array[i, j] = value++;
}
}
for(int i = 0; i < array.GetLength(0); i++)
{
H5S.selectHyperslab(dsi, H5S.SelectOperator.SET, new long[] { i, 0 }, new long[] { 1, array.GetLength(1) });
ushort[,] row = new ushort[1, array.GetLength(1)];
for(int j = 0; j < array.GetLength(1); j++)
{
row[0, j] = array[i, j];
}
H5D.write<ushort>(dataset, new H5DataTypeId(H5T.H5Type.NATIVE_USHORT), new H5Array<ushort>(row));
}
H5D.close(dataset);
H5F.close(h5);
}
static ushort[,] ReadWholeArray()
{
H5FileId h5 = H5F.open(Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "test.h5"), H5F.OpenMode.ACC_RDONLY);
ushort[,] array = new ushort[ROWS, COLS];
H5Array<ushort> h5_array = new H5Array<ushort>(array);
H5DataSetId dataset = H5D.open(h5, "array");
H5D.read<ushort>(dataset, new H5DataTypeId(H5T.H5Type.NATIVE_USHORT), h5_array);
H5D.close(dataset);
H5F.close(h5);
return (array);
}
When I write the whole array at once, it reads back in fine. When I try to write by rows, the array I read back in has some correct values (in the wrong elements), some zeroes, and some crazy values (e.g. 43440). Can somebody show me how to do this correctly?