I need to create a gzipped tar archive (.tar.gz) from multiple CSV files stored in an Azure Blob Storage V2 container named input, and save the resulting file in another container named output, using the SharpZipLib library in C# from an Azure Function. A single CSV file can be up to 3 GB in size.
It works when I download the files from blob storage into the working directory, create the .tar.gz there, and then upload the resulting archive back to blob storage. I want to do it without downloading the files first, i.e. build the .tar.gz directly on blob storage, because the files are very large (around 4 GB).
/// <summary>
/// Streams every blob listed by <c>ListAllBlobsName</c> for the "input" container into a
/// single gzip-compressed tar archive, written directly to the "file.tar.gz" block blob in
/// the "output" container. Each source blob is copied straight into the archive stream
/// instead of being saved to a local file first.
/// </summary>
/// <remarks>
/// The method is synchronous: it returns only after the archive stream has been closed, and
/// any storage or archive exception propagates to the caller. (The previous
/// <c>async void</c> form contained no <c>await</c>, so the only effect of <c>async</c> was
/// to make those exceptions impossible for the caller to catch.)
/// </remarks>
public static void tar()
{
    // Placeholder value; supply the real connection string from configuration, not source code.
    string connectionString = "XXXX";
    string sourceContainerName = "input";
    string destinationContainerName = "output";
    string archiveName = "file.tar.gz";

    List<string> blobs = ListAllBlobsName(connectionString, sourceContainerName);

    BlobServiceClient blobServiceClient = new BlobServiceClient(connectionString);
    // Use the same container name that was used for listing so the two cannot drift apart.
    var sourceContainer = blobServiceClient.GetBlobContainerClient(sourceContainerName);
    var desContainer = blobServiceClient.GetBlobContainerClient(destinationContainerName);
    var desBlob = desContainer.GetBlockBlobClient(archiveName);

    var options = new BlockBlobOpenWriteOptions
    {
        HttpHeaders = new BlobHttpHeaders
        {
            ContentType = MimeMapping.GetMimeMapping(archiveName),
        },
    };

    // Stream chain: tar -> gzip -> destination blob. Without the gzip layer the blob would be
    // a plain, uncompressed tar despite its ".tar.gz" name.
    // Disposal runs innermost-first: the tar stream writes its end-of-archive blocks, the gzip
    // stream writes its trailer, and closing the blob write stream finishes the upload.
    using (var outStream = desBlob.OpenWrite(true, options))
    using (var gzipStream = new ICSharpCode.SharpZipLib.GZip.GZipOutputStream(outStream))
    using (TarOutputStream tarOutputStream = new TarOutputStream(gzipStream, Encoding.UTF8))
    {
        foreach (var blob in blobs)
        {
            var source = sourceContainer.GetBlobClient(blob);
            Console.WriteLine("Adding file " + blob + " in tar zip");

            // The tar header precedes the data and must carry the exact entry size, so the
            // blob length is fetched before any content is streamed.
            Azure.Storage.Blobs.Models.BlobProperties properties = source.GetProperties().Value;
            var entry = TarEntry.CreateTarEntry(blob);
            entry.Size = properties.ContentLength;

            tarOutputStream.PutNextEntry(entry);
            // Copy the blob content directly into the archive; no temporary file is created.
            source.DownloadTo(tarOutputStream);
            tarOutputStream.CloseEntry();

            Console.WriteLine("Added file " + blob + " in tar zip");
            Console.WriteLine();
        }
    }
}