I have an Azure Function with an HTTP trigger. The function takes an email address from the incoming request, searches for it in a tab-delimited text file stored in Azure Blob Storage, and returns the entire matching row as JSON. The code works fine for small files, but I'm getting request timeouts when processing a file of around 200 GB. I know it's a bad idea to download the whole file into a string variable, and that's exactly where the timeout happens. Is there another way to implement this? (I've put a streaming sketch I'm considering at the end of this post.)
Code:
using System;
using System.IO;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Logging;
using Newtonsoft.Json;
using System.Linq;
using System.Collections.Generic;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
namespace V012ProdFunctionApp
{
    public static class V012Consumer
    {
        [FunctionName("V012Consumer")]
        public static async Task<IActionResult> Run(
            [HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = null)] HttpRequest req,
            ILogger log)
        {
            log.LogInformation("C# HTTP trigger function processed a request.");

            // Read the email address from the query string or the request body
            string email = req.Query["email"];
            string requestBody = await new StreamReader(req.Body).ReadToEndAsync();
            dynamic requestdata = JsonConvert.DeserializeObject(requestBody);
            email = email ?? requestdata?.email;

            string connectionString = "DefaultEndpointsProtocol=https;AccountName=storageaccountlearnazure;AccountKey=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx;BlobEndpoint=https://storageaccountlearnazure.blob.core.windows.net/;QueueEndpoint=https://storageaccountlearnazure.queue.core.windows.net/;TableEndpoint=https://storageaccountlearnazure.table.core.windows.net/;FileEndpoint=https://storageaccountlearnazure.file.core.windows.net/;";

            // Set up the connection to the storage account
            CloudStorageAccount storageAccount = CloudStorageAccount.Parse(connectionString);
            // Connect to the blob service
            CloudBlobClient serviceClient = storageAccount.CreateCloudBlobClient();
            // Connect to the blob container
            CloudBlobContainer container = serviceClient.GetContainerReference("container-learn-azure");
            // Connect to the blob file
            CloudBlockBlob blob = container.GetBlockBlobReference("V12_ConsumerPlus_2020Q3_Sample.txt");
            //CloudBlockBlob blob = container.GetBlockBlobReference("V12_ConsumerPlus_2020Q3.txt");

            // Download the entire blob as text -- this is what times out on the 200 GB file
            string contents = await blob.DownloadTextAsync();

            // Keep the header (line 1) plus every line containing the email
            var searchedLinesFromString = contents
                .Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries)
                .Select((text, index) => new { text, lineNumber = index + 1 })
                .Where(x => x.text.Contains(email) || x.lineNumber == 1);

            List<DataObjects> objList = new List<DataObjects>();
            string[] headerColumns = null;
            foreach (var match in searchedLinesFromString)
            {
                if (match.lineNumber == 1)
                {
                    headerColumns = match.text.Split('\t');
                }
                else if (headerColumns != null)
                {
                    string missedProperties = string.Empty;
                    var data = match.text.Split('\t');
                    DataObjects obj = new DataObjects();
                    if (data.Any())
                    {
                        // Map each tab-separated value onto the DataObjects property
                        // whose name matches the corresponding header column
                        foreach (var prop in obj.GetType().GetProperties())
                        {
                            int valueIndex = Array.IndexOf(headerColumns, prop.Name);
                            if (valueIndex != -1)
                            {
                                var columnValue = data[valueIndex];
                                prop.SetValue(obj, columnValue);
                            }
                            else
                            {
                                missedProperties = missedProperties + ", " + prop.Name;
                            }
                        }
                        objList.Add(obj);
                        log.LogInformation("{LineNumber}: {Text}", match.lineNumber, match.text);
                    }
                }
            }

            // OkObjectResult serializes objList to JSON on its own,
            // so there is no need to call JsonConvert.SerializeObject first
            return new OkObjectResult(objList);
        }
    }
}
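One direction I've been considering, to show what I mean by avoiding the string download: open the blob as a stream with OpenReadAsync and scan it line by line, so only one line is buffered at a time. Below is an untested sketch against the same Microsoft.WindowsAzure.Storage SDK; the SearchBlobByEmailAsync helper name is mine, and it assumes the same DataObjects POCO and usings as above. I'm still unsure whether a sequential scan of 200 GB can finish within the HTTP timeout, which is why I'm asking if there's a better approach altogether.

// Untested sketch: stream the blob instead of downloading it into a string.
// CloudBlob.OpenReadAsync() returns a Stream that pulls ranges of the blob
// on demand, so memory use stays bounded regardless of the blob size.
private static async Task<List<DataObjects>> SearchBlobByEmailAsync(
    CloudBlockBlob blob, string email, ILogger log)
{
    var objList = new List<DataObjects>();
    string[] headerColumns = null;

    using (Stream blobStream = await blob.OpenReadAsync())
    using (var reader = new StreamReader(blobStream))
    {
        string line;
        int lineNumber = 0;
        while ((line = await reader.ReadLineAsync()) != null)
        {
            lineNumber++;
            if (lineNumber == 1)
            {
                // First line is the header row
                headerColumns = line.Split('\t');
                continue;
            }
            if (headerColumns == null || !line.Contains(email))
            {
                continue; // skip non-matching rows without keeping them in memory
            }

            // Same header-to-property mapping as in the original code
            var data = line.Split('\t');
            var obj = new DataObjects();
            foreach (var prop in obj.GetType().GetProperties())
            {
                int valueIndex = Array.IndexOf(headerColumns, prop.Name);
                if (valueIndex != -1 && valueIndex < data.Length)
                {
                    prop.SetValue(obj, data[valueIndex]);
                }
            }
            objList.Add(obj);
            log.LogInformation("{LineNumber}: {Line}", lineNumber, line);
        }
    }

    return objList;
}

The Run method would then replace the DownloadTextAsync/Split block with a single call like objList = await SearchBlobByEmailAsync(blob, email, log);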