-1

I have a very long running process on Azure. It happens that the execution stops at a random moment, without even leaving a log or an error message. Sometimes it runs hours straight, sometimes just a couple of minutes. This doesn't happen in my local PC.

Some people have already answered similar questions suggesting that turning the app to "Always on" solves the issue, but it doesn't in my case. The problem continues.

I read other posts about this matter and some answers suggested trying to use WebJobs. I can't, because my application has 150MB, and that exceeds WebJobs maximum file size.

About the project: it is an implementation of a heavy face detection and recognition algorithm provided by a third party. All the code I can't see is surrounded by try statements.

That's how I call the function:

Task.Run(()=> loopDeReconhecimento(biometricClient, code, photosInfo,ultimoIndiceReconhecido, totalNumberOfFiles,outPutDestionation));


private async void loopDeReconhecimento(NBiometricClient biometricClient, string code, List<PhotoInfo> photosInfo,int ultimoIndiceReconhecido, int totalNumberOfFiles,string outPutDestionation)
        {
            WebClient wc = new WebClient();
            for (int i = ultimoIndiceReconhecido; i < totalNumberOfFiles; i++)
            {
                if (forceStop.Contains(code))
                {
                    Log.register(code, "STOPPING!!!!");
                    forceStop.Remove(code);
                    return;
                }
                if (i >= photosInfo.Count)
                {
                    i--;
                    try
                    {
                        Log.register(code, "Fim das fotos upadas por enquanto foi encontrado. Esperando trinta segundos, baixando novamente as informações e tentando de novo " + DateTime.Now.ToLongTimeString());
                        Thread.Sleep(30000);

                        wc.DownloadFile(pathWebBase + code + @"/" + @"1.Eventos_grande_simples/imagensConfig.txt", outPutDestionation);
                        //Log.register(code,"Tempo de download: " + tempoTotal);
                        PhotoInfo.init(File.ReadAllLines(outPutDestionation), photosInfo);
                    } catch
                    {
                        Log.register(code, "Attempt to download failed. Try again in 30 seconds");
                    }
                    continue;
                }
                Log.register(code, "Starting photo " + i.ToString() + " de " + totalNumberOfFiles);


                recognizePhoto(biometricClient,wc, code, photosInfo[i], photosInfo, tentativasPorFoto);

                status = i.ToString() + @"/" + totalNumberOfFiles.ToString();
                if (forceSave.Contains(code) ||  (double)i / salvarACadaQuantas == Math.Floor((double)i / salvarACadaQuantas))
                {
                    forceSave.Remove(code);
                    salvar(i, code, photosInfo);

                }

            }

            Log.register(code, "Fim.");

        }


void recognizePhoto(NBiometricClient biometricClient,WebClient wc, string code, PhotoInfo photoInfo, List<PhotoInfo> photosInfo, int attempts)
{



            try
            {

                Log.register(code, "Foto iniciada: " + photoInfo.shortAdress);

                NBiometricStatus status;

                string localPath = localPathBase + code + @"\Fotos a separar\1.Eventos_grande" + photoInfo.shortAdress;
                Stopwatch sw = new Stopwatch();
                sw.Start();


                NSubject candidateSubject = CreateSubjectFromURL(pathWebBase + code + @"/1.Eventos_grande_simples" + photoInfo.shortAdress, true);

                status = biometricClient.CreateTemplate(candidateSubject);
                if (status != NBiometricStatus.Ok)
                {
                    Log.register(code, "Template creation was unsuccessful. Status: " + status);
                    return;
                }
                else
                {
                    Log.register(code, "Created: Status: " + status);

                }

                // Set ids to candidate subjects and related subjects
                int i = 1;
                candidateSubject.Id = "ID_0";
                Log.register(code, "Subject na foto: Status: " + candidateSubject.Id);
                PersonTagInfo pti = detalharFace(candidateSubject, biometricClient, code);
                if (pti != null)
                    photoInfo.peopleTags.Add(pti);
                foreach (var subject in candidateSubject.RelatedSubjects)
                {
                    subject.Id = string.Format("ID_{0}", i++);
                    Log.register(code, "Subject found in photo: Status: " + subject.Id);
                    pti = detalharFace(subject, biometricClient, code);
                    if (pti != null)
                        photoInfo.peopleTags.Add(pti);

                }


                identificarESalvarPersonTagInfo(biometricClient, photoInfo, candidateSubject, code);
                foreach (NSubject candidato in candidateSubject.RelatedSubjects)
                {
                    identificarESalvarPersonTagInfo(biometricClient, photoInfo, candidato, code);
                }

                photoInfo.done = true;
                Log.register(code, "Tempo de processamento: " + sw.ElapsedMilliseconds);
            } catch
            {
                if (attempts > 0)
                {
                    Log.register(code, "Erro ao processar foto. Tentando novamente em 1 segundo. Tentativas restantes: " + attempts.ToString());

                    Thread.Sleep(1000);
                    recognizePhoto(biometricClient,wc, code, photoInfo,photosInfo, attempts - 1);

                }

            }





        }
Tom Sun - MSFT
  • 24,161
  • 3
  • 30
  • 47
Lucas
  • 558
  • 11
  • 28
  • Can you provide us with some detail about what this long running process is, how it was originally implemented (I'm guessing at the moment in an MVC/WebAPI action) and how your WebJob executable (and dependencies) manages to exceed 150MB. – Brendan Green Nov 21 '16 at 01:27
  • It uses a third party SDK for face detection and recognition. It was originally implemented as a web app. I changed the question so that you can see the code – Lucas Nov 21 '16 at 10:01

1 Answers1

0

As your description, you need a very long running process on platform. Please have a try to use worker role. Worker role is the one that was defined to fill this. It can be used to process the work in a loop. And it is no 150M size limited. We also can have more control over the VMs. More info about the worker role please refer to document.

There are also some tutorials about how to program with worker roles. https://channel9.msdn.com/Series/Windows-Azure-Cloud-Services-Tutorials/Introduction-to-Windows-Azure-Worker-Roles-Part-1

https://channel9.msdn.com/Series/Windows-Azure-Cloud-Services-Tutorials/Introduction-to-Windows-Azure-Worker-Roles-Part-2

Tom Sun - MSFT
  • 24,161
  • 3
  • 30
  • 47