0

I'm trying to upload images to azure blob storage and before uploading I'm checking with head request to make sure that file doesn't exists.

Code runs well but stops after sometime. I tried to close file but it is not working.

Below is my code

from azure.identity.aio import ClientSecretCredential
from azure.storage.blob.aio import BlobClient
from azure.core.exceptions import ResourceExistsError
import asyncio
import aiohttp
import os

blob_url = ''
sourceDir = ''
accountName = ''
accountKey = ''
containerName = ''


async def fetch(session, url):
    async with session.head(url) as response:
        await response.text()
        return response.status


async def task(name, work_queue):
    while not work_queue.empty():
        f, blob_name = await work_queue.get()
        print("Processing :", f)
        blob_name = blob_name.replace(sourceDir, '')
        blobClient = BlobClient(
            "https://{}.blob.core.windows.net".format(accountName),
            credential = accountKey,
            container_name = containerName,
            blob_name = blob_name,
        )
        async with aiohttp.ClientSession() as session:
            status = await fetch(session, blob_url+blob_name)
            if status != 200:
                async with blobClient:
                    with open(f, "rb") as data:
                        await blobClient.upload_blob(data, overwrite=True)
                        data.close()

def main():                                                                                                                  
    work_queue = asyncio.Queue()
                                                                                                                
    for (path, dirs, files) in os.walk(sourceDir):
        for f in files:
            blob_hash_dir = f[0:2]
            work_queue.put_nowait((os.path.join(path, f), os.path.join(path, blob_hash_dir, f)))
                                                                                                                           
    loop = asyncio.get_event_loop()
    loop.run_until_complete(
        asyncio.gather(
            *[task("{}".format(num), work_queue) for num in range(1,51)]
        )
    )
    loop.close()
    

main()

This is the error I'm getting after few minutes of run.

Traceback (most recent call last):
  File "upload_to_blob.py", line 57, in <module>
  File "upload_to_blob.py", line 49, in main
  File "/home/root/miniconda3/envs/other/lib/python3.8/asyncio/base_events.py", line 612, in run_until_complete
  File "upload_to_blob.py", line 36, in task
OSError: [Errno 24] Too many open files: '/home/other/bfd78bca2ec99487668.jpg'
libgcc_s.so.1 must be installed for pthread_cancel to work
Aborted (core dumped)

  • It seems like you have a file descriptor leak. If I were you I'd wait for the program to run a couple of minutes (but not to exit), and inspect the output of `ls -l /proc//fd` to find out which file descriptors are not getting closed. – user4815162342 Jun 25 '20 at 11:58
  • The code looks OK. The number of asyncio tasks is fixed so that's ok (good idea to use queue). Is this the complete code? Can you check what files are open? (see previous comment from @user4815162342) – Messa Jul 03 '20 at 10:10

0 Answers0