0

I have the code below, which downloads multiple files from S3 in parallel using AmazonS3Client. It has been working fine.

/// <summary>
/// Downloads every parsable S3 URL in <paramref name="urls"/> into an in-memory
/// stream, running at most <paramref name="maxConcurrentDownloads"/> transfers
/// at a time. Throttling is batch-style: each batch must finish completely
/// (Task.WhenAll) before the next batch starts, so one slow object stalls the
/// whole window.
/// </summary>
/// <param name="urls">Candidate S3 URLs; entries that fail to parse are skipped silently.</param>
/// <param name="maxConcurrentDownloads">Upper bound on in-flight downloads per batch.</param>
public async Task Download(IList<string> urls, int maxConcurrentDownloads)
{
        // NOTE(review): output is populated but never returned or disposed —
        // presumably a later revision returns it; confirm against the caller.
        var output = new List<Stream>();
        var tasks = new List<Task>();
        for (int i = 0; i < urls.Count; i++)
        {
            AmazonS3Uri s3Uri = null;
            if (AmazonS3Uri.TryParseAmazonS3Uri(urls[i], out s3Uri))
            {
                var ms = new MemoryStream();
                // The stream is added before the download completes; it is only
                // fully written once the batch's WhenAll has been awaited.
                output.Add(ms);
                tasks.Add(GetObject(s3Uri, ms));
            }

            // ">=" (not "==") so a zero or negative maxConcurrentDownloads
            // degrades to sequential downloads instead of silently disabling
            // throttling and launching every URL at once.
            if (tasks.Count >= maxConcurrentDownloads || i == urls.Count - 1)
            {
                await Task.WhenAll(tasks);
                tasks.Clear();
            }
        }
}

/// <summary>
/// Downloads the S3 object identified by <paramref name="s3Uri"/> into
/// <paramref name="output"/>. A fresh AmazonS3Client is constructed per call,
/// targeting the URI's own region; the using block guarantees the client is
/// disposed even if the download faults.
/// </summary>
private async Task GetObject(AmazonS3Uri s3Uri, Stream output)
{
    using (var s3Client = new AmazonS3Client(s3Uri.Region))
    {
       // removed for brevity purpose
       await s3Client.GetObjectAsync(...);
    }
}

All URLs are in the same region. The code above creates a new instance of AmazonS3Client for each URL, so I refactored the code to use a single instance of AmazonS3Client.

/// <summary>
/// Downloads every parsable S3 URL in <paramref name="urls"/> into an in-memory
/// stream, sharing ONE AmazonS3Client across all transfers and running at most
/// <paramref name="maxConcurrentDownloads"/> at a time. Throttling is
/// batch-style: the whole batch is awaited before the next one starts.
/// NOTE(review): a SemaphoreSlim-based sliding window would start a new
/// download as soon as any slot frees up, instead of stalling on the slowest
/// object in the batch.
/// </summary>
/// <param name="urls">Candidate S3 URLs; entries that fail to parse are skipped silently.</param>
/// <param name="maxConcurrentDownloads">Upper bound on in-flight downloads per batch.</param>
public async Task Download(IList<string> urls, int maxConcurrentDownloads)
{
        // NOTE(review): output is populated but never returned or disposed —
        // presumably a later revision returns it; confirm against the caller.
        var output = new List<Stream>();
        var tasks = new List<Task>();
        // One client for the whole run; it is only disposed after every batch
        // has been awaited, so no download can outlive it.
        using(AmazonS3Client s3Client = new AmazonS3Client())
        {
            for (int i = 0; i < urls.Count; i++)
            {
                AmazonS3Uri s3Uri = null;
                if (AmazonS3Uri.TryParseAmazonS3Uri(urls[i], out s3Uri))
                {
                    var ms = new MemoryStream();
                    output.Add(ms);
                    tasks.Add(GetObject(s3Client, s3Uri, ms));
                }

                // ">=" (not "==") so a zero or negative maxConcurrentDownloads
                // degrades to sequential downloads instead of silently
                // disabling throttling and launching every URL at once.
                if (tasks.Count >= maxConcurrentDownloads || i == urls.Count - 1)
                {
                    await Task.WhenAll(tasks);
                    tasks.Clear();
                }
            }
       }
}

/// <summary>
/// Downloads the S3 object identified by <paramref name="s3Uri"/> into
/// <paramref name="output"/> using the caller-supplied, shared
/// <paramref name="s3Client"/>. Ownership of the client stays with the caller;
/// this method must not dispose it.
/// </summary>
private async Task GetObject(AmazonS3Client s3Client, AmazonS3Uri s3Uri, Stream output)
{
    // download file using provided s3Client
    await s3Client.GetObjectAsync(...);
}

Questions

  1. Given maxConcurrentDownloads = 20, is it advisable to use a single instance of AmazonS3Client to download or upload in parallel?

  2. The default value for AmazonS3Config.ConnectionLimit is 50; does that mean a single instance of AmazonS3Client can concurrently download or upload 50 files?

  3. Or, better yet, should I use a singleton instance of AmazonS3Client?

Any recommendations?

Metro Smurf
  • 37,266
  • 20
  • 108
  • 140
LP13
  • 30,567
  • 53
  • 217
  • 400
  • Does this answer your question? [AmazonS3Client Single connection Vs new connection for each call C#](https://stackoverflow.com/questions/64145548/amazons3client-single-connection-vs-new-connection-for-each-call-c-sharp) – Metro Smurf Nov 03 '21 at 13:22
  • According to this dup [AmazonS3Client Single connection Vs new connection](https://stackoverflow.com/q/64145548/9664), the client maintains a pool of `HttpClient`s. With the potential bottleneck being credential resolution. See link for more info... – Metro Smurf Nov 03 '21 at 13:26

0 Answers0