54

How do I copy all objects from one prefix to another? I have tried every way I could find to copy all objects in one shot from one prefix to another, but the only way that seems to work is looping over a list of objects and copying them one by one. This is really inefficient. If I have hundreds of files in a folder, will I have to make hundreds of calls?

var params = {
    Bucket: bucket,
    CopySource: bucket + '/' + oldDirName + '/filename.txt',
    Key: newDirName + '/filename.txt',
};
s3.copyObject(params, function(err, data) {
  if (err) {
      callback.apply(this, [{
          type: "error",
          message: "Error while renaming Directory",
          data: err
      }]);
  } else {
      callback.apply(this, [{
          type: "success",
          message: "Directory renamed successfully",
          data: data
      }]);
  }
});
Pat Myron
Yousaf

6 Answers

56

You will need one AWS.S3.listObjects() call to list your objects with a specific prefix. But you are correct that you will need to make one call for every object that you want to copy from one bucket/prefix to the same or another bucket/prefix.

You can also use a utility library like async to manage your requests.

var AWS = require('aws-sdk');
var async = require('async');
var bucketName = 'foo';
var oldPrefix = 'abc/';
var newPrefix = 'xyz/';
var s3 = new AWS.S3({params: {Bucket: bucketName}, region: 'us-west-2'});

var done = function(err, data) {
  if (err) console.log(err);
  else console.log(data);
};

s3.listObjects({Prefix: oldPrefix}, function(err, data) {
  if (err) return done(err); // bail out if the listing itself failed
  if (data.Contents.length) {
    async.each(data.Contents, function(file, cb) {
      var params = {
        Bucket: bucketName,
        CopySource: bucketName + '/' + file.Key,
        Key: file.Key.replace(oldPrefix, newPrefix)
      };
      s3.copyObject(params, function(copyErr, copyData){
        if (copyErr) {
          console.log(copyErr);
        }
        else {
          console.log('Copied: ', params.Key);
          cb();
        }
      });
    }, done);
  }
});

Hope this helps!

7hibault
Aditya Manohar
21

Here is a code snippet that does it the async/await way:

const AWS = require('aws-sdk');
AWS.config.update({
  credentials: new AWS.Credentials(....), // credential parameters
});
AWS.config.setPromisesDependency(require('bluebird'));
const s3 = new AWS.S3();

... ...

const bucketName = 'bucketName';        // example bucket
const folderToMove = 'folderToMove/';   // old folder name
const destinationFolder = 'destinationFolder/'; // new destination folder 
try {
    const listObjectsResponse = await s3.listObjects({
        Bucket: bucketName,
        Prefix: folderToMove,
        Delimiter: '/',
    }).promise();

    const folderContentInfo = listObjectsResponse.Contents;
    const folderPrefix = listObjectsResponse.Prefix;

    await Promise.all(
      folderContentInfo.map(async (fileInfo) => {
        await s3.copyObject({
          Bucket: bucketName,
          CopySource: `${bucketName}/${fileInfo.Key}`,  // old file Key
          Key: `${destinationFolder}${fileInfo.Key.replace(folderPrefix, '')}`, // new file Key (destinationFolder already ends with '/')
        }).promise();
    
        await s3.deleteObject({
          Bucket: bucketName,
          Key: fileInfo.Key,
        }).promise();
      })
    );
} catch (err) {
  console.error(err); // error handling
}
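
Since the snippet uses await, it has to run inside an async function (that's what the "... ..." above stands in for); a minimal hypothetical wrapper:

// Hypothetical wrapper: await is only valid inside an async function
const moveFolder = async () => {
  // ... the listObjects / copyObject / deleteObject logic above goes here ...
};

moveFolder();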
Peter Peng
9

None of the above handles large directories, as ListObjectsV2 returns no more than 1,000 results at a time, providing a continuation token to access additional "pages".

Here is a solution using the modern v3 SDK:

const copyAll = async ({
  s3Client,
  sourceBucket,
  targetBucket = sourceBucket,
  sourcePrefix,
  targetPrefix,
  concurrency = 1,
  deleteSource = false,
}) => {
  let ContinuationToken;

  const copyFile = async (sourceKey) => {
    const targetKey = sourceKey.replace(sourcePrefix, targetPrefix);

    await s3Client.send(
      new CopyObjectCommand({
        Bucket: targetBucket,
        Key: targetKey,
        CopySource: `${sourceBucket}/${sourceKey}`,
      }),
    );

    if (deleteSource) {
      await s3Client.send(
        new DeleteObjectCommand({
          Bucket: sourceBucket,
          Key: sourceKey,
        }),
      );
    }
  };

  do {
    const { Contents = [], NextContinuationToken } = await s3Client.send(
      new ListObjectsV2Command({
        Bucket: sourceBucket,
        Prefix: sourcePrefix,
        ContinuationToken,
      }),
    );

    const sourceKeys = Contents.map(({ Key }) => Key);

    await Promise.all(
      new Array(concurrency).fill(null).map(async () => {
        while (sourceKeys.length) {
          await copyFile(sourceKeys.pop());
        }
      }),
    );

    ContinuationToken = NextContinuationToken;
  } while (ContinuationToken);
};

If the Promise.all part is unclear, it's just a poor man's "thread pool", allowing you to copy multiple files concurrently, which can dramatically speed things up. These copies don't use any of your bandwidth, since the content is copied within AWS, so I had no issues with a value of 20 or more for concurrency. For clarity, it's just a parallelized version of:

const sourceKeys = Contents.map(({ Key }) => Key);

while (sourceKeys.length) {
  await copyFile(sourceKeys.pop());
}
Eric Haynes
  • You will need to add package "@aws-sdk/client-s3" to use "S3, CopyObjectCommand, ListObjectsV2Command" – Dzun Ho Apr 05 '22 at 10:47
  • Ah, yes. I didn't include the import in the snippet above. FYI it's documented here: https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-s3/index.html – Eric Haynes Apr 05 '22 at 14:20
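For reference, a minimal sketch of the imports and client the snippet above assumes, per the comments (the region value is a placeholder):

// Named exports from the @aws-sdk/client-s3 package
const {
  S3Client,
  CopyObjectCommand,
  DeleteObjectCommand,
  ListObjectsV2Command,
} = require('@aws-sdk/client-s3');

// A client like this can then be passed in as the s3Client parameter
const s3Client = new S3Client({ region: 'us-west-2' });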
6

A small change to Aditya Manohar's code that improves the error handling in the s3.copyObject callback and actually finishes the "move" by removing the source files after the copy requests have been executed:

const AWS = require('aws-sdk');
const async = require('async');
const bucketName = 'foo';
const oldPrefix = 'abc/';
const newPrefix = 'xyz/';

const s3 = new AWS.S3({
    params: {
        Bucket: bucketName
    },
    region: 'us-west-2'
});


// 1) List all the objects in the source "directory"
s3.listObjects({
    Prefix: oldPrefix
}, function (err, data) {
    if (err) return console.log(err);

    if (data.Contents.length) {

        // Build up the parameters for the delete statement
        let paramsS3Delete = {
            Bucket: bucketName,
            Delete: {
                Objects: []
            }
        };

        // Expand the array with all the keys that we have found in the ListObjects function call, so that we can remove all the keys at once after we have copied all the keys
        data.Contents.forEach(function (content) {
            paramsS3Delete.Delete.Objects.push({
                Key: content.Key
            });
        });

        // 2) Copy all the source files to the destination
        async.each(data.Contents, function (file, cb) {
            var params = {
                CopySource: bucketName + '/' + file.Key,
                Key: file.Key.replace(oldPrefix, newPrefix)
            };
            s3.copyObject(params, function (copyErr, copyData) {

                if (copyErr) {
                    console.log(copyErr);
                } else {
                    console.log('Copied: ', params.Key);
                }
                cb();
            });
        }, function (asyncError, asyncData) {
            // All the requests for the file copy have finished
            if (asyncError) {
                return console.log(asyncError);
            } else {
                console.log(asyncData);

                // 3) Now remove the source files - that way we effectively moved all the content
                s3.deleteObjects(paramsS3Delete, (deleteError, deleteData) => {
                    if (deleteError) return console.log(deleteError);

                    return console.log(deleteData);
                })

            }
        });
    }
});

Note that I have moved the cb() callback function outside the if-then-else block. That way, even when an error occurs, the async module will still fire the final callback.

Guppie70
  • is there a way to move or copy multiple files in one request, as we are doing above with delete? – Raghavendra Feb 10 '17 at 05:48
  • @Raghavendra: not really sure what you are looking for. If you want to copy instead of move the files, then just skip step (3) "s3.deleteObjects()". If you want to avoid multiple HTTP requests for each file, then I believe the only way is to rely on the AWS CLI. The AWS CLI has the cp command, which allows you to copy multiple files or a complete "directory" in one go: http://docs.aws.amazon.com/cli/latest/reference/s3/cp.html – Guppie70 Feb 13 '17 at 08:48
  • these methods do not allow deleting multiple specific files; they take a prefix pattern to copy or delete, but I have a set of files – Raghavendra Feb 13 '17 at 09:43
  • In your code asyncData is always undefined? asyncError is also always undefined? – bpavlov Jan 04 '18 at 14:09
  • @bpavlov: I guess that you are right about the asyncData object. But according to the async documentation (https://caolan.github.io/async/docs.html#each) the `asyncError` object will be filled whenever an error occurs in the iteration routine - which in this case is the `s3.copyObject()` logic. So when nothing goes wrong in `s3.copyObject()`, the `asyncError` object will be empty. Hope this helps! – Guppie70 Jan 07 '18 at 19:56
  • The best way that worked for me was AWS-CLI for bulk operations which is capable of moving/syncing folders. – Yousaf Mar 09 '18 at 20:49
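
For reference, the AWS CLI approach mentioned in these comments handles a whole prefix in one command; the bucket and prefix names below are placeholders:

# Copy everything under one prefix to another ("directory" copy)
aws s3 cp s3://my-bucket/abc/ s3://my-bucket/xyz/ --recursive

# Or move (copy, then delete the source) in one go
aws s3 mv s3://my-bucket/abc/ s3://my-bucket/xyz/ --recursive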
5

One more update to the original code, which copies folders recursively. Some limitations: the code does not handle more than 1,000 objects per Prefix, and of course there is a depth limitation if your folders are very deep.

import AWS from 'aws-sdk';

AWS.config.update({ region: 'ap-southeast-1' });

/**
 * Copy an S3 "folder" recursively
 * @param {string} bucket the bucket name
 * @param {string} source the source prefix; must end with '/'
 * @param {string} dest the destination prefix; must end with '/'
 * @returns {Promise} resolves when the copy has finished
 */
export default async function s3CopyFolder(bucket, source, dest) {
  // sanity check: source and dest must end with '/'
  if (!source.endsWith('/') || !dest.endsWith('/')) {
    return Promise.reject(new Error('source and dest must end with a forward slash'));
  }

  const s3 = new AWS.S3();

  // plan: list the source; Delimiter '/' groups sub-folders into CommonPrefixes for the recursive step below
  const listResponse = await s3.listObjectsV2({
    Bucket: bucket,
    Prefix: source,
    Delimiter: '/',
  }).promise();

  // copy objects
  await Promise.all(
    listResponse.Contents.map(async (file) => {
      await s3.copyObject({
        Bucket: bucket,
        CopySource: `${bucket}/${file.Key}`,
        Key: `${dest}${file.Key.replace(listResponse.Prefix, '')}`,
      }).promise();
    }),
  );

  // recursive copy sub-folders
  await Promise.all(
    listResponse.CommonPrefixes.map(async (folder) => {
      await s3CopyFolder(
        bucket,
        `${folder.Prefix}`,
        `${dest}${folder.Prefix.replace(listResponse.Prefix, '')}`,
      );
    }),
  );

  return Promise.resolve('ok');
}
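
A hypothetical invocation (bucket and prefix names are placeholders; both prefixes must end with '/'):

// Example usage of the function above
s3CopyFolder('my-bucket', 'abc/', 'xyz/')
  .then(() => console.log('done'))
  .catch(console.error);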
erwinkarim
0

Here's what I use for moving multiple objects.

const aws = require('aws-sdk') // needed: the code below references aws.S3

const asyncForEach = async (array, callback) => {
  for (let i = 0; i < array.length; i++) {
    await callback(array[i], i, array)
  }
}

const awsMove = async ({ files }) => {
  try {
    const s3 = new aws.S3()
    const AWS_BUCKET = 'bucket'

    await asyncForEach(files, async file => {
      const copyParams = {
        Key: file.newPath,
        ACL: 'public-read',
        Bucket: AWS_BUCKET,
        CopySource: encodeURI(`/${AWS_BUCKET}/${file.oldPath}`)
      }
      await s3.copyObject(copyParams).promise()

      const deleteParams = {
        Key: file.oldPath,
        Bucket: AWS_BUCKET
      }
      await s3.deleteObject(deleteParams).promise()
    })
  } catch (err) {
    console.log(err)
  }
}

const files = [
  { oldPath: 'folder/file', newPath: 'folder-copy/file' },
  { oldPath: 'another-folder/file', newPath: 'another-folder-copy/file' }
]
await awsMove({ files })
ozgrozer