The latest version of the boto3 codecommit
includes the methods get_differences
and get_blob
.
You can get all the content of a codecommit repository using these two methods (at least, if you are not interested in the retaining the .git history).
The script below takes all the content of the master branch and adds it to a tar file. Afterwards you could upload it to s3 as you please.
You can run this as a lambda function, which can be invoked when you push to codecommit.
This works with the current lambda python 3.6 environment.
botocore==1.5.89
boto3==1.4.4
import boto3
import pathlib
import tarfile
import io
import sys
def get_differences(repository_name, branch="master"):
response = codecommit.get_differences(
repositoryName=repository_name,
afterCommitSpecifier=branch,
)
differences = []
while "nextToken" in response:
response = codecommit.get_differences(
repositoryName=repository_name,
afterCommitSpecifier=branch,
nextToken=response["nextToken"]
)
differences += response.get("differences", [])
else:
differences += response["differences"]
return differences
if __name__ == "__main__":
repository_name = sys.argv[1]
codecommit = boto3.client("codecommit")
repository_path = pathlib.Path(repository_name)
buf = io.BytesIO()
with tarfile.open(None, mode="w:gz", fileobj=buf) as tar:
for difference in get_differences(repository_name):
blobid = difference["afterBlob"]["blobId"]
path = difference["afterBlob"]["path"]
mode = difference["afterBlob"]["mode"] # noqa
blob = codecommit.get_blob(
repositoryName=repository_name, blobId=blobid)
tarinfo = tarfile.TarInfo(str(repository_path / path))
tarinfo.size = len(blob["content"])
tar.addfile(tarinfo, io.BytesIO(blob["content"]))
tarobject = buf.getvalue()
# save to s3