In a recent update the ARM provider for Batch was improved to allow creation of a container enabled pool. The following ARM template will create a container pool, notice the API version is updated.
{
  "type": "Microsoft.Batch/batchAccounts/pools",
  "name": "[concat(variables('batchAccountName'), '/', parameters('poolID'))]",
  "apiVersion": "2018-12-01",
  "properties": {
    "vmSize": "[parameters('virtualMachineSize')]",
    "networkConfiguration": {
      "subnetId": "[parameters('virtualNetworkSubnetId')]"
    },
    "maxTasksPerNode": 1,
    "taskSchedulingPolicy": {
      "nodeFillType": "Spread"
    },
    "deploymentConfiguration": {
      "virtualMachineConfiguration": {
        "containerConfiguration": {
          "containerImageNames": "[parameters('dockerImagesToCache')]",
          "type": "DockerCompatible"
        },
        "imageReference": {
          "publisher": "microsoft-azure-batch",
          "offer": "ubuntu-server-container",
          "sku": "16-04-lts",
          "version": "latest"
        },
        "nodeAgentSkuId": "batch.node.ubuntu 16.04"
      }
    },
    "scaleSettings": {
      "autoScale": {
        "evaluationInterval": "PT5M",
        "formula": "[concat('startingNumberOfVMs = 0;maxNumberofVMs = ', parameters('maxNodeCount'), ';pendingTaskSamplePercent = $PendingTasks.GetSamplePercent(160 * TimeInterval_Second);pendingTaskSamples = pendingTaskSamplePercent < 70 ? startingNumberOfVMs : avg($PendingTasks.GetSample(160 * TimeInterval_Second));$TargetDedicatedNodes=min(maxNumberofVMs, pendingTaskSamples);')]"
      }
    }
  },
  "dependsOn": [
    "[resourceId('Microsoft.Batch/batchAccounts', variables('batchAccountName'))]"
  ]
}
----- Previous Answer now not needed -----
I've managed to find a workaround using an ACI container along with Managed Service Identities and some Python. It's not pretty but it does work.
The flow of the template is as follows:
- An MSI is created
- The MSI is assigned contributor rights for the resource group
- The Batch account is created
- An ACI instance is run which pulls down a templated
pool.json
file and uses a Python script to fill in the required parameters. The Python script logs in to the az CLI
using the MSI identity and then proceeds to create the pool.
Here is the full setup, you'll likely want to tweak this to fit your scenario.
The Python script and pool.json
files need to be uploaded to a public location, such as blob storage or git, then the _artifactsLocation
parameters are used to tell the template where to download the files.
Main template:
{
  "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "_artifactsLocation": {
      "type": "string",
      "metadata": {
        "description": "Base URL where the pool.json and configurepool.py artifacts are hosted."
      }
    },
    "_artifactsLocationSasToken": {
      "type": "string",
      "metadata": {
        "description": "SAS token (including leading '?') for reading the artifacts, or empty if public."
      }
    },
    "mountArgs": {
      "type": "string",
      "metadata": {
        "description": "Arguments passed to the mount.py script."
      }
    },
    "virtualNetworkSubnetId": {
      "type": "string",
      "metadata": {
        "description": "The subnet in which Batch will be deployed. Requires the following ports to be enabled via NSG: https://learn.microsoft.com/en-us/azure/batch/batch-virtual-network#network-security-groups-1."
      }
    },
    "maxTasksPerNode": {
      "type": "int",
      "defaultValue": 1
    },
    "maxNodeCount": {
      "type": "int",
      "defaultValue": 3
    },
    "virtualMachineSize": {
      "type": "string",
      "defaultValue": "Standard_F8s_v2",
      "metadata": {
        "description": "Size of VMs in the VM Scale Set."
      }
    },
    "storageAccountSku": {
      "type": "string",
      "defaultValue": "Standard_LRS",
      "allowedValues": [
        "Standard_LRS",
        "Standard_GRS",
        "Standard_ZRS",
        "Premium_LRS"
      ],
      "metadata": {
        "description": "Storage Account type"
      }
    },
    "location": {
      "type": "string",
      "defaultValue": "[resourceGroup().location]",
      "metadata": {
        "description": "Location for all resources."
      }
    },
    "poolId": {
      "type": "string",
      "defaultValue": "defaultpool"
    }
  },
  "variables": {
    "identityName": "batchpoolcreator",
    "storageAccountName": "[concat('batch', uniqueString(resourceGroup().id))]",
    "batchAccountName": "[concat('batch', uniqueString(resourceGroup().id))]",
    "batchEndpoint": "[concat('https://', variables('batchAccountName'), '.' , parameters('location'), '.batch.azure.com')]",
    "_comment1": "The role assignment ID is required to be a guid, we use this to generate a repeatable guid",
    "roleAssignmentIdRg": "[guid(concat(resourceGroup().id, 'contributorRG'))]",
    "_comment2": "This is the ID used to set the contributor permission on a role.",
    "contributorRoleDefinitionId": "[concat('/subscriptions/', subscription().subscriptionId, '/providers/Microsoft.Authorization/roleDefinitions/', 'b24988ac-6180-42a0-ab88-20f7382dd24c')]"
  },
  "resources": [
    {
      "comments": "Create an identity to use for creating the Azure Batch pool with container support (will be assigned to ACI instance)",
      "type": "Microsoft.ManagedIdentity/userAssignedIdentities",
      "name": "[variables('identityName')]",
      "apiVersion": "2015-08-31-preview",
      "location": "[resourceGroup().location]"
    },
    {
      "comments": "Assign the idenity contributor rights to the resource group",
      "type": "Microsoft.Authorization/roleAssignments",
      "apiVersion": "2017-05-01",
      "name": "[variables('roleAssignmentIdRg')]",
      "dependsOn": [
        "[resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', variables('identityName'))]"
      ],
      "properties": {
        "roleDefinitionId": "[variables('contributorRoleDefinitionId')]",
        "principalId": "[reference(resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', variables('identityName')), '2015-08-31-preview').principalId]",
        "scope": "[resourceGroup().id]"
      }
    },
    {
      "comments": "This is the storage account used by Azure Batch for file processing/storage",
      "type": "Microsoft.Storage/storageAccounts",
      "name": "[variables('storageAccountName')]",
      "apiVersion": "2016-01-01",
      "location": "[parameters('location')]",
      "sku": {
        "name": "[parameters('storageAccountSku')]"
      },
      "kind": "Storage",
      "tags": {
        "ObjectName": "[variables('storageAccountName')]"
      },
      "properties": {}
    },
    {
      "type": "Microsoft.Batch/batchAccounts",
      "name": "[variables('batchAccountName')]",
      "apiVersion": "2015-12-01",
      "location": "[parameters('location')]",
      "tags": {
        "ObjectName": "[variables('batchAccountName')]"
      },
      "properties": {
        "autoStorage": {
          "storageAccountId": "[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]"
        }
      },
      "dependsOn": [
        "[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]"
      ]
    },
    {
      "type": "Microsoft.ContainerInstance/containerGroups",
      "apiVersion": "2018-10-01",
      "name": "[substring(concat('batchpool', uniqueString(resourceGroup().id)), 0, 20)]",
      "location": "[resourceGroup().location]",
      "dependsOn": [
        "[resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', variables('identityName'))]",
        "[resourceId('Microsoft.Authorization/roleAssignments', variables('roleAssignmentIdRg'))]",
        "[resourceId('Microsoft.Batch/batchAccounts', variables('batchAccountName'))]"
      ],
      "identity": {
        "type": "UserAssigned",
        "userAssignedIdentities": {
          "[resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', variables('identityName'))]": {}
        }
      },
      "properties": {
        "osType": "Linux",
        "restartPolicy": "Never",
        "containers": [
          {
            "name": "azure-cli",
            "properties": {
              "image": "microsoft/azure-cli",
              "command": [
                "/bin/bash",
                "-c",
                "[concat('curl -fsSL ', parameters('_artifactsLocation'), '/azurebatch/configurepool.py', parameters('_artifactsLocationSasToken'), ' > configurepool.py && python3 ./configurepool.py \"', parameters('poolId'), '\" ', parameters('virtualMachineSize'), ' \"', parameters('mountArgs'), '\" ', parameters('_artifactsLocation'), ' ', parameters('_artifactsLocationSasToken'), ' ', parameters('virtualNetworkSubnetId'), ' ', parameters('maxNodeCount'), ' ', resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', variables('identityName')), ' ', resourceGroup().name, ' ', variables('batchAccountName'))]"
              ],
              "resources": {
                "requests": {
                  "cpu": 1,
                  "memoryInGB": 1
                }
              }
            }
          }
        ]
      }
    }
  ],
  "outputs": {
    "storageAccountName": {
      "type": "string",
      "value": "[variables('storageAccountName')]"
    },
    "batchAccountName": {
      "type": "string",
      "value": "[variables('batchAccountName')]"
    },
    "batchEndpoint": {
      "type": "string",
      "value": "[variables('batchEndpoint')]"
    },
    "batchAccountKey": {
      "type": "securestring",
      "value": "[listKeys(resourceId('Microsoft.Batch/batchAccounts', variables('batchAccountName')), '2017-09-01').primary]"
    },
    "batchPoolId": {
      "type": "string",
      "value": "[parameters('poolId')]"
    }
  }
}
pool.json:
{
  "id": "POOL_ID_HERE",
  "vmSize": "VM_SIZE_HERE",
  "enableAutoScale": true,
  "autoScaleFormula": "startingNumberOfVMs = 0;maxNumberofVMs = MAX_NODE_COUNT_HERE;pendingTaskSamplePercent = $PendingTasks.GetSamplePercent(160 * TimeInterval_Second);pendingTaskSamples = pendingTaskSamplePercent < 70 ? startingNumberOfVMs : avg($PendingTasks.GetSample(160 * TimeInterval_Second));$TargetDedicatedNodes=min(maxNumberofVMs, pendingTaskSamples);",
  "autoScaleEvaluationInterval": "PT5M",
  "enableInterNodeCommunication": false,
  "startTask": {
    "commandLine": "/usr/bin/python3 mount.py MOUNT_ARGS_HERE",
    "resourceFiles": [
      {
        "blobSource": "ARTIFACT_LOCATION_HERE/examplemountscript/script.pyARTIFACT_SAS_HERE",
        "filePath": "./mount.py",
        "fileMode": "777"
      }
    ],
    "userIdentity": {
      "autoUser": {
        "scope": "pool",
        "elevationLevel": "admin"
      }
    },
    "maxTaskRetryCount": 0,
    "waitForSuccess": true
  },
  "maxTasksPerNode": 1,
  "taskSchedulingPolicy": {
    "nodeFillType": "Spread"
  },
  "virtualMachineConfiguration": {
    "containerConfiguration": {
      "containerImageNames": [
        "ubuntu",
        "python"
      ]
    },
    "imageReference": {
      "publisher": "microsoft-azure-batch",
      "offer": "ubuntu-server-container",
      "sku": "16-04-lts",
      "version": "1.0.6"
    },
    "nodeAgentSKUId": "batch.node.ubuntu 16.04"
  },
  "networkConfiguration": {
    "subnetId": "SUBNET_ID_HERE"
  }
}
configurepool.py:
"""Create an Azure Batch pool from a templated pool.json file.

Downloads pool.json from the artifact location, substitutes the
placeholder tokens with the values supplied on the command line, writes
the result to pool.complete.json, then uses the az CLI (logged in via
the managed identity assigned to this container) to create the pool.

Expects 10 positional arguments:
    poolid vm_size mount_args artifact_location artifact_sas subnet_id
    max_node_count msi_name resource_group_name batch_account_name
"""
import subprocess
import sys
import urllib.request


def run_az_command(cmd_array):
    """Run an az CLI command given as an argv list.

    Exits the process with status 4 if the command returns a non-zero
    exit code, so a failed step aborts the whole pool setup.
    """
    try:
        print("Attempt run {}".format(cmd_array))
        subprocess.check_call(cmd_array)
        print("Command completed successfully")
    except subprocess.CalledProcessError as e:
        print("Failed running: {} error: {}".format(cmd_array, e))
        sys.exit(4)


def fill_template(text, replacements):
    """Return ``text`` with every placeholder key replaced by its value.

    ``replacements`` maps placeholder token -> substitution string; each
    occurrence of each token is replaced.
    """
    for placeholder, value in replacements.items():
        text = text.replace(placeholder, value)
    return text


def main(argv):
    """Entry point: render pool.json from the CLI arguments and create the pool."""
    if len(argv) != 11:
        print(
            "Expected 'poolid', 'vm_size', 'mount_args', 'artifact_location', 'artifact_sas', 'subnet_id', 'max_node_count', 'msi_name', 'resource_group_name' , 'batch_account_name'"
        )
        sys.exit(1)

    (pool_id, vm_size, mount_args, artifact_location, artifact_sas,
     subnet_id, max_node_count, msi_name, resource_group_name,
     batch_account_name) = (str(a) for a in argv[1:11])

    # Fetch the templated pool definition; close the connection promptly.
    url = "{0}/azurebatch/pool.json{1}".format(artifact_location, artifact_sas)
    with urllib.request.urlopen(url) as response:
        text = response.read().decode("utf-8")

    text = fill_template(text, {
        "POOL_ID_HERE": pool_id,
        "VM_SIZE_HERE": vm_size,
        "MOUNT_ARGS_HERE": mount_args,
        "ARTIFACT_LOCATION_HERE": artifact_location,
        "ARTIFACT_SAS_HERE": artifact_sas,
        "SUBNET_ID_HERE": subnet_id,
        "MAX_NODE_COUNT_HERE": max_node_count,
    })

    # Write the rendered pool definition for az to consume.
    with open("pool.complete.json", "w") as file:
        file.write(text)

    # Log in with the managed identity, then create the pool.
    run_az_command(["az", "login", "--identity", "-u", msi_name])
    run_az_command(["az", "batch", "account", "login", "--name", batch_account_name, "-g", resource_group_name])
    run_az_command(["az", "batch", "pool", "create", "--json-file", "pool.complete.json"])


if __name__ == "__main__":
    main(sys.argv)