I have 2 scripts that use Azure ML SDK. In the main script I call the second script connect_to_blob.py
. The connect_to_blob.py
fails with the following error:
User program failed with ImportError: cannot import name 'BlockBlobService'
If I run the code of connect_to_blob.py
from Jupyter Notebook in Azure VM, it works well. I assume that for some reason azure-storage-blob
is unavailable for connect_to_blob.py
.
Cannot understand why azure-storage-blob
is not installed from requirements.txt
.
Main script (works fine): it
from azureml.core import Workspace
from azureml.core import Environment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.experiment import Experiment
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.runconfig import RunConfiguration
print("Create workspace connection")
ws = Workspace.from_config()
source_directory = './sources'
run_config = RunConfiguration()
env = Environment.from_pip_requirements(name = "myenv",
file_path = "requirements.txt")
env.docker.enabled = True
run_config.environment = env
compute_name = "aml-compute"
vm_size = "STANDARD_D1"
if compute_name in ws.compute_targets:
compute_target = ws.compute_targets[compute_name]
if compute_target and type(compute_target) is AmlCompute:
print('Found compute target: ' + compute_name)
else:
print('Creating a new compute target...')
provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
min_nodes=0,
max_nodes=4)
compute_target = ComputeTarget.create(
ws, compute_name, provisioning_config)
compute_target.wait_for_completion(
show_output=True, min_node_count=None, timeout_in_minutes=20)
print(compute_target.status.serialize())
step1 = PythonScriptStep(name="train_connection_step",
script_name="connect_to_blob.py",
compute_target=compute_target,
source_directory=source_directory,
runconfig=run_config,
allow_reuse=True)
print("Step1 created")
steps = [step1]
pipeline1 = Pipeline(workspace=ws, steps=steps)
print ("Pipeline is built")
pipeline1.validate()
print("Pipeline validation complete")
pipeline_run = Experiment(ws, 'Hello_World1').submit(pipeline1, regenerate_outputs=False)
print("Pipeline is submitted for execution")
connect_to_blob.py
from azure.storage.blob import BlockBlobService
blob_service_client = BlockBlobService(account_name='xxx',
account_key='yyy')
blob_names = blob_service_client.list_blobs("root_container")
for blob in blob_names: print(blob.name)
requirements.txt
azure==4.0.0
azure-common==1.1.23
azureml-core==1.0.74
azureml-dataprep-native==13.1.0
azureml-dataprep[fuse,pandas]==1.1.31
azureml-pipeline-core==1.0.74
azureml-pipeline-steps==1.0.74
azureml-pipeline==1.0.74
azureml-sdk==1.0.74
azureml-telemetry==1.0.74
azureml-train-core==1.0.74
azureml-train-restclients-hyperdrive==1.0.74
azureml-train==1.0.74
azure-storage-blob==12.0.0
joblib==0.14.0
pandas==0.25.3
python-dateutil==2.8.0 ; python_version >= '2.7'
requests-oauthlib==1.3.0
requests==2.22.0