I am writing code to run a psyaprk job using AIrflow 2.0.2,and I only need to start the master nodes, no core nodes
My JOB_FLOW_OVERRIDES is throwing an error saying: An instance group must have at least one instance. I already have InstanceCount= 1, still throwing me an error. Can someone help me
JOB_FLOW_OVERRIDES = {
"Name": "Pitchbook ETL",
"LogUri": "s3://aws-logs-5-us-west-1/elasticmapreduce/",
"ReleaseLabel": "emr-6.2.0",
"Applications": [
{
"Name": "Spark"
},
],
"Instances": {
"InstanceGroups": [
{
"Name": "Master nodes",
"Market": "ON_DEMAND",
"InstanceRole": "MASTER",
"InstanceType": "m5.xlarge",
"InstanceCount": 1,
},
{"Name": "Core nodes",
"Market": "ON_DEMAND",
"InstanceRole": "CORE",
"InstanceType": "m5.xlarge",
"InstanceCount": 0,}
],
"KeepJobFlowAliveWhenNoSteps": False,
"TerminationProtected": False,
},
"BootstrapActions": [
{
"Name": "Install Dependencies",
"ScriptBootstrapAction": {
"Path": "s3://"+path+"-pyspark/pitchbook/install_python_modules.sh",
}
}
],
"Configurations": [{"Classification":"core-site",
"Properties":{"io.compression.codec.lzo.class":"",
"io.compression.codecs":"org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,\
org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec"}}
# {"Classification":"export","Properties":{"TZ":"US/Central"}}
],
"VisibleToAllUsers": True,
"EbsRootVolumeSize" :20,
"JobFlowRole": "EMR_EC2_DefaultRole",
"ServiceRole": "EMR_DefaultRole",
"Tags": [
{
"Key": "Environment",
"Value": "Development"
},
{
"Key": "Name",
"Value": "Airflow EMR Project"
},
{
"Key": "Owner",
"Value": "Data Science Team"
}
]
}