I am trying to set up a code-based Data Context, with a hosted static site and all stores in Azure. Here are the config and the test code:

from ruamel import yaml
from great_expectations.core.batch import RuntimeBatchRequest
from great_expectations.data_context import BaseDataContext
from great_expectations.data_context.types.base import DataContextConfig, DatasourceConfig

connection_str = "<con string>"
data_context_config = DataContextConfig(
    config_version=2,
    plugins_directory=None,
    config_variables_file_path=None,
    datasources={
        "spark_datasource": DatasourceConfig(
            class_name="Datasource",
            execution_engine={
                "class_name": "SparkDFExecutionEngine"
            },
            data_connectors={
                "spark_connector": {
                    "module_name": "great_expectations.datasource.data_connector",
                    "class_name": "RuntimeDataConnector",
                    "batch_identifiers": [
                        "some_key_maybe_pipeline_stage",
                        "some_other_key_maybe_run_id",
                    ],
                }
            },
        )
    },
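    # All stores and the Data Docs site use the storage account's $web
    # container; "$" is written as "\$" so that Great Expectations'
    # config-variable substitution does not treat "$web" as a variable.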
    stores={
        "expectations_AZ_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "class_name": "TupleAzureBlobStoreBackend",
                "container": "\\$web",
                "prefix": "",
                "connection_string": connection_str
            },
        },
        "validations_AZ_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "class_name": "TupleAzureBlobStoreBackend",
                "container": "\\$web",
                "prefix": "",
                "connection_string": connection_str
            },
        },
        "evaluation_parameter_AZ_store": {
            "class_name": "EvaluationParameterStore",
            "store_backend": {
                "class_name": "TupleAzureBlobStoreBackend",
                "container": "\\$web",
                "prefix": "",
                "connection_string": connection_str
            },
        },
        "checkpoint_AZ_store": {
            "class_name": "CheckpointStore",
            "store_backend": {
                "class_name": "TupleAzureBlobStoreBackend",
                "container": "\\$web",
                "prefix": "",
                "connection_string": connection_str
            },
        },
    },
    expectations_store_name="expectations_AZ_store",
    validations_store_name="validations_AZ_store",
    evaluation_parameter_store_name="evaluation_parameter_AZ_store",
    checkpoint_store_name="checkpoint_AZ_store",
    data_docs_sites={
        "az_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleAzureBlobStoreBackend",
                "container": "\\$web",
                "prefix": "data_docs",
                "connection_string": connection_str,
            },
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
        }
    },
    validation_operators={
        "action_list_operator": {
            "class_name": "ActionListValidationOperator",
            "action_list": [
                {
                    "name": "store_validation_result",
                    "action": {"class_name": "StoreValidationResultAction"},
                },
                {
                    "name": "store_evaluation_params",
                    "action": {"class_name": "StoreEvaluationParametersAction"},
                },
                {
                    "name": "update_data_docs",
                    "action": {"class_name": "UpdateDataDocsAction"},
                },
            ],
        }
    },
)

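# Running on Databricks, so the global `spark` session and the sample NYC
# taxi dataset under /databricks-datasets are available out of the box.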
df = spark.read.format("csv")\
    .option("header", "true")\
    .option("inferSchema", "true")\
    .load("/databricks-datasets/nyctaxi/tripdata/yellow/yellow_tripdata_2019-01.csv.gz")

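# Instantiate the context directly from the in-code config; there is no
# great_expectations.yml or config variables file on disk.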
context = BaseDataContext(project_config=data_context_config)

expectation_suite_name = "test_expectation_suite"
context.create_expectation_suite(
    expectation_suite_name=expectation_suite_name, overwrite_existing=True
)
# batch_request is a RuntimeBatchRequest over the dataframe, mirroring the
# identifiers used in the checkpoint below
batch_request = RuntimeBatchRequest(
    datasource_name="spark_datasource",
    data_connector_name="spark_connector",
    data_asset_name="test_data_asset",
    runtime_parameters={"batch_data": df},
    batch_identifiers={
        "some_key_maybe_pipeline_stage": "prod",
        "some_other_key_maybe_run_id": "my_run_name_test",
    },
)
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)

# print(validator.head())
validator.expect_column_values_to_not_be_null(column="passenger_count")
validator.expect_column_values_to_be_between(
    column="congestion_surcharge", min_value=0, max_value=1000
)
validator.save_expectation_suite(discard_failed_expectations=False)
my_checkpoint_name = "test_checkpoint"
checkpoint_config = {
    "name": my_checkpoint_name,
    "config_version": 1.0,
    "class_name": "SimpleCheckpoint",
    "run_name_template": "%Y%m%d-%H%M%S-test_run",
    "validations": [
        {
            "batch_request": {
                "datasource_name": "spark_datasource",
                "data_connector_name": "spark_connector",
                "data_asset_name": "test_data_asset",
            },
            "expectation_suite_name": expectation_suite_name,
            "action_list": [
                {
                    "name": "store_validation_result",
                    "action": {"class_name": "StoreValidationResultAction"},
                },
                {
                    "name": "store_evaluation_params",
                    "action": {"class_name": "StoreEvaluationParametersAction"},
                },
                {
                    "name": "update_data_docs",
                    "action": {"class_name": "UpdateDataDocsAction"},
                },
            ],
        }
    ],
}
my_checkpoint = context.test_yaml_config(yaml.dump(checkpoint_config))
context.add_checkpoint(**checkpoint_config)
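# add_checkpoint is the call that fails with the error described below;
# execution never reaches run_checkpoint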
checkpoint_result = context.run_checkpoint(
    checkpoint_name=my_checkpoint_name,
    batch_request={
        "runtime_parameters": {"batch_data": df},
        "batch_identifiers": {
            "some_key_maybe_pipeline_stage": "prod",
            "some_other_key_maybe_run_id": "my_run_name_test",
        },
    },
)

However, when I run it, it fails at context.add_checkpoint(**checkpoint_config) with the error message: "ValidationMetricIdentifier tuple must have at least six components".
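For what it's worth, the Azure side can be smoke-tested on its own with the azure-storage-blob SDK; a minimal sketch, assuming the same connection string and the $web container (the blob name here is made up):

from azure.storage.blob import BlobServiceClient

# Write one throwaway blob through the same connection string and container
# that the GE store backends point at, to rule out connectivity/permissions.
service = BlobServiceClient.from_connection_string(connection_str)
container_client = service.get_container_client("$web")
container_client.upload_blob("ge_connectivity_test.txt", b"hello", overwrite=True)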

I would appreciate any help, as I am struggling to get this working.
