Bring over code from existing GE flows
This commit is contained in:
parent
3b67c64959
commit
e675056e3f
1 changed files with 139 additions and 0 deletions
139
lolafect/data_testing.py
Normal file
139
lolafect/data_testing.py
Normal file
|
|
@ -0,0 +1,139 @@
|
||||||
|
from prefect import task
|
||||||
|
|
||||||
|
|
||||||
|
@task
|
||||||
|
def run_data_test_on_mysql(
|
||||||
|
name,
|
||||||
|
mysql_credentials,
|
||||||
|
query,
|
||||||
|
expectations,
|
||||||
|
):
|
||||||
|
data_context = create_in_memory_data_context(mysql_credentials)
|
||||||
|
|
||||||
|
if expectation_configurations:
|
||||||
|
data_context = create_expectation_suite(
|
||||||
|
data_context, expectation_suite_name, expectation_configurations
|
||||||
|
)
|
||||||
|
|
||||||
|
data_context = create_checkpoint(
|
||||||
|
data_context, query_for_checkpoint, expectation_suite_name
|
||||||
|
)
|
||||||
|
|
||||||
|
results = run_checkpoint(data_context)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def create_in_memory_data_context(dw_connection):
|
||||||
|
|
||||||
|
print("Preparing DataContext.")
|
||||||
|
data_context = BaseDataContext(
|
||||||
|
project_config=DataContextConfig(
|
||||||
|
datasources={
|
||||||
|
"dw-staging": DatasourceConfig(
|
||||||
|
class_name="Datasource",
|
||||||
|
execution_engine={
|
||||||
|
"class_name": "SqlAlchemyExecutionEngine",
|
||||||
|
"connection_string": "mysql+pymysql://%s:%s@%s:%s/staging"
|
||||||
|
% (
|
||||||
|
dw_connection.raw_user,
|
||||||
|
urlquote(dw_connection.raw_password),
|
||||||
|
dw_connection.host,
|
||||||
|
dw_connection.port,
|
||||||
|
),
|
||||||
|
},
|
||||||
|
data_connectors={
|
||||||
|
"default_runtime_data_connector_name": {
|
||||||
|
"class_name": "RuntimeDataConnector",
|
||||||
|
"batch_identifiers": ["default_identifier_name"],
|
||||||
|
},
|
||||||
|
"default_inferred_data_connector_name": {
|
||||||
|
"class_name": "InferredAssetSqlDataConnector",
|
||||||
|
"name": "whole_table",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
},
|
||||||
|
store_backend_defaults=S3StoreBackendDefaults(
|
||||||
|
default_bucket_name="pdo-prod-great-expectations"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
print("DataContext is ready.")
|
||||||
|
|
||||||
|
return data_context
|
||||||
|
|
||||||
|
|
||||||
|
def create_expectation_suite(
|
||||||
|
data_context, expectation_suite_name, expectation_configurations
|
||||||
|
):
|
||||||
|
print("Preparing Expectation Suite.")
|
||||||
|
suite = data_context.create_expectation_suite(
|
||||||
|
expectation_suite_name,
|
||||||
|
overwrite_existing=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
for expectation_configuration in expectation_configurations:
|
||||||
|
suite.add_expectation(expectation_configuration=expectation_configuration)
|
||||||
|
|
||||||
|
data_context.save_expectation_suite(suite)
|
||||||
|
|
||||||
|
print("Expectation Suite was stored.")
|
||||||
|
|
||||||
|
return data_context
|
||||||
|
|
||||||
|
|
||||||
|
def create_checkpoint(data_context, query_for_checkpoint, expectation_suite_name):
|
||||||
|
|
||||||
|
print("Preparing Checkpoint.")
|
||||||
|
|
||||||
|
checkpoint_config = {
|
||||||
|
"name": f"{LOLACONFIG.FLOW_NAME_UDCS}_checkpoint",
|
||||||
|
"class_name": "Checkpoint",
|
||||||
|
"config_version": 1,
|
||||||
|
"run_name_template": f"{LOLACONFIG.FLOW_NAME_UDCS}_checkpoint",
|
||||||
|
"action_list": [
|
||||||
|
{
|
||||||
|
"name": "store_validation_result",
|
||||||
|
"action": {"class_name": "StoreValidationResultAction"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "store_evaluation_params",
|
||||||
|
"action": {"class_name": "StoreEvaluationParametersAction"},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"validations": [
|
||||||
|
{
|
||||||
|
"batch_request": {
|
||||||
|
"datasource_name": "dw-staging",
|
||||||
|
"data_connector_name": "default_runtime_data_connector_name",
|
||||||
|
"data_asset_name": f"{LOLACONFIG.FLOW_NAME_UDCS}_validation_query",
|
||||||
|
"runtime_parameters": {"query": query_for_checkpoint},
|
||||||
|
"batch_identifiers": {
|
||||||
|
"default_identifier_name": "default_identifier"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"expectation_suite_name": expectation_suite_name,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
# The checkpoint gets persisted. Now it can be called again in a different session.
|
||||||
|
data_context.add_checkpoint(**checkpoint_config)
|
||||||
|
|
||||||
|
print("Checkpoint was stored.")
|
||||||
|
|
||||||
|
return data_context
|
||||||
|
|
||||||
|
|
||||||
|
def run_checkpoint(the_data_context):
|
||||||
|
|
||||||
|
print("Running Checkpoint.")
|
||||||
|
|
||||||
|
results = the_data_context.run_checkpoint(
|
||||||
|
checkpoint_name=f"{LOLACONFIG.FLOW_NAME_UDCS}_checkpoint"
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Checkpoint finished.")
|
||||||
|
|
||||||
|
return results
|
||||||
Loading…
Add table
Add a link
Reference in a new issue