Skip to content

Catalog

Catalog provides a way to store the data generated by individual steps of the dag and to retrieve it in downstream steps of the dag. Please refer to concepts for more detailed information.

do-nothing

A no-op implementation that does nothing.

Configuration

catalog:
  type: do-nothing

file-system

In this configuration, a local folder is used as the catalog store. The default location is .catalog. Every execution of the pipeline creates a new directory, named after the run_id, to store all the generated artifacts.

Configuration

1
2
3
4
catalog:
  type: file-system
  config:
    catalog_location: .catalog # default value

Example


  1. Use local file-system as catalog, default location is .catalog

The files suffixed with .execution.log contain the stdout and stderr of the command.

.catalog
└── juicy-blackwell-0625
    ├── Create_Content.execution.log
    ├── Setup.execution.log
    └── data
        └── hello.txt

3 directories, 3 files

The execution logs of all the steps, along with the files, are stored in the catalog. Please look at the data_catalog entries of each step in the run log.

{
    "run_id": "juicy-blackwell-0625",
    "dag_hash": "",
    "use_cached": false,
    "tag": "",
    "original_run_id": "",
    "status": "SUCCESS",
    "steps": {
        "Setup": {
            "name": "Setup",
            "internal_name": "Setup",
            "status": "SUCCESS",
            "step_type": "task",
            "message": "",
            "mock": false,
            "code_identities": [
                {
                    "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a",
                    "code_identifier_type": "git",
                    "code_identifier_dependable": true,
                    "code_identifier_url": "https://github.com/AstraZeneca/runnable-core.git",
                    "code_identifier_message": ""
                }
            ],
            "attempts": [
                {
                    "attempt_number": 1,
                    "start_time": "2024-02-04 06:25:26.014967",
                    "end_time": "2024-02-04 06:25:26.026029",
                    "duration": "0:00:00.011062",
                    "status": "SUCCESS",
                    "message": "",
                    "parameters": {}
                }
            ],
            "user_defined_metrics": {},
            "branches": {},
            "data_catalog": [
                {
                    "name": "Setup.execution.log",
                    "data_hash": "b38eb7b5290ff433276a75fdd7a3935335aedff3ab5ee8714f6ea735d9c9492c",
                    "catalog_relative_path": "juicy-blackwell-0625/Setup.execution.log",
                    "catalog_handler_location": ".catalog",
                    "stage": "put"
                }
            ]
        },
        "Create Content": {
            "name": "Create Content",
            "internal_name": "Create Content",
            "status": "SUCCESS",
            "step_type": "task",
            "message": "",
            "mock": false,
            "code_identities": [
                {
                    "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a",
                    "code_identifier_type": "git",
                    "code_identifier_dependable": true,
                    "code_identifier_url": "https://github.com/AstraZeneca/runnable-core.git",
                    "code_identifier_message": ""
                }
            ],
            "attempts": [
                {
                    "attempt_number": 1,
                    "start_time": "2024-02-04 06:25:26.092282",
                    "end_time": "2024-02-04 06:25:26.100095",
                    "duration": "0:00:00.007813",
                    "status": "SUCCESS",
                    "message": "",
                    "parameters": {}
                }
            ],
            "user_defined_metrics": {},
            "branches": {},
            "data_catalog": [
                {
                    "name": "Create_Content.execution.log",
                    "data_hash": "b38eb7b5290ff433276a75fdd7a3935335aedff3ab5ee8714f6ea735d9c9492c",
                    "catalog_relative_path": "juicy-blackwell-0625/Create_Content.execution.log",
                    "catalog_handler_location": ".catalog",
                    "stage": "put"
                },
                {
                    "name": "data/hello.txt",
                    "data_hash": "50e75c30352e8ef442b2b5be37dd19533f9334faaf8c4e41f2b528df57d3c20c",
                    "catalog_relative_path": "juicy-blackwell-0625/data/hello.txt",
                    "catalog_handler_location": ".catalog",
                    "stage": "put"
                }
            ]
        },
        "success": {
            "name": "success",
            "internal_name": "success",
            "status": "SUCCESS",
            "step_type": "success",
            "message": "",
            "mock": false,
            "code_identities": [
                {
                    "code_identifier": "39cd98770cb2fd6994d8ac08ae4c5506e5ce694a",
                    "code_identifier_type": "git",
                    "code_identifier_dependable": true,
                    "code_identifier_url": "https://github.com/AstraZeneca/runnable-core.git",
                    "code_identifier_message": ""
                }
            ],
            "attempts": [
                {
                    "attempt_number": 1,
                    "start_time": "2024-02-04 06:25:26.165278",
                    "end_time": "2024-02-04 06:25:26.165355",
                    "duration": "0:00:00.000077",
                    "status": "SUCCESS",
                    "message": "",
                    "parameters": {}
                }
            ],
            "user_defined_metrics": {},
            "branches": {},
            "data_catalog": []
        }
    },
    "parameters": {},
    "run_config": {
        "executor": {
            "service_name": "local",
            "service_type": "executor",
            "enable_parallel": false,
            "overrides": {}
        },
        "run_log_store": {
            "service_name": "buffered",
            "service_type": "run_log_store"
        },
        "secrets_handler": {
            "service_name": "do-nothing",
            "service_type": "secrets"
        },
        "catalog_handler": {
            "service_name": "file-system",
            "service_type": "catalog",
            "catalog_location": ".catalog"
        },
        "experiment_tracker": {
            "service_name": "do-nothing",
            "service_type": "experiment_tracker"
        },
        "pipeline_file": "",
        "parameters_file": "",
        "configuration_file": "examples/configs/fs-catalog.yaml",
        "tag": "",
        "run_id": "juicy-blackwell-0625",
        "use_cached": false,
        "original_run_id": "",
        "dag": {
            "start_at": "Setup",
            "name": "",
            "description": "",
            "steps": {
                "Setup": {
                    "type": "task",
                    "name": "Setup",
                    "next": "Create Content",
                    "on_failure": "",
                    "overrides": {},
                    "catalog": null,
                    "max_attempts": 1,
                    "command": "mkdir -p data",
                    "command_type": "shell",
                    "node_name": "Setup"
                },
                "Create Content": {
                    "type": "task",
                    "name": "Create Content",
                    "next": "success",
                    "on_failure": "",
                    "overrides": {},
                    "catalog": {
                        "get": [],
                        "put": [
                            "data/hello.txt"
                        ]
                    },
                    "max_attempts": 1,
                    "command": "echo \"Hello from runnable\" >> data/hello.txt",
                    "command_type": "shell",
                    "node_name": "Create Content"
                },
                "success": {
                    "type": "success",
                    "name": "success"
                },
                "fail": {
                    "type": "fail",
                    "name": "fail"
                }
            }
        },
        "dag_hash": "",
        "execution_plan": "chained"
    }
}