diff --git a/api-python-sql.yaml b/api-python-sql.yaml
index a974853..c764ed2 100644
--- a/api-python-sql.yaml
+++ b/api-python-sql.yaml
@@ -32,7 +32,7 @@ tasks:
FROM read_csv_auto('{{ workingDir }}/in.csv', header=True)
GROUP BY brand
ORDER BY avg_price DESC;
- store: true
+ fetchType: STORE
extend:
title: Extract data from a REST API, process it in Python with Polars in a
diff --git a/business-automation.yaml b/business-automation.yaml
index 9aa578f..1aa5b57 100644
--- a/business-automation.yaml
+++ b/business-automation.yaml
@@ -39,7 +39,7 @@ tasks:
- id: query
type: io.kestra.plugin.jdbc.sqlite.Query
url: jdbc:sqlite:kestra.db
- store: true
+ fetchType: STORE
sql: |
SELECT * FROM features
ORDER BY release_version;
diff --git a/data-engineering-pipeline.yaml b/data-engineering-pipeline.yaml
index ca76b76..977e8c8 100644
--- a/data-engineering-pipeline.yaml
+++ b/data-engineering-pipeline.yaml
@@ -53,7 +53,7 @@ tasks:
FROM read_json_auto('{{ workingDir }}/products.json')
GROUP BY brand
ORDER BY avg_price DESC;
- store: true
+ fetchType: STORE
extend:
title: Getting started with Kestra — a Data Engineering Pipeline example
diff --git a/dremio-sql-python.yaml b/dremio-sql-python.yaml
index ebae8ff..d37f990 100644
--- a/dremio-sql-python.yaml
+++ b/dremio-sql-python.yaml
@@ -11,9 +11,8 @@ tasks:
url: jdbc:dremio:direct=sql.dremio.cloud:443;ssl=true;PROJECT_ID={{vars.project_id}};schema=postgres.public
username: $token
password: "{{ secret('DREMIO_TOKEN') }}"
- sql: SELECT first_name, last_name, hire_date, salary FROM
- postgres.public.employees LIMIT 100;
- store: true
+ sql: SELECT first_name, last_name, hire_date, salary FROM postgres.public.employees LIMIT 100;
+ fetchType: STORE
- id: python
type: io.kestra.plugin.scripts.python.Script
diff --git a/http-check.yaml b/http-check.yaml
index 1138f23..c1a8f47 100644
--- a/http-check.yaml
+++ b/http-check.yaml
@@ -1,13 +1,16 @@
id: http-check
namespace: company.team
+
inputs:
- id: uri
type: URI
defaults: https://kestra.io
+
tasks:
- id: api
type: io.kestra.plugin.core.http.Request
uri: "{{ inputs.uri }}"
+
- id: check_status
type: io.kestra.plugin.core.flow.If
condition: "{{ outputs.api.code != 200 }}"
@@ -15,6 +18,7 @@ tasks:
- id: unhealthy
type: io.kestra.plugin.core.log.Log
message: Server unhealthy!!! Response {{ outputs.api.body }}
+
- id: send_slack_alert
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
url: "{{ secret('SLACK_WEBHOOK') }}"
@@ -27,10 +31,12 @@ tasks:
- id: healthy
type: io.kestra.plugin.core.log.Log
message: Everything is fine!
+
triggers:
- id: daily
type: io.kestra.plugin.core.trigger.Schedule
cron: 0 9 * * *
+
extend:
title: Monitor availability of an HTTP endpoint and send a Slack alert if a
service is unhealthy
diff --git a/hubspot-to-bigquery.yaml b/hubspot-to-bigquery.yaml
index 00e0022..1476b04 100644
--- a/hubspot-to-bigquery.yaml
+++ b/hubspot-to-bigquery.yaml
@@ -1,5 +1,6 @@
id: hubspot-to-bigquery
namespace: company.team
+
tasks:
- id: sync
type: io.kestra.plugin.cloudquery.Sync
@@ -32,10 +33,12 @@ tasks:
- "*"
spec:
max_requests_per_second: 5
+
triggers:
- id: schedule
type: io.kestra.plugin.core.trigger.Schedule
cron: 0 6 * * *
+
extend:
title: Sync Hubspot CRM data to BigQuery on a schedule
description: >-
@@ -43,21 +46,18 @@ extend:
`sync` task from the CloudQuery plugin uses the `hubspot` source and the
`bigquery` destination.
-
Note how we use the `sa.json` credentials file to authenticate with GCP and
the `HUBSPOT_APP_TOKEN` environment variable to authenticate with Hubspot
CRM.
- To avoid rate limiting issues, you can set the `max_requests_per_second` parameter in the `hubspot` source configuration. In this example, we set it to 5 requests per second.
+ To avoid rate limiting issues, you can set the `max_requests_per_second` parameter in the `hubspot` source configuration. In this example, we set it to 5 requests per second.
The `schedule` trigger runs the flow every day at 6:00 AM.
-
Additionally, you can [generate an API
key](https://docs.cloudquery.io/docs/deployment/generate-api-key) to use
premium plugins. You can add the API key as an environment variable:
-
```yaml
- id: hn_to_duckdb
type: io.kestra.plugin.cloudquery.Sync
diff --git a/infrastructure-automation.yaml b/infrastructure-automation.yaml
index ed6dc60..ab79866 100644
--- a/infrastructure-automation.yaml
+++ b/infrastructure-automation.yaml
@@ -1,10 +1,12 @@
id: infrastructure-automation
namespace: tutorial
description: Infrastructure Automation
+
inputs:
- id: docker_image
type: STRING
defaults: kestra/myimage:latest
+
tasks:
- id: build_image
type: io.kestra.plugin.docker.Build
@@ -18,6 +20,7 @@ tasks:
registry: https://index.docker.io/v1/
username: "{{ secret('DOCKERHUB_USERNAME') }}"
password: "{{ secret('DOCKERHUB_PASSWORD') }}"
+
- id: run_container
type: io.kestra.plugin.docker.Run
pullPolicy: NEVER
@@ -26,6 +29,7 @@ tasks:
- pip
- show
- kestra
+
- id: run_terraform
type: io.kestra.plugin.terraform.cli.TerraformCLI
beforeCommands:
@@ -48,24 +52,19 @@ tasks:
}
}
-
provider "http" {}
-
provider "local" {}
-
variable "pokemon_names" {
type = list(string)
default = ["pikachu", "psyduck", "charmander", "bulbasaur"]
}
-
data "http" "pokemon" {
count = length(var.pokemon_names)
url = "https://pokeapi.co/api/v2/pokemon/${var.pokemon_names[count.index]}"
}
-
locals {
pokemon_details = [for i in range(length(var.pokemon_names)) : {
name = jsondecode(data.http.pokemon[i].response_body)["name"]
@@ -75,19 +74,19 @@ tasks:
file_content = join("\n\n", [for detail in local.pokemon_details : "Name: ${detail.name}\nTypes: ${detail.types}"])
}
-
resource "local_file" "pokemon_details_file" {
filename = "${path.module}/pokemon.txt"
content = local.file_content
}
-
output "file_path" {
value = local_file.pokemon_details_file.filename
}
+
- id: log_pokemon
type: io.kestra.plugin.core.log.Log
message: "{{ read(outputs.run_terraform.outputFiles['pokemon.txt']) }}"
+
extend:
title: Getting started with Kestra — an Infrastructure Automation workflow example
description: >-
@@ -98,12 +97,8 @@ extend:
The flow has four tasks:
1. The first task builds a Docker image.
-
2. The second task runs a container using the image.
-
- 3. The third task uses Terraform to create a file with details about
- Pokémon.
-
+ 3. The third task uses Terraform to create a file with details about Pokémon.
4. The fourth task logs the details about Pokémon.
tags:
- Getting Started
diff --git a/ingest-to-datalake-event-driven.yaml b/ingest-to-datalake-event-driven.yaml
index 6da51ae..de7e8db 100644
--- a/ingest-to-datalake-event-driven.yaml
+++ b/ingest-to-datalake-event-driven.yaml
@@ -14,6 +14,7 @@ tasks:
- id: clone_repository
type: io.kestra.plugin.git.Clone
url: https://github.com/kestra-io/scripts
+
- id: etl
type: io.kestra.plugin.scripts.python.Commands
warningOnStdErr: false
@@ -27,6 +28,7 @@ tasks:
commands:
- python etl/aws_iceberg_fruit.py {{ vars.destination_prefix }}/{{
trigger.objects | jq('.[].key') | first }}
+
- id: merge_query
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -43,6 +45,7 @@ tasks:
WHEN NOT MATCHED
THEN INSERT (id, fruit, berry, update_timestamp)
VALUES(r.id, r.fruit, r.berry, current_timestamp);
+
- id: optimize
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -68,13 +71,13 @@ triggers:
region: "{{ secret('AWS_DEFAULT_REGION') }}"
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
+
extend:
title: Event-driven data ingestion to AWS S3 data lake managed by Apache
Iceberg, AWS Glue and Amazon Athena
description: >-
This workflow ingests data to an S3 data lake using a Python script.
-
This script is stored in a public GitHub repository so you can directly use
this workflow as long as you adjust your AWS credentials, S3 bucket name and
the Amazon Athena table name. The script takes the detected S3 object key
diff --git a/ingest-to-datalake-git.yaml b/ingest-to-datalake-git.yaml
index 12c3425..0906ffe 100644
--- a/ingest-to-datalake-git.yaml
+++ b/ingest-to-datalake-git.yaml
@@ -1,9 +1,11 @@
id: ingest-to-datalake-git
namespace: company.team
+
variables:
bucket: kestraio
prefix: inbox
database: default
+
tasks:
- id: list_objects
type: io.kestra.plugin.aws.s3.List
@@ -12,6 +14,7 @@ tasks:
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
region: "{{ secret('AWS_DEFAULT_REGION') }}"
bucket: "{{ vars.bucket }}"
+
- id: check
type: io.kestra.plugin.core.flow.If
condition: "{{ outputs.list_objects.objects }}"
@@ -23,6 +26,7 @@ tasks:
type: io.kestra.plugin.git.Clone
url: https://github.com/kestra-io/scripts
branch: main
+
- id: ingest_to_datalake
type: io.kestra.plugin.scripts.python.Commands
warningOnStdErr: false
@@ -35,6 +39,7 @@ tasks:
containerImage: ghcr.io/kestra-io/aws:latest
commands:
- python etl/aws_iceberg_fruit.py
+
- id: merge_query
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -51,6 +56,7 @@ tasks:
WHEN NOT MATCHED
THEN INSERT (id, fruit, berry, update_timestamp)
VALUES(r.id, r.fruit, r.berry, current_timestamp);
+
- id: optimize
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -60,6 +66,7 @@ tasks:
outputLocation: s3://{{ vars.bucket }}/query_results/
query: |
OPTIMIZE fruits REWRITE DATA USING BIN_PACK;
+
- id: move_to_archive
type: io.kestra.plugin.aws.cli.AwsCLI
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -68,11 +75,13 @@ tasks:
commands:
- aws s3 mv s3://{{ vars.bucket }}/{{ vars.prefix }}/ s3://{{
vars.bucket }}/archive/{{ vars.prefix }}/ --recursive
+
triggers:
- id: hourly_schedule
type: io.kestra.plugin.core.trigger.Schedule
disabled: true
cron: "@hourly"
+
extend:
title: Ingest data to AWS S3 with Git, Python, Apache Iceberg, AWS Glue and
Amazon Athena
diff --git a/ingest-to-datalake-inline-python.yaml b/ingest-to-datalake-inline-python.yaml
index 089921a..4b363a2 100644
--- a/ingest-to-datalake-inline-python.yaml
+++ b/ingest-to-datalake-inline-python.yaml
@@ -1,9 +1,11 @@
id: ingest-to-datalake-inline-python
namespace: company.team
+
variables:
bucket: kestraio
prefix: inbox
database: default
+
tasks:
- id: list_objects
type: io.kestra.plugin.aws.s3.List
@@ -12,6 +14,7 @@ tasks:
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
region: "{{ secret('AWS_DEFAULT_REGION') }}"
bucket: "{{ vars.bucket }}"
+
- id: check
type: io.kestra.plugin.core.flow.If
condition: "{{outputs.list_objects.objects}}"
@@ -28,51 +31,33 @@ tasks:
containerImage: ghcr.io/kestra-io/aws:latest
script: >
import awswrangler as wr
-
from kestra import Kestra
-
# Iceberg table
-
BUCKET_NAME = "{{ vars.bucket }}"
-
DATABASE = "{{ vars.database }}"
-
TABLE = "raw_fruits"
-
# Iceberg table's location
-
S3_PATH = f"s3://{BUCKET_NAME}/{TABLE}"
-
S3_PATH_TMP = f"{S3_PATH}_tmp"
-
# File to ingest
-
PREFIX = "{{ vars.prefix }}"
-
INGEST_S3_KEY_PATH = f"s3://{BUCKET_NAME}/{PREFIX}/"
-
df = wr.s3.read_csv(INGEST_S3_KEY_PATH)
-
nr_rows = df.id.nunique()
-
print(f"Ingesting {nr_rows} rows")
Kestra.counter("nr_rows", nr_rows, {"table": TABLE})
-
df = df[~df["fruit"].isin(["Blueberry", "Banana"])]
-
df = df.drop_duplicates(subset=["fruit"], ignore_index=True,
keep="first")
-
wr.catalog.delete_table_if_exists(database=DATABASE, table=TABLE)
-
wr.athena.to_iceberg(
df=df,
database=DATABASE,
@@ -84,6 +69,7 @@ tasks:
)
print(f"New data successfully ingested into {S3_PATH}")
+
- id: merge_query
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -100,6 +86,7 @@ tasks:
WHEN NOT MATCHED
THEN INSERT (id, fruit, berry, update_timestamp)
VALUES(r.id, r.fruit, r.berry, current_timestamp);
+
- id: optimize
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -109,19 +96,21 @@ tasks:
outputLocation: s3://{{ vars.bucket }}/query_results/
query: |
OPTIMIZE fruits REWRITE DATA USING BIN_PACK;
+
- id: move_to_archive
type: io.kestra.plugin.aws.cli.AwsCLI
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
region: "{{ secret('AWS_DEFAULT_REGION') }}"
commands:
- - aws s3 mv s3://{{ vars.bucket }}/{{ vars.prefix }}/ s3://{{
- vars.bucket }}/archive/{{ vars.prefix }}/ --recursive
+ - aws s3 mv s3://{{ vars.bucket }}/{{ vars.prefix }}/ s3://{{ vars.bucket }}/archive/{{ vars.prefix }}/ --recursive
+
triggers:
- id: hourly_schedule
type: io.kestra.plugin.core.trigger.Schedule
cron: "@hourly"
disabled: true
+
extend:
title: Ingest data to AWS S3 with Python, Apache Iceberg, AWS Glue and Athena
description: >-
diff --git a/input-file-upload-gcs.yaml b/input-file-upload-gcs.yaml
index 74be228..d7b37d6 100644
--- a/input-file-upload-gcs.yaml
+++ b/input-file-upload-gcs.yaml
@@ -1,21 +1,23 @@
id: input-file-upload-gcs
namespace: company.team
+
inputs:
- id: file
type: FILE
- id: rename
type: STRING
+
tasks:
- id: upload
type: io.kestra.plugin.gcp.gcs.Upload
from: "{{ inputs.file }}"
to: gs://kestra-demo/{{ inputs.rename }}
+
extend:
title: Read a file from inputs and upload it to GCS
description: >-
This blueprint shows how to read a file from inputs and upload it to GCS.
-
> Note: Authentication to GCP can be done by setting the
`GOOGLE_APPLICATION_CREDENTIALS` variable in environment (via a service
account for example).
diff --git a/input-file.yaml b/input-file.yaml
index 7345879..1da9b3c 100644
--- a/input-file.yaml
+++ b/input-file.yaml
@@ -1,8 +1,10 @@
id: input-file
namespace: company.team
+
inputs:
- id: text_file
type: FILE
+
tasks:
- id: read_file
type: io.kestra.plugin.scripts.shell.Commands
@@ -10,6 +12,7 @@ tasks:
type: io.kestra.plugin.core.runner.Process
commands:
- cat "{{ inputs.text_file }}"
+
extend:
title: Read a file from inputs - a parametrized workflow with files input parameters
description: This example shows how to read a file from flow inputs.
diff --git a/json-from-api-to-mongodb.yaml b/json-from-api-to-mongodb.yaml
index ad39bb8..cdbf4e4 100644
--- a/json-from-api-to-mongodb.yaml
+++ b/json-from-api-to-mongodb.yaml
@@ -1,5 +1,6 @@
id: json-from-api-to-mongodb
namespace: company.team
+
tasks:
- id: generate_json
type: io.kestra.plugin.scripts.python.Script
@@ -20,6 +21,7 @@ tasks:
json.dump(data, output_file)
Kestra.outputs({'data': data, 'status': response.status_code})
+
- id: load_to_mongodb
type: io.kestra.plugin.mongodb.Load
connection:
@@ -27,6 +29,7 @@ tasks:
database: local
collection: github
from: "{{ outputs.generate_json.outputFiles['output.json'] }}"
+
extend:
title: Scrape API in a Python task running in a Docker container and load the
JSON document to a MongoDB collection
diff --git a/kafka-realtime-trigger.yaml b/kafka-realtime-trigger.yaml
index e18a674..6f7cb7f 100644
--- a/kafka-realtime-trigger.yaml
+++ b/kafka-realtime-trigger.yaml
@@ -1,5 +1,6 @@
id: kafka-realtime-trigger
namespace: company.team
+
tasks:
- id: insert_into_mongodb
type: io.kestra.plugin.mongodb.InsertOne
@@ -14,6 +15,7 @@ tasks:
"category": "{{ trigger.value | jq('.product_category') | first }}",
"brand": "{{ trigger.value | jq('.brand') | first }}"
}
+
triggers:
- id: realtime_trigger
type: io.kestra.plugin.kafka.RealtimeTrigger
@@ -23,76 +25,57 @@ triggers:
serdeProperties:
valueDeserializer: JSON
groupId: kestraConsumer
+
extend:
title: Use Kafka Realtime Trigger to push events into MongoDB
description: >
This flow will:
- 1. Get
- [triggered](https://kestra.io/plugins/plugin-kafka/triggers/io.kestra.plugin.kafka.realtimetrigger)
+ 1. Get [triggered](https://kestra.io/plugins/plugin-kafka/triggers/io.kestra.plugin.kafka.realtimetrigger)
every time the event lands in Kafka
2. The flow will push the data onto a collection in MongoDB using the
[InsertOne
task](https://kestra.io/plugins/plugin-mongodb/tasks/io.kestra.plugin.mongodb.insertone)
-
To setup Apache Kafka locally, follow the instructions mentioned in the
[official documentation](https://kafka.apache.org/quickstart). Once Apache
Kafka is installed, you can create the `products` topic, and start producing
data into the topic using the following commands:
```
-
# Create topic
-
$ bin/kafka-topics.sh --create --topic products --bootstrap-server
localhost:9092
-
# Produce data into Kafka topic
-
$ bin/kafka-console-producer.sh --topic products --bootstrap-server
localhost:9092
> {"product_id": 1, "product_name": "streamline turn-key systems",
"product_category": "Electronics", "brand": "gomez"}
-
```
-
To setup MongoDB server locally, you can use the following docker command:
-
```
-
docker run -d --name my-mongo \
-
- -e MONGO_INITDB_ROOT_USERNAME=mongoadmin \
-
- -e MONGO_INITDB_ROOT_PASSWORD=secret \
-
- -p 27017:27017 mongo
-
+ -e MONGO_INITDB_ROOT_USERNAME=mongoadmin \
+ -e MONGO_INITDB_ROOT_PASSWORD=secret \
+ -p 27017:27017 mongo
```
-
- You can use [MongoDB
- Compass](https://www.mongodb.com/products/tools/compass) as the UI client to
+ You can use [MongoDB Compass](https://www.mongodb.com/products/tools/compass) as the UI client to
work with MongoDB.
-
We are using the product JSON records generated from
[products.csv](https://huggingface.co/datasets/kestra/datasets/raw/main/csv/products.csv)
- in this blueprint. A sample event that can be produced into Kafka topic
- `products` can be:
-
+ in this blueprint. A sample event that can be produced into the Kafka topic `products` is:
```
-
{"product_id": 1, "product_name": "streamline turn-key systems",
"product_category": "Electronics", "brand": "gomez"}
-
```
+
tags:
- Realtime Trigger
- Queue
diff --git a/kubernetes-script-runner.yaml b/kubernetes-script-runner.yaml
index aac782f..0748fcf 100644
--- a/kubernetes-script-runner.yaml
+++ b/kubernetes-script-runner.yaml
@@ -1,5 +1,6 @@
id: kubernetes-script-runner
namespace: company.team
+
tasks:
- id: send_data
type: io.kestra.plugin.scripts.python.Script
@@ -34,7 +35,6 @@ tasks:
platform = platform.platform()
os_arch = f"{sys.platform}/{platform.machine()}"
-
def print_environment_info():
print(f"Host's network name: {host}")
print(f"Python version: {py_version}")
@@ -53,9 +53,9 @@ tasks:
with open(filename, "w") as json_file:
json.dump(env_info, json_file, indent=4)
-
if __name__ == '__main__':
print_environment_info()
+
extend:
title: Run a Python script in a Kubernetes pod
description: >-
diff --git a/limit-memory.yaml b/limit-memory.yaml
index 5c1df88..f3aef5a 100644
--- a/limit-memory.yaml
+++ b/limit-memory.yaml
@@ -1,5 +1,6 @@
id: limit-memory
namespace: company.team
+
tasks:
- id: docker_memory
type: io.kestra.plugin.scripts.python.Script
@@ -11,6 +12,7 @@ tasks:
script: |
import time
time.sleep(2)
+
extend:
title: Limit Docker container memory to 500MB for a Python script
description: The example below will use no more than 500MB of memory for the
diff --git a/listen-debezium.yaml b/listen-debezium.yaml
index 95da421..c47843e 100644
--- a/listen-debezium.yaml
+++ b/listen-debezium.yaml
@@ -1,5 +1,6 @@
id: listen-debezium
namespace: company.team
+
tasks:
- id: slack_notificaiton
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
@@ -9,9 +10,11 @@ tasks:
"channel": "U052JMPLBM3",
"text": "{{ trigger.size }} new rows have been added to the database"
}
+
- id: json
type: io.kestra.plugin.serdes.json.IonToJson
from: "{{ trigger.uris['postgres.order'] }}"
+
- id: python
type: io.kestra.plugin.scripts.python.Script
script: |
@@ -21,6 +24,7 @@ tasks:
data = json.load(fopen)
print(data)
+
triggers:
- id: listen_debezium
type: io.kestra.plugin.debezium.postgres.Trigger
@@ -33,6 +37,7 @@ triggers:
snapshotMode: INITIAL
format: INLINE
interval: PT30S
+
extend:
title: Use Debezium to trigger a flow whenever new entries hit a Postgres
database, then send notification to Slack and process data in Python
@@ -44,16 +49,11 @@ extend:
notification through Slack with the number of rows ingested and then execute
a Python script that read the corresponding data in json.
-
-
This blueprint can be reproduced with the following `docker-compose.yml`
setup
-
```
-
services:
-
db:
image: debezium/postgres:latest
restart: always
@@ -67,11 +67,11 @@ extend:
restart: always
ports:
- 8082:8080
- ```
+ ```
- You can access localhost:8082 to create and edit databases or tables via the adminer interface. The database is accessible on `5433` port.
+ You can access localhost:8082 to create and edit databases or tables via the Adminer interface. The database is accessible on port `5433`.
- Note that depending of your database installation, you might need to change the `pluginName` property of the debezium plugin. Other options can be seen in corresponding documentation.
+ Note that depending on your database installation, you might need to change the `pluginName` property of the Debezium plugin. Other options can be seen in the corresponding documentation.
tags:
- Postgres
- Trigger
diff --git a/load-multiple-csv-files-into-excel.yaml b/load-multiple-csv-files-into-excel.yaml
index e5c677a..2cb33ed 100644
--- a/load-multiple-csv-files-into-excel.yaml
+++ b/load-multiple-csv-files-into-excel.yaml
@@ -1,23 +1,29 @@
id: load-multiple-csv-files-into-excel
namespace: company.team
+
tasks:
- id: dataset1
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/raw/main/csv/products.csv
+
- id: dataset2
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/raw/main/csv/fruit.csv
+
- id: convert1
type: io.kestra.plugin.serdes.csv.CsvToIon
from: "{{ outputs.dataset1.uri }}"
+
- id: convert2
type: io.kestra.plugin.serdes.csv.CsvToIon
from: "{{ outputs.dataset2.uri }}"
+
- id: write_to_excel
type: io.kestra.plugin.serdes.excel.IonToExcel
from:
Sheet_1: "{{ outputs.convert1.uri }}"
Sheet_2: "{{ outputs.convert2.uri }}"
+
extend:
title: Load multiple CSV files from an HTTP API into Excel Sheets
description: >
@@ -26,20 +32,14 @@ extend:
`IonToExcel` plugins to download the CSV files, convert them to Ion format,
and write them to an Excel file.
-
The flow is composed of the following tasks:
1. Download the first CSV file from an HTTP API.
-
2. Download the second CSV file from an HTTP API.
-
3. Convert the first CSV file to Ion format.
-
4. Convert the second CSV file to Ion format.
-
5. Write the Ion data to an Excel file with two sheets.
-
The flow can be used to download multiple CSV files from an HTTP API and
load them into separate sheets in an Excel file. It's useful if you need to
get data from external sources and store them in Excel format.
diff --git a/load-pokemon.yaml b/load-pokemon.yaml
index 6f68076..c101c6e 100644
--- a/load-pokemon.yaml
+++ b/load-pokemon.yaml
@@ -1,14 +1,17 @@
id: load-pokemon
namespace: company.team
+
inputs:
- id: pokemon
type: STRING
defaults: psyduck
+
tasks:
- id: fetch_pokemon
type: io.kestra.plugin.core.http.Request
uri: https://pokeapi.co/api/v2/pokemon/{{ inputs.pokemon }}
method: GET
+
- id: load
type: io.kestra.plugin.mongodb.InsertOne
connection:
@@ -16,6 +19,7 @@ tasks:
database: local
collection: pokemon
document: "{{ outputs.fetch_pokemon.body }}"
+
extend:
title: Extract JSON data from an API and load it as a document to MongoDB
description: >-
diff --git a/load-to-cloud-storage.yaml b/load-to-cloud-storage.yaml
index b588344..f92a3be 100644
--- a/load-to-cloud-storage.yaml
+++ b/load-to-cloud-storage.yaml
@@ -1,20 +1,22 @@
id: load-to-cloud-storage
namespace: company.team
+
tasks:
- id: data
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/raw/main/csv/orders.csv
+
- id: cloud_storage
type: io.kestra.plugin.gcp.gcs.Upload
from: "{{ outputs.data.uri }}"
to: gs://kestra-demo/data.csv
+
extend:
title: Download data and upload to Google Cloud Storage
description: >-
This blueprint shows how to download a CSV file via http Download, and
upload it to GCS.
-
> Note: Authentication to GCP can be done by setting the
`GOOGLE_APPLICATION_CREDENTIALS` variable in environment (via a service
account for example).
diff --git a/log-flow.yaml b/log-flow.yaml
index 7a6c590..d5eb136 100644
--- a/log-flow.yaml
+++ b/log-flow.yaml
@@ -1,9 +1,11 @@
id: log-flow
namespace: company.team
+
tasks:
- id: log
type: io.kestra.plugin.core.log.Log
message: Hello world!
+
extend:
title: Log content in the console
description: A simple example to show how to display message in the console.
diff --git a/loguru.yaml b/loguru.yaml
index d8d98ed..ae5a8cf 100644
--- a/loguru.yaml
+++ b/loguru.yaml
@@ -1,9 +1,11 @@
id: loguru
namespace: company.team
+
inputs:
- id: nr_logs
type: INT
defaults: 100
+
tasks:
- id: reproducer
type: io.kestra.plugin.scripts.python.Script
@@ -13,22 +15,14 @@ tasks:
containerImage: ghcr.io/kestra-io/pydata:latest
script: >
from loguru import logger
-
from faker import Faker
-
import time
-
import sys
-
logger.remove()
-
logger.add(sys.stdout, level="INFO")
-
logger.add(sys.stderr, level="WARNING")
-
-
def generate_logs(fake, num_logs):
logger.debug("This message will not show up as the log level is set to INFO")
logger.warning("Starting to generate log messages")
@@ -38,10 +32,10 @@ tasks:
time.sleep(0.01)
logger.warning("Finished generating log messages")
-
if __name__ == "__main__":
faker_ = Faker()
generate_logs(faker_, int("{{ inputs.nr_logs }}"))
+
extend:
title: Logging configuration in a Python script using Loguru
description: >-
@@ -50,7 +44,6 @@ extend:
by default, 100 random log messages, but this number of logs can be changed
at runtime using the input parameter `nr_logs`.
-
- The `warningOnStdErr` property is set to `false` to prevent the `Script`
task from failing when the `logger.warning` method is used.
@@ -61,7 +54,6 @@ extend:
- The `script` property contains the Python code that will be executed by
the `Script` task.
-
The log level is set to `INFO` in the `Script` task. Therefore, the
`logger.debug` message will NOT show up in the logs. The `logger.warning`
messages will be translated to WARN-level logs in Kestra. The `logger.info`
diff --git a/manage-aiven-resources-from-cli.yaml b/manage-aiven-resources-from-cli.yaml
index 04d6ba8..38990a5 100644
--- a/manage-aiven-resources-from-cli.yaml
+++ b/manage-aiven-resources-from-cli.yaml
@@ -1,5 +1,6 @@
id: manage-aiven-resources-from-cli
namespace: company.team
+
tasks:
- id: cli
type: io.kestra.plugin.scripts.python.Commands
@@ -17,10 +18,12 @@ tasks:
--power-on
env:
AVN_AUTH_TOKEN: "{{ secret('AVN_AUTH_TOKEN') }}"
+
triggers:
- id: every_morning
type: io.kestra.plugin.core.trigger.Schedule
cron: 0 9 * * *
+
extend:
title: Manage Aiven resources from the CLI — start and stop services or
databases on schedule
@@ -32,25 +35,20 @@ extend:
API token. It's recommended to use Secrets to store sensitive data such as
API tokens.
-
Once you've configured the Aiven secret, you can reproduce this flow without
any changes.
-
The first command is great to test the setup — the command will just list
your Aiven projects. However, there is a lot more you can do with the Aiven
CLI. Check out the [Aiven CLI
guide](https://aiven.io/developer/aiven-cmdline) for more information.
-
For example, you can use it to start and stop specific services in your
Aiven projects using scheduled flows in Kestra. This is useful if you want
to save money by stopping your services when you don't need them, e.g. at
night or during the weekend.
-
- You can also use the CLI to create and delete services or databases on
- demand.
+ You can also use the CLI to create and delete services or databases on demand.
tags:
- CLI
- Python
diff --git a/metrics-from-shell-commands.yaml b/metrics-from-shell-commands.yaml
index d9330de..7ebb60c 100644
--- a/metrics-from-shell-commands.yaml
+++ b/metrics-from-shell-commands.yaml
@@ -1,16 +1,17 @@
id: metrics-from-shell-commands
namespace: company.team
+
tasks:
- id: process
type: io.kestra.plugin.scripts.shell.Commands
commands:
- echo '::{"metrics":[{"name":"count","type":"counter","value":1}]}::'
+
extend:
title: Expose custom metrics from a Shell script
description: >
This blueprint shows how to expose metrics within a Shell script.
-
Metrics are intended to track custom numeric (metric `type: counter`) or
duration (metric `type: timer`) attributes that you may want to visualize
across task runs and flow executions.
diff --git a/microservices-and-apis.yaml b/microservices-and-apis.yaml
index 31c4e84..ee6d781 100644
--- a/microservices-and-apis.yaml
+++ b/microservices-and-apis.yaml
@@ -1,6 +1,7 @@
id: microservices-and-apis
namespace: tutorial
description: Microservices and APIs
+
inputs:
- id: server_uri
type: URI
@@ -8,6 +9,7 @@ inputs:
- id: slack_webhook_uri
type: URI
defaults: https://reqres.in/api/slack
+
tasks:
- id: http_status_check
type: io.kestra.plugin.core.flow.AllowFailure
@@ -15,6 +17,7 @@ tasks:
- id: http_request
type: io.kestra.plugin.core.http.Request
uri: "{{ inputs.server_uri }}"
+
- id: check_status
type: io.kestra.plugin.core.flow.If
condition: "{{ outputs.http_request.code != 200 }}"
@@ -22,6 +25,7 @@ tasks:
- id: unhealthy
type: io.kestra.plugin.core.log.Log
message: Server is unhealthy! Response {{ outputs.http_request.body }}
+
- id: send_slack_alert
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
url: "{{ inputs.slack_webhook_uri }}"
@@ -34,6 +38,7 @@ tasks:
- id: healthy
type: io.kestra.plugin.core.log.Log
message: Everything is fine!
+
errors:
- id: server_unreachable
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
@@ -43,25 +48,23 @@ tasks:
"channel": "#alerts",
"text": "The server {{ inputs.server_uri }} is unreachable!"
}
+
triggers:
- id: daily
type: io.kestra.plugin.core.trigger.Schedule
disabled: true
cron: 0 9 * * *
+
extend:
title: Getting started with Kestra — a Microservices and APIs workflow example
description: >-
This flow is a simple example of a microservices and APIs use case. It
checks the health of a server and sends a Slack alert if the server is down.
-
The flow has two tasks:
-
1. The first task checks the health of a server.
-
2. The second task sends a Slack alert if the server is down.
-
The flow also has a trigger that runs the flow daily at 9:00 AM to check the
server's health regularly.
tags:
diff --git a/monthly-sales-report.yaml b/monthly-sales-report.yaml
index a7a9c01..ad01a44 100644
--- a/monthly-sales-report.yaml
+++ b/monthly-sales-report.yaml
@@ -1,7 +1,9 @@
id: monthly-sales-report
namespace: company.team
+
variables:
bucket: kestraio
+
tasks:
- id: raw_data_to_s3
type: io.kestra.plugin.scripts.python.Script
@@ -14,16 +16,11 @@ tasks:
AWS_DEFAULT_REGION: "{{ secret('AWS_DEFAULT_REGION') }}"
script: >
import requests
-
import boto3
-
from kestra import Kestra
-
BUCKET = "{{ vars.bucket }}"
-
-
def extract_and_upload(file):
url = f"https://huggingface.co/datasets/kestra/datasets/blob/main/{file}"
@@ -37,6 +34,7 @@ tasks:
filename = f"monthly_orders/2023_{str(month).zfill(2)}.csv"
extract_and_upload(filename)
Kestra.outputs({f"{filename}": f"s3://{BUCKET}/{filename}"})
+
- id: query
type: io.kestra.plugin.jdbc.duckdb.Query
sql: |
@@ -49,11 +47,13 @@ tasks:
FROM read_csv_auto('s3://kestraio/monthly_orders/*.csv', FILENAME = 1)
GROUP BY 1
ORDER BY 2 desc;
- store: true
+ fetchType: STORE
timeout: PT30S
+
- id: csv
type: io.kestra.plugin.serdes.csv.IonToCsv
from: "{{ outputs.query.uri }}"
+
- id: email
type: io.kestra.plugin.notifications.mail.MailSend
subject: The monthly sales report is ready
@@ -71,10 +71,12 @@ tasks:
Please find attached the current sales report.
Best regards,
Data Team
+
triggers:
- id: monthly
type: io.kestra.plugin.core.trigger.Schedule
cron: 0 9 1 * *
+
extend:
title: Upload data to S3 in Python using boto3, transform it in a SQL query with
DuckDB and send a CSV report via email every first day of the month
@@ -82,19 +84,14 @@ extend:
Replace the S3 bucket `kestraio` with your bucket name to reproduce the
example.
-
This flow assumes:
-
- an in-process DuckDB
-
- AWS credentials with S3 access permissions stored using Kestra Secret.
-
If you use [MotherDuck](https://motherduck.com/) and [MotherDuck's managed
S3 secrets](https://motherduck.com/docs/authenticating-to-s3), you can
replace the `query` task with the following simpler configuration:
-
```yaml
- id: query
type: io.kestra.plugin.jdbc.duckdb.Query
@@ -103,7 +100,7 @@ extend:
FROM read_csv_auto('s3://{{vars.bucket}}/monthly_orders/*.csv', FILENAME = 1)
GROUP BY 1
ORDER BY 2 desc;
- store: true
+ fetchType: STORE
timeout: PT30S
url: "jdbc:duckdb:md:my_db?motherduck_token={{ secret('MOTHERDUCK_TOKEN') }}"
```
diff --git a/motherduck.yaml b/motherduck.yaml
index dad0429..e73354d 100644
--- a/motherduck.yaml
+++ b/motherduck.yaml
@@ -8,7 +8,7 @@ tasks:
FROM sample_data.hn.hacker_news
GROUP BY by
ORDER BY nr_comments DESC;
- store: true
+ fetchType: STORE
- id: csv
type: io.kestra.plugin.serdes.csv.IonToCsv
from: "{{ outputs.query.uri }}"
diff --git a/new-shell.yaml b/new-shell.yaml
index 7cbf05b..6811ebb 100644
--- a/new-shell.yaml
+++ b/new-shell.yaml
@@ -1,8 +1,10 @@
id: new-shell
namespace: company.team
+
variables:
project_id: myProjectId
region: eu-west-2
+
tasks:
- id: shell
type: io.kestra.plugin.scripts.shell.Commands
@@ -13,25 +15,20 @@ tasks:
serviceAccount: "{{ secret('GOOGLE_SA') }}"
commands:
- echo "Hello World"
+
extend:
title: Run a Shell script on Google Cloud with Cloud Run
description: >-
This flow runs a simple Shell command in a Cloud Run container.
-
The `containerImage` property is required because Cloud Run executes tasks
as containers. You can use any image from a public or private registry.
-
Your service account needs to have the following IAM roles attached to use
the service:
-
- Cloud Run Developer
-
- Logs Viewer
-
- Storage Admin (to upload files to GCS and download files from GCS)
-
- Owner/Editor of the Compute Engine default service account (to be able to
provision compute resources for the Cloud Run container)
tags:
diff --git a/node-custom-package.yaml b/node-custom-package.yaml
index 8dbe8bb..b5bcd11 100644
--- a/node-custom-package.yaml
+++ b/node-custom-package.yaml
@@ -1,5 +1,6 @@
id: node-custom-package
namespace: company.team
+
tasks:
- id: script
type: io.kestra.plugin.scripts.node.Script
@@ -18,6 +19,7 @@ tasks:
script: |
import colors from 'colors';
console.log(colors.red("Hello"));
+
extend:
title: Install custom Node packages from package.json before running a Node.js
script
diff --git a/notify-about-github-stars-via-slack.yaml b/notify-about-github-stars-via-slack.yaml
index 290b04e..7f83c7e 100644
--- a/notify-about-github-stars-via-slack.yaml
+++ b/notify-about-github-stars-via-slack.yaml
@@ -1,9 +1,11 @@
id: notify-about-github-stars-via-slack
namespace: company.team
+
inputs:
- id: repo
type: STRING
defaults: kestra-io/kestra
+
tasks:
- id: api_query
type: io.kestra.plugin.core.http.Request
@@ -11,17 +13,17 @@ tasks:
headers:
User-Agent: kestra
uri: https://api.github.com/repos/{{inputs.repo}}
+
- id: get_stars
type: io.kestra.plugin.core.log.Log
- message: ✨✨✨ Total GitHub stars {{json(outputs.api_query.body).stargazers_count
- }} ✨✨✨
+ message: ✨✨✨ Total GitHub stars {{ json(outputs.api_query.body).stargazers_count }} ✨✨✨
+
extend:
title: Extract field from JSON object in API call and pass to subsequent task
description: >+
A common use-case may be to retrieve a specific field from a JSON payload in
an API request and use that further downstream.
-
In this simple example we will query the number of stars for a given GitHub
repo and then output it as a message.
diff --git a/on-demand-cluster-job.yaml b/on-demand-cluster-job.yaml
index 1cd4bc7..f036d6c 100644
--- a/on-demand-cluster-job.yaml
+++ b/on-demand-cluster-job.yaml
@@ -1,5 +1,6 @@
id: on-demand-cluster-job
namespace: company.team
+
tasks:
- id: create_cluster
type: io.kestra.plugin.databricks.cluster.CreateCluster
@@ -10,6 +11,7 @@ tasks:
nodeTypeId: n2-highmem-4
numWorkers: 1
sparkVersion: 13.0.x-scala2.12
+
- id: allow_failure
type: io.kestra.plugin.core.flow.AllowFailure
tasks:
@@ -25,12 +27,14 @@ tasks:
pythonFile: /Shared/hello.py
sparkPythonTaskSource: WORKSPACE
waitForCompletion: PT5M
+
- id: delete_cluster
type: io.kestra.plugin.databricks.cluster.DeleteCluster
authentication:
token: "{{ secret('DATABRICKS_TOKEN') }}"
host: "{{ secret('DATABRICKS_HOST') }}"
clusterId: "{{ outputs.create_cluster.clusterId }}"
+
extend:
title: Run a task on an on-demand Databricks cluster
description: >-
@@ -44,7 +48,6 @@ extend:
minutes (as declared on the `waitForCompletion` property) for the task to
complete.
-
Even if the job fails, the `AllowFailure` tasks ensures that Databricks
cluster will be deleted in the end.
tags:
diff --git a/on-failure-alert.yaml b/on-failure-alert.yaml
index 44054a7..6f91011 100644
--- a/on-failure-alert.yaml
+++ b/on-failure-alert.yaml
@@ -1,5 +1,6 @@
id: on-failure-alert
namespace: company.team
+
tasks:
- id: fail
type: io.kestra.plugin.scripts.shell.Commands
@@ -7,6 +8,7 @@ tasks:
type: io.kestra.plugin.core.runner.Process
commands:
- exit 1
+
errors:
- id: slack
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
@@ -16,6 +18,7 @@ errors:
"channel": "#alerts",
"text": "Failure alert for flow {{ flow.namespace }}.{{ flow.id }} with ID {{ execution.id }}"
}
+
extend:
title: "Error handling: send Slack alert on failure"
description: This flow will fail and the `errors` section declares tasks that
diff --git a/openai-dall-e-create-image.yaml b/openai-dall-e-create-image.yaml
index 16944f0..29fbf88 100644
--- a/openai-dall-e-create-image.yaml
+++ b/openai-dall-e-create-image.yaml
@@ -1,5 +1,6 @@
id: openai-dall-e-create-image
namespace: company.team
+
tasks:
- id: puppy
type: io.kestra.plugin.openai.CreateImage
@@ -7,6 +8,7 @@ tasks:
n: 1
download: true
prompt: the cutest little happy smiling puppy
+
extend:
title: Create an image using OpenAI's DALL-E
description: >-
@@ -14,10 +16,8 @@ extend:
you set the download attribute to `true`, the image will be available for
download from the Outputs tab on the Executions page.
-
Example result:
-
![dog](https://storage.googleapis.com/strapi--kestra-prd/dog_bf751be6a4/dog_bf751be6a4.png)
tags:
- AI
diff --git a/openai.yaml b/openai.yaml
index 30fc11f..9fddb76 100644
--- a/openai.yaml
+++ b/openai.yaml
@@ -1,21 +1,23 @@
id: openai
namespace: company.team
+
tasks:
- id: prompt
type: io.kestra.plugin.openai.ChatCompletion
apiKey: "{{ secret('OPENAI_API_KEY') }}"
model: gpt-4
prompt: Explain in one sentence why data engineers build data pipelines
+
- id: use_output
type: io.kestra.plugin.core.log.Log
message: "{{ outputs.prompt.choices | jq('.[].message.content') | first }}"
+
extend:
title: Send a prompt to OpenAI's ChatCompletion API
description: >-
This flow will send a prompt to OpenAI. You can select the desired model and
additional configuration such as temperature.
-
The next task shows how you can retrieve the message content from the API
response.
tags:
diff --git a/opsgenie-notify-on-failure.yaml b/opsgenie-notify-on-failure.yaml
index b6a5ee3..6bded87 100644
--- a/opsgenie-notify-on-failure.yaml
+++ b/opsgenie-notify-on-failure.yaml
@@ -1,5 +1,6 @@
id: opsgenie-notify-on-failure
namespace: company.team
+
tasks:
- id: send_notification
type: io.kestra.plugin.notifications.opsgenie.OpsgenieExecution
@@ -23,6 +24,7 @@ tasks:
- Execution
authorizationToken: sampleAuthorizationToken
executionId: "{{ trigger.executionId }}"
+
triggers:
- id: on_failure
type: io.kestra.plugin.core.trigger.Flow
@@ -34,40 +36,33 @@ triggers:
- type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
namespace: company
comparison: PREFIX
+
extend:
title: Send a notification via Opsgenie when a workflow fails
description: >-
This system flow will send a notification via Opsgenie anytime a workflow in
a `company` namespace (or any nested child namespace) fails.
-
Using this pattern, you can send notifications for Kestra workflow execution
failures alongside other notifications.
-
You can customize that system flow by modifying the task, adding more tasks
to the flow or adjusting the trigger conditions. Read more about that
pattern in the [Administrator
Guide](https://kestra.io/docs/administrator-guide/monitoring).
-
Let's create a flow in the namespace with prefix `company` that will always
fail.
-
```yaml
-
id: failure_flow
-
namespace: company.team
-
tasks:
- id: always_fails
type: io.kestra.plugin.core.execution.Fail
```
-
Whenever you run the `failure_flow`, it will trigger an execution of the
`opsgenie_notify_on_failure` flow. As a result, a notification will be sent
using Opsgenie so that prompt action can be taken.
diff --git a/outputs-from-shell-commands.yaml b/outputs-from-shell-commands.yaml
index 9e54cda..616c59b 100644
--- a/outputs-from-shell-commands.yaml
+++ b/outputs-from-shell-commands.yaml
@@ -1,19 +1,21 @@
id: outputs-from-shell-commands
namespace: company.team
+
tasks:
- id: process
type: io.kestra.plugin.scripts.shell.Commands
commands:
- echo '::{"outputs":{"test":"value","int":2,"bool":true,"float":3.65}}::'
+
- id: return
type: io.kestra.plugin.core.debug.Return
format: "{{ outputs.process.vars.test }}"
+
extend:
title: Create custom outputs from a Shell script
description: >-
This blueprint shows how to expose custom outputs from a shell script.
-
The `::{"outputs":{"test":"value"}}::` allow to expose your data in task
output. Those outputs are accessible through the `{{
outputs..vars. }}` command in other tasks.
diff --git a/papermill-notebook.yaml b/papermill-notebook.yaml
index d950bae..b731887 100644
--- a/papermill-notebook.yaml
+++ b/papermill-notebook.yaml
@@ -1,5 +1,6 @@
id: papermill-notebook
namespace: company.team
+
tasks:
- id: python
type: io.kestra.plugin.scripts.python.Commands
@@ -12,13 +13,13 @@ tasks:
- papermill src/example.ipynb.py output.ipynb -k python3.12.0
outputFiles:
- output.ipynb
+
extend:
title: Run a Papermill notebook
description: >-
This blueprint shows how to execute a Jupyter Notebook within a Kestra flow
using the Papermill library.
-
Here we use Namespace Files where we created the `src/example.ipynb.py`
notebook. We expose the outputs of the notebook execution into the
`output.ipynb` file.
diff --git a/parallel-files.yaml b/parallel-files.yaml
index e011c01..1e0a727 100644
--- a/parallel-files.yaml
+++ b/parallel-files.yaml
@@ -1,5 +1,6 @@
id: parallel-files
namespace: company.team
+
tasks:
- id: bash
type: io.kestra.plugin.scripts.shell.Commands
@@ -13,6 +14,7 @@ tasks:
- echo "Hello from 2" >> out/output2.txt
- echo "Hello from 3" >> out/output3.txt
- echo "Hello from 4" >> out/output4.txt
+
- id: each
type: io.kestra.plugin.core.flow.EachParallel
value: "{{ outputs.bash.outputFiles | jq('.[]') }}"
@@ -20,18 +22,19 @@ tasks:
- id: path
type: io.kestra.plugin.core.debug.Return
format: "{{ taskrun.value }}"
+
- id: contents
type: io.kestra.plugin.scripts.shell.Commands
taskRunner:
type: io.kestra.plugin.core.runner.Process
commands:
- cat "{{ taskrun.value }}"
+
extend:
title: Process files in parallel
description: >
This example demonstrates how to process files in parallel.
-
In the `bash` task, we generate multiple files, and store them in the
internal storage.
@@ -39,7 +42,6 @@ extend:
tasks `path` and `contents` run for each of the 4 output files, resulting in
8 parallel task runs.
-
Instead of the `bash` script, you may have a Python/R/Node.js script that
generates such files.
tags:
diff --git a/parallel-python.yaml b/parallel-python.yaml
index 3b7e4d0..e31104e 100644
--- a/parallel-python.yaml
+++ b/parallel-python.yaml
@@ -1,5 +1,6 @@
id: parallel-python
namespace: company.team
+
tasks:
- id: parallel
type: io.kestra.plugin.core.flow.EachParallel
@@ -23,6 +24,7 @@ tasks:
containerImage: ghcr.io/kestra-io/pydata:latest
commands:
- python parametrized.py --num {{ taskrun.value }}
+
extend:
title: Add a parametrized Python script as a Namespace File and run it in
parallel in Docker containers
@@ -31,30 +33,18 @@ extend:
in parallel with different parameter `values` using a Python script added as
a Namespace File.
-
Here is the content of the `parametrized.py` script:
-
```python
-
import argparse
-
parser = argparse.ArgumentParser()
-
-
parser.add_argument("--num", type=int, default=42, help="Enter an integer")
-
-
args = parser.parse_args()
-
result = args.num * 2
-
print(result)
-
```
-
You can add that file directly from the embedded Visual Studio Code Editor
in the Kestra UI.
tags:
diff --git a/parallel-sequences.yaml b/parallel-sequences.yaml
index 0de1e6d..579b316 100644
--- a/parallel-sequences.yaml
+++ b/parallel-sequences.yaml
@@ -1,5 +1,6 @@
id: parallel-sequences
namespace: company.team
+
tasks:
- id: parallel
type: io.kestra.plugin.core.flow.Parallel
@@ -10,24 +11,27 @@ tasks:
- id: task1
type: io.kestra.plugin.core.debug.Return
format: "{{ task.id }}"
+
- id: task2
type: io.kestra.plugin.core.debug.Return
format: "{{ task.id }}"
+
- id: sequence2
type: io.kestra.plugin.core.flow.Sequential
tasks:
- id: task3
type: io.kestra.plugin.core.debug.Return
format: "{{ task.id }}"
+
- id: task4
type: io.kestra.plugin.core.debug.Return
format: "{{ task.id }}"
+
extend:
title: Run two sequences in parallel
description: >-
This blueprint shows how to run two independent task sequences in parallel.
-
The two sequences, sequence1 and sequence2, start in parallel. The tasks in
these sequences however run one after the other serially, i.e. task2 starts
after task1 finishes, and task4 starts after task3 finishes.
diff --git a/parallel-tasks.yaml b/parallel-tasks.yaml
index c73a221..89d87aa 100644
--- a/parallel-tasks.yaml
+++ b/parallel-tasks.yaml
@@ -1,5 +1,6 @@
id: parallel-tasks
namespace: company.team
+
tasks:
- id: parallel
type: io.kestra.plugin.core.flow.Parallel
@@ -7,9 +8,11 @@ tasks:
- id: task1
type: io.kestra.plugin.core.debug.Return
format: "{{ task.id }}"
+
- id: task2
type: io.kestra.plugin.core.debug.Return
format: "{{ task.id }}"
+
extend:
title: Run two tasks in parallel
description: This blueprints show how to run two tasks in parallel.
diff --git a/parallelSubflows.yaml b/parallelSubflows.yaml
index 772ac28..dedd298 100644
--- a/parallelSubflows.yaml
+++ b/parallelSubflows.yaml
@@ -1,5 +1,6 @@
id: parallelSubflows
namespace: company.team
+
tasks:
- id: parallel
type: io.kestra.plugin.core.flow.Parallel
@@ -8,76 +9,68 @@ tasks:
type: io.kestra.plugin.core.flow.Subflow
flowId: flow1
namespace: company.team
+
- id: flow2
type: io.kestra.plugin.core.flow.Subflow
flowId: flow2
namespace: company.team
+
- id: flow3
type: io.kestra.plugin.core.flow.Subflow
flowId: flow3
namespace: company.team
+
pluginDefaults:
- type: io.kestra.plugin.core.flow.Flow
values:
namespace: company.team
wait: true
transmitFailed: true
+
extend:
title: Run multiple subflows in parallel and wait for their completion - use
taskDefaults to avoid boilerplate code
description: >-
Add the child flows first:
-
First flow:
```yaml
-
id: flow1
-
namespace: company.team
tasks:
-
- - id: get
- type: io.kestra.plugin.core.debug.Return
- format: hi from {{ flow.id }}
+ - id: get
+ type: io.kestra.plugin.core.debug.Return
+ format: hi from {{ flow.id }}
```
-
Second flow:
```yaml
-
id: flow2
-
namespace: company.team
tasks:
-
- - id: get
- type: io.kestra.plugin.core.debug.Return
- format: hi from {{ flow.id }}
+ - id: get
+ type: io.kestra.plugin.core.debug.Return
+ format: hi from {{ flow.id }}
```
-
Third flow:
```yaml
-
id: flow3
-
namespace: company.team
tasks:
-
- - id: get
- type: io.kestra.plugin.core.debug.Return
- format: hi from {{ flow.id }}
+ - id: get
+ type: io.kestra.plugin.core.debug.Return
+ format: hi from {{ flow.id }}
```
-
- Then run the parent flow `parallelSubflows` to trigger multiple subflows in
- parallel.
+
+ Then run the parent flow `parallelSubflows` to trigger multiple subflows in parallel.
+
tags:
- Parallel
ee: false
diff --git a/parametrized-flow-with-multiple-schedules.yaml b/parametrized-flow-with-multiple-schedules.yaml
index 3fd228b..a17cc84 100644
--- a/parametrized-flow-with-multiple-schedules.yaml
+++ b/parametrized-flow-with-multiple-schedules.yaml
@@ -1,14 +1,17 @@
id: parametrized-flow-with-multiple-schedules
namespace: company.team
+
inputs:
- id: user
type: STRING
defaults: Data Engineer
required: false
+
tasks:
- id: hello
type: io.kestra.plugin.core.log.Log
message: Hello {{ inputs.user }} from Kestra!
+
triggers:
- id: quarter_hourly
type: io.kestra.plugin.core.trigger.Schedule
@@ -16,6 +19,7 @@ triggers:
cron: "*/15 * * * *"
inputs:
name: user
+
- id: every_minute
type: io.kestra.plugin.core.trigger.Schedule
disabled: true
@@ -23,6 +27,7 @@ triggers:
inputs:
name: user
value: custom value
+
extend:
title: Parametrized workflow with multiple schedules
description: >
@@ -30,12 +35,9 @@ extend:
console.
The flow has two scheduled attached to it:
-
- one that runs every 15 minutes with the default input parameter value
-
- another one that runs every 1 minute with a custom input parameter value
-
Note that both schedules are currently disabled.
To start scheduling the flow, set the `disabled` property to `false` or
diff --git a/parquet-duckdb-to-excel.yaml b/parquet-duckdb-to-excel.yaml
index 479e985..76c1b4f 100644
--- a/parquet-duckdb-to-excel.yaml
+++ b/parquet-duckdb-to-excel.yaml
@@ -1,27 +1,24 @@
id: parquet-duckdb-to-excel
namespace: company.team
+
tasks:
- id: parquet_duckdb
type: io.kestra.plugin.jdbc.duckdb.Query
sql: >
INSTALL parquet;
-
LOAD parquet;
-
INSTALL httpfs;
-
LOAD httpfs;
-
SELECT *
-
FROM
read_parquet('https://huggingface.co/datasets/kestra/datasets/resolve/main/jaffle-large/raw_items.parquet?download=true')
-
LIMIT 1000000;
- store: true
+ fetchType: STORE
+
- id: duckdb_to_excel
type: io.kestra.plugin.serdes.excel.IonToExcel
from: "{{ outputs.parquet_duckdb.uri }}"
+
extend:
title: Extract and transform a Parquet file using DuckDB and export it in Excel
format
diff --git a/parse-image-metadata-using-apache-tika.yaml b/parse-image-metadata-using-apache-tika.yaml
index 9a66175..658a61a 100644
--- a/parse-image-metadata-using-apache-tika.yaml
+++ b/parse-image-metadata-using-apache-tika.yaml
@@ -1,9 +1,11 @@
id: parse-image-metadata-using-apache-tika
namespace: company.team
+
tasks:
- id: get_image
type: io.kestra.plugin.core.http.Download
uri: https://kestra.io/blogs/2023-05-31-beginner-guide-kestra.jpg
+
- id: tika
type: io.kestra.plugin.tika.Parse
from: "{{ outputs.get_image.uri }}"
@@ -11,6 +13,7 @@ tasks:
contentType: TEXT
ocrOptions:
strategy: OCR_AND_TEXT_EXTRACTION
+
extend:
title: Extract image metadata using Apache Tika
description: This flow extracts metadata from an image using Apache Tika.
diff --git a/parse-pdf.yaml b/parse-pdf.yaml
index bd25f72..a6c10ae 100644
--- a/parse-pdf.yaml
+++ b/parse-pdf.yaml
@@ -1,17 +1,21 @@
id: parse-pdf
namespace: company.team
+
tasks:
- id: download_pdf
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/resolve/main/pdf/app_store.pdf
+
- id: parse_text
type: io.kestra.plugin.tika.Parse
from: "{{ outputs.download_pdf.uri }}"
contentType: TEXT
store: false
+
- id: log_extracted_text
type: io.kestra.plugin.core.log.Log
message: "{{ outputs.parse_text.result.content }}"
+
extend:
title: Download a PDF file and extract text from it using Apache Tika
description: |-
diff --git a/parse-twitter-json-payload.yaml b/parse-twitter-json-payload.yaml
index f612c56..97667e8 100644
--- a/parse-twitter-json-payload.yaml
+++ b/parse-twitter-json-payload.yaml
@@ -1,5 +1,6 @@
id: parse-twitter-json-payload
namespace: company.team
+
inputs:
- id: json
type: JSON
@@ -13,27 +14,32 @@ inputs:
"previous_token": "77qp8"
}
}
+
tasks:
- id: jq_filter
type: io.kestra.plugin.core.log.Log
message: |
{{ inputs.json | jq('.meta | has("next_token")') | first }}
+
- id: contains
type: io.kestra.plugin.core.debug.Return
format: |
- {{inputs.json["meta"] contains "next_token"}}
+ {{ inputs.json["meta"] contains "next_token" }}
+
- id: contains_if_else_operator
type: io.kestra.plugin.core.debug.Return
format: |
{% if inputs.json["meta"] contains "next_token" %} true
{% else %} false
{% endif %}
+
- id: is_not_null_operator
type: io.kestra.plugin.core.debug.Return
format: |
{% if inputs.json["meta"]["next_token"] is not null %}
true
{% endif %}
+
extend:
title: Check if a given key exists in a JSON REST API payload
description: >-
@@ -42,7 +48,6 @@ extend:
accomplishing that — a JQuery filter, a `contains` operator, and an `is not
null` operator.
-
Check the [Expressions](https://kestra.io/docs/concepts/expression)
documentation for more examples.
tags:
diff --git a/pass-data-between-subflows.yaml b/pass-data-between-subflows.yaml
index 4f752f1..d3ef0b6 100644
--- a/pass-data-between-subflows.yaml
+++ b/pass-data-between-subflows.yaml
@@ -1,31 +1,30 @@
id: pass-data-between-subflows
namespace: company.team
+
tasks:
- id: call_child_flow
type: io.kestra.plugin.core.flow.Subflow
namespace: company.team
flowId: child_flow
wait: true
+
- id: log
type: io.kestra.plugin.core.log.Log
message: "{{ outputs.call_child_flow.outputs.data_from_child_flow }}"
+
extend:
title: Pass data between subflows — use outputs from the child flow in a parent flow
description: >-
First, create a child flow:
-
```yaml
-
id: child_flow
-
namespace: company.team
tasks:
- id: return_data
type: io.kestra.plugin.core.debug.Return
format: this is a secret message returned from {{ flow.id }}
-
outputs:
- id: data_from_child_flow
@@ -33,7 +32,6 @@ extend:
value: "{{ outputs.return_data.value }}"
```
-
Then, you can run this parent flow that will retrieve data from the subflow
and store it under a specified key. In this example, the subflow uses the
key `data_from_child_flow`.
diff --git a/pass-data-between-tasks.yaml b/pass-data-between-tasks.yaml
index eacf78a..9bdf157 100644
--- a/pass-data-between-tasks.yaml
+++ b/pass-data-between-tasks.yaml
@@ -1,9 +1,11 @@
id: pass-data-between-tasks
namespace: company.team
+
tasks:
- id: pass_output
type: io.kestra.plugin.core.debug.Return
format: hello
+
- id: py_outputs
type: io.kestra.plugin.scripts.python.Script
taskRunner:
@@ -20,17 +22,20 @@ tasks:
with open('myoutput.json', 'w') as f:
json.dump(my_kv_pair, f)
+
- id: take_inputs
type: io.kestra.plugin.core.log.Log
message: >
data from previous tasks: {{ outputs.pass_output.value }} and {{
outputs.py_outputs.vars.mykey }}
+
- id: check_output_file
type: io.kestra.plugin.scripts.shell.Commands
taskRunner:
type: io.kestra.plugin.core.runner.Process
commands:
- cat {{ outputs.py_outputs.outputFiles['myoutput.json'] }}
+
extend:
title: Pass data between Python script tasks and Shell tasks using Outputs
description: >+
@@ -39,19 +44,16 @@ extend:
The first two tasks return some outputs and the next 2 tasks read those
values for further processing.
-
Check the "Outputs" section in each task documentation to see what outputs
it returns, and check the "Outputs" tab on the Execution page to validate
what outputs are generated by this flow.
- In case of the `Return` task, it returns a value under the `value` key.
-
- All script tasks, including Python, return a map of outputs under the
`vars` key. To access outputs in the downstream tasks, use the format `{{
outputs.task_name.vars.key_name }}`. Additionally, script tasks can return
files as shown with the `myoutput.json` file.
-
tags:
- Python
- Outputs
diff --git a/pip-packages-docker.yaml b/pip-packages-docker.yaml
index eef4570..336bcb1 100644
--- a/pip-packages-docker.yaml
+++ b/pip-packages-docker.yaml
@@ -1,5 +1,6 @@
id: pip-packages-docker
namespace: company.team
+
tasks:
- id: run_python
type: io.kestra.plugin.scripts.python.Script
@@ -13,6 +14,7 @@ tasks:
response = requests.get("https://api.github.com")
data = response.json()
print(data)
+
extend:
title: Docker container installing pip packages before starting a Python Script task
description: >
@@ -21,7 +23,6 @@ extend:
box and you can add several `beforeCommands` to install custom Pip packages,
and prepare the environment for the task.
-
Adding `warningOnStdErr: false` ensures that warnings raised during pip
package installation don't set the task to a `WARNING` state. However, by
default, any warning raised during the setup process (i.e. when executing
diff --git a/postgres-s3-python-git.yaml b/postgres-s3-python-git.yaml
index 26c7194..1915092 100644
--- a/postgres-s3-python-git.yaml
+++ b/postgres-s3-python-git.yaml
@@ -1,5 +1,6 @@
id: postgres-s3-python-git
namespace: company.team
+
tasks:
- id: wdir
type: io.kestra.plugin.core.flow.WorkingDirectory
@@ -8,6 +9,7 @@ tasks:
type: io.kestra.plugin.git.Clone
url: https://github.com/kestra-io/scripts
branch: main
+
- id: get_users
type: io.kestra.plugin.scripts.python.Commands
taskRunner:
@@ -16,6 +18,7 @@ tasks:
warningOnStdErr: false
commands:
- python etl/get_users_from_api.py
+
- id: save_users_pg
type: io.kestra.plugin.scripts.python.Commands
beforeCommands:
@@ -28,6 +31,7 @@ tasks:
DB_PASSWORD: "{{ secret('DB_PASSWORD') }}"
DB_HOST: host.docker.internal
DB_PORT: "5432"
+
extend:
title: Extract data from an API and load it to Postgres using Python, Git and
Docker (passing custom environment variables to the container)
@@ -40,7 +44,6 @@ extend:
2. The second Python script reads that extracted raw data file and loads it
to Postgres using Python and Pandas.
-
**The benefits of this approach:**
- your **orchestration logic** (YAML) is decoupled from your **business
diff --git a/postgres-s3-python-script.yaml b/postgres-s3-python-script.yaml
index b8f9394..2522e97 100644
--- a/postgres-s3-python-script.yaml
+++ b/postgres-s3-python-script.yaml
@@ -1,5 +1,6 @@
id: postgres-s3-python-script
namespace: company.team
+
tasks:
- id: api_to_postgres
type: io.kestra.plugin.scripts.python.Script
@@ -27,6 +28,7 @@ tasks:
df_users.to_sql("users", engine, if_exists="append", index=False)
df_users.to_json("users.json")
+
- id: s3_upload
type: io.kestra.plugin.aws.s3.Upload
from: "{{ outputs.api_to_postgres.outputFiles['users.json'] }}"
@@ -35,6 +37,7 @@ tasks:
region: eu-central-1
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
+
extend:
title: Extract data from an API using Python, then load it to Postgres and S3
description: >-
@@ -45,7 +48,6 @@ extend:
2. Loads that extracted data to Postgres and a local JSON file. The local
file is then uploaded to S3 in the following task.
-
The Python task runs in a Docker container. Before starting the script,
Kestra will install custom package dependencies, as defined by the
`beforeCommands` property.
diff --git a/postgres-to-bigquery.yaml b/postgres-to-bigquery.yaml
index 9162526..b2a762c 100644
--- a/postgres-to-bigquery.yaml
+++ b/postgres-to-bigquery.yaml
@@ -1,5 +1,6 @@
id: postgres-to-bigquery
namespace: company.team
+
tasks:
- id: extract
type: io.kestra.plugin.singer.taps.PipelinewisePostgres
@@ -13,6 +14,7 @@ tasks:
streamsConfigurations:
- replicationMethod: FULL_TABLE
selected: true
+
- id: load
type: io.kestra.plugin.singer.targets.AdswerveBigQuery
addMetadataColumns: true
@@ -23,6 +25,7 @@ tasks:
location: US
projectId: yourProjectName
serviceAccount: "{{ secret('GCP_CREDS') }}"
+
extend:
title: Load data from Postgres to BigQuery using Singer
description: >-
diff --git a/postgres-to-pandas-dataframes.yaml b/postgres-to-pandas-dataframes.yaml
index 105edf0..41b13b0 100644
--- a/postgres-to-pandas-dataframes.yaml
+++ b/postgres-to-pandas-dataframes.yaml
@@ -1,7 +1,9 @@
id: postgres-to-pandas-dataframes
namespace: company.team
+
variables:
db_host: host.docker.internal
+
tasks:
- id: get_tables
type: io.kestra.plugin.core.flow.Parallel
@@ -10,9 +12,11 @@ tasks:
- id: products
type: io.kestra.plugin.jdbc.postgresql.CopyOut
sql: SELECT * FROM products
+
- id: orders
type: io.kestra.plugin.jdbc.postgresql.CopyOut
sql: SELECT * FROM orders
+
- id: pandas
type: io.kestra.plugin.scripts.python.Script
warningOnStdErr: false
@@ -38,6 +42,7 @@ tasks:
)
top.to_json("bestsellers_pandas.json", orient="records")
+
pluginDefaults:
- type: io.kestra.plugin.jdbc.postgresql.CopyOut
values:
@@ -47,10 +52,12 @@ pluginDefaults:
format: CSV
header: true
delimiter: ","
+
triggers:
- id: every_morning
type: io.kestra.plugin.core.trigger.Schedule
cron: 0 9 * * *
+
extend:
title: Extract multiple tables from Postgres using SQL queries and process those
as Pandas dataframes on schedule
@@ -59,7 +66,6 @@ extend:
limit of how many tasks will run at the same time is defined using the
`concurrent` property.
-
The flow extracts data from a Postgres database. That data is then passed to
a Python task using `inputFiles`. The Python task reads the input files,
and performs operations on the data using Pandas.
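
As a rough illustration (not the blueprint's exact code), the query results can be wired into the Python task through `inputFiles`, assuming each `CopyOut` task exposes its result file under a `uri` output:

```yaml
  - id: pandas
    type: io.kestra.plugin.scripts.python.Script
    containerImage: ghcr.io/kestra-io/pydata:latest
    inputFiles:
      products.csv: "{{ outputs.products.uri }}"  # CSV extracted by the products CopyOut task
      orders.csv: "{{ outputs.orders.uri }}"      # CSV extracted by the orders CopyOut task
    script: |
      import pandas as pd

      products = pd.read_csv("products.csv")
      orders = pd.read_csv("orders.csv")
      print(products.head(), orders.head())
```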
diff --git a/process-s3-file-if-changed.yaml b/process-s3-file-if-changed.yaml
index 8ce8c79..45c07ce 100644
--- a/process-s3-file-if-changed.yaml
+++ b/process-s3-file-if-changed.yaml
@@ -1,8 +1,10 @@
id: process-s3-file-if-changed
namespace: company.team
+
variables:
bucket: kestraio
object: hello.txt
+
tasks:
- id: process_file_if_changed
type: io.kestra.plugin.scripts.python.Commands
@@ -18,25 +20,22 @@ tasks:
AWS_ACCESS_KEY_ID: "{{ secret('AWS_ACCESS_KEY_ID') }}"
AWS_SECRET_ACCESS_KEY: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
AWS_DEFAULT_REGION: "{{ secret('AWS_DEFAULT_REGION') }}"
+
triggers:
- id: schedule
type: io.kestra.plugin.core.trigger.Schedule
cron: "*/5 * * * *"
+
extend:
title: Process a file from S3 only if it changed since the last execution
description: >-
Add the following Python script named `s3_modified.py` in the Editor:
-
```python
-
import boto3
-
from datetime import datetime
-
import argparse
-
def parse_date(date_str):
if date_str.endswith('Z'):
return datetime.fromisoformat(date_str.replace('Z', '+00:00'))
@@ -68,7 +67,6 @@ extend:
main()
```
-
Make sure to add Secrets for your AWS credentials and adjust the variables
to point to your S3 bucket and object.
tags:
diff --git a/process-script-runner.yaml b/process-script-runner.yaml
index 1621b87..7ed405f 100644
--- a/process-script-runner.yaml
+++ b/process-script-runner.yaml
@@ -1,5 +1,6 @@
id: process-script-runner
namespace: company.team
+
tasks:
- id: shell
type: io.kestra.plugin.scripts.shell.Commands
@@ -7,13 +8,13 @@ tasks:
type: io.kestra.plugin.core.runner.Process
commands:
- echo "Hello World!"
+
extend:
title: Run a Shell script as a subprocess on the Kestra host
description: >-
Here is an example of a Shell script configured with the Process task runner
which runs a Shell command as a child process within the Kestra host.
-
The Process task runner doesn’t have any additional configuration beyond the
`type` property.
tags:
diff --git a/produce-kafka-message.yaml b/produce-kafka-message.yaml
index 01c4378..d09a819 100644
--- a/produce-kafka-message.yaml
+++ b/produce-kafka-message.yaml
@@ -1,9 +1,11 @@
id: produce-kafka-message
namespace: company.team
+
tasks:
- id: api
type: io.kestra.plugin.core.http.Request
uri: https://dummyjson.com/products
+
- id: produce
type: io.kestra.plugin.kafka.Produce
from:
@@ -17,6 +19,7 @@ tasks:
topic: mytopic
properties:
bootstrap.servers: my.kafka.k8s.com:9094
+
extend:
title: Extract data from a REST API and send it to a Kafka topic using the Kafka
producer task
@@ -26,12 +29,10 @@ extend:
running, and that you created a topic named `mytopic`. Make sure to replace
the `bootstrap.servers` value with your Kafka cluster URL.
-
The `from` argument expects a map or a list of maps with key-value pairs.
The allowed keys are: `key`, `value`, `partition`, `timestamp`, and
`headers`.
-
In this example, we're using the `outputs.api.body` value, which is a
JSON-formatted response body from the `api` task. This is why the
`valueSerializer` argument is set to `JSON`.
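
For reference, a single-record `from` map could be shaped as follows (a hedged sketch; the `key` value is made up, and `partition`, `timestamp`, and `headers` are optional):

```yaml
  from:
    key: "{{ execution.id }}"        # optional message key, illustrative only
    value: "{{ outputs.api.body }}"  # JSON response body, matching valueSerializer JSON
```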
diff --git a/produce-to-rabbitmq.yaml b/produce-to-rabbitmq.yaml
index 4f06bd4..20895b7 100644
--- a/produce-to-rabbitmq.yaml
+++ b/produce-to-rabbitmq.yaml
@@ -1,8 +1,10 @@
id: produce-to-rabbitmq
namespace: company.team
+
inputs:
- id: order
type: STRING
+
tasks:
- id: publish_to_rabbitmq
type: io.kestra.plugin.amqp.Publish
@@ -10,29 +12,24 @@ tasks:
exchange: test-queue
from:
- data: "{{ read(inputs.order) }}"
+
extend:
title: Read a CSV file and load each row into RabbitMQ
description: >-
This blueprint has two flows: `read_orders` and `produce_to_rabbitmq`.
-
1. `read_orders` reads the CSV file from a URL, converts it into ION, and
generates an execution of the `produce_to_rabbitmq` flow for each row of the
ION file.
2. `produce_to_rabbitmq` publishes the record into RabbitMQ.
-
Here is the code of the parent flow `read_orders`:
-
```yaml
-
id: read_orders
-
namespace: company.team
-
tasks:
- id: csv
type: io.kestra.plugin.fs.http.Download
@@ -55,21 +52,15 @@ extend:
order: "{{ taskrun.items }}"
```
-
Execute the `read_orders` flow. This flow execution will trigger the
`produce_to_rabbitmq` flow for each record.
-
You can run RabbitMQ locally using Docker with the following command:
```bash
-
- docker run -it --rm --name rabbitmq -p 5672:5672 -p 15672:15672
- rabbitmq:latest
-
+ docker run -it --rm --name rabbitmq -p 5672:5672 -p 15672:15672 rabbitmq:latest
```
-
You can open the RabbitMQ UI locally at `http://localhost:15672/` and log in
using `guest`/`guest`.
tags: []
diff --git a/pubsub-realtime-trigger.yaml b/pubsub-realtime-trigger.yaml
index 7388ed2..8586d98 100644
--- a/pubsub-realtime-trigger.yaml
+++ b/pubsub-realtime-trigger.yaml
@@ -1,5 +1,6 @@
id: pubsub-realtime-trigger
namespace: company.team
+
tasks:
- id: insert_into_firestore
type: io.kestra.plugin.gcp.firestore.Set
@@ -13,6 +14,7 @@ tasks:
price: "{{ trigger.data | jq('.price') | first }}"
quantity: "{{ trigger.data | jq('.quantity') | first }}"
total: "{{ trigger.data | jq('.total') | first }}"
+
triggers:
- id: realtime_trigger
type: io.kestra.plugin.gcp.pubsub.RealtimeTrigger
@@ -20,37 +22,30 @@ triggers:
topic: orders
subscription: kestra-subscription
serdeType: JSON
+
extend:
title: Use GCP Pub/Sub Realtime Trigger to push events into Firestore
description: >-
This flow will:
-
- 1. Get
- [triggered](https://kestra.io/plugins/plugin-gcp/triggers/io.kestra.plugin.gcp.pubsub.realtimetrigger)
+ 1. Get [triggered](https://kestra.io/plugins/plugin-gcp/triggers/io.kestra.plugin.gcp.pubsub.realtimetrigger)
every time the event lands in the Pub/Sub topic
2. The flow will push the data into the Firestore table
-
For this, create a Pub/Sub topic named `orders`. We will be producing JSON
messages, generated from
[orders.csv](https://huggingface.co/datasets/kestra/datasets/raw/main/csv/orders.csv),
into the Pub/Sub topic. One sample message can look like this:
-
```
-
{"order_id": "1", "customer_name": "Kelly Olsen", "customer_email":
"jenniferschneider@example.com", "product_id": "20", "price": "166.89",
"quantity": "1", "total": "166.89"}
-
```
-
Create `orders` table in Firestore.
-
When you produce a message onto the Pub/Sub topic, the flow will get
triggered, and you can see that a corresponding new record appears in the
Firestore table.
diff --git a/pulsar-realtime-trigger.yaml b/pulsar-realtime-trigger.yaml
index 54ad242..40cb79f 100644
--- a/pulsar-realtime-trigger.yaml
+++ b/pulsar-realtime-trigger.yaml
@@ -1,5 +1,6 @@
id: pulsar-realtime-trigger
namespace: company.team
+
tasks:
- id: create_mysql_table
type: io.kestra.plugin.jdbc.mysql.Query
@@ -10,62 +11,57 @@ tasks:
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(log_id)
)
+
- id: insert_into_logs_table
type: io.kestra.plugin.jdbc.mysql.Query
sql: insert into logs(message) values("{{ trigger.value }}")
+
triggers:
- id: realtime_trigger
type: io.kestra.plugin.pulsar.RealtimeTrigger
topic: apache/pulsar/logs
uri: pulsar://localhost:26650
subscriptionName: kestra_trigger_sub
+
pluginDefaults:
- type: io.kestra.plugin.jdbc.mysql.Query
values:
url: jdbc:mysql://localhost:3306/kestra
username: mysql_user
password: mysql_passwd
+
extend:
title: Use Pulsar Realtime Trigger to push events into MySQL
description: >
This flow will:
-
- 1. Get
- [triggered](https://kestra.io/plugins/plugin-pulsar/triggers/io.kestra.plugin.pulsar.realtimetrigger)
+ 1. Get [triggered](https://kestra.io/plugins/plugin-pulsar/triggers/io.kestra.plugin.pulsar.realtimetrigger)
every time the event lands in Apache Pulsar topic
2. The flow will push the data into a table in a MySQL database
-
To set up Apache Pulsar locally, you can install the [standalone
cluster](https://pulsar.apache.org/docs/next/getting-started-standalone/) or
- [docker
- cluster](https://pulsar.apache.org/docs/next/getting-started-docker/) for
+ [docker cluster](https://pulsar.apache.org/docs/next/getting-started-docker/) for
Apache Pulsar. You can run the following commands to create the topic, and
produce data to the topic:
-
1) Set up a tenant
`bin/pulsar-admin tenants create apache`
-
2) Create a namespace
`bin/pulsar-admin namespaces create apache/pulsar`
-
3) Create a topic
`bin/pulsar-admin topics create-partitioned-topic apache/pulsar/logs -p 4`
-
4) Produce data to topic
`bin/pulsar-client produce apache/pulsar/logs -m '--Hello World--' -n 1`
-
To set up a MySQL server locally, follow the official installation steps using
[docker](https://hub.docker.com/r/mysql/mysql-server/).
tags:
diff --git a/purge.yaml b/purge.yaml
index 3ba6eb6..f0ad810 100644
--- a/purge.yaml
+++ b/purge.yaml
@@ -1,5 +1,6 @@
id: purge
namespace: system
+
tasks:
- id: purge_executions
type: io.kestra.plugin.core.execution.PurgeExecutions
@@ -7,14 +8,17 @@ tasks:
purgeLog: false
states:
- SUCCESS
+
- id: purge_logs
type: io.kestra.plugin.core.log.PurgeLogs
endDate: "{{ now() | dateAdd(-1, 'MONTHS') }}"
+
triggers:
- id: daily
type: io.kestra.plugin.core.trigger.Schedule
disabled: true
cron: 0 9 * * *
+
extend:
title: Purge execution data including logs, metrics and outputs on a schedule
description: >
@@ -26,22 +30,18 @@ extend:
empty to purge all execution data, regardless of the execution status, or
adjust it to your needs.
-
Given that logs often constitute the largest chunk of data that needs to be
purged, we use a dedicated task to purge logs (so that you can run it
independently or rerun only this step in case something fails). Keep in mind
though that, by default, the `PurgeExecutions` task would also automatically
purge the logs.
-
It is recommended to run this flow daily to keep your Kestra instance clean
and save storage space.
-
**Before using this flow, make sure to set the `disabled` property to false
(or remove that line entirely).**
-
Note that this flow will not purge the flow definitions or the namespace
files — your code will be safe. Only the execution-related data will be
purged.
diff --git a/push-to-git.yaml b/push-to-git.yaml
index 223d3e6..0eaaa0b 100644
--- a/push-to-git.yaml
+++ b/push-to-git.yaml
@@ -32,12 +32,12 @@ triggers:
- id: every_full_hour
type: io.kestra.plugin.core.trigger.Schedule
cron: "*/15 * * * *"
+
extend:
title: Push code to Git at regular intervals
description: >-
This flow will push code to Git every 15 minutes.
-
We will be using the PushFlows and PushNamespaceFiles tasks to push flows and
namespace files, respectively.
tags:
diff --git a/python-aws-ecr.yaml b/python-aws-ecr.yaml
index 2713a40..1dca636 100644
--- a/python-aws-ecr.yaml
+++ b/python-aws-ecr.yaml
@@ -1,11 +1,13 @@
id: python-aws-ecr
namespace: company.team
+
tasks:
- id: ecr
type: io.kestra.plugin.aws.ecr.GetAuthToken
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
region: eu-central-1
+
- id: py
type: io.kestra.plugin.scripts.python.Commands
taskRunner:
@@ -16,6 +18,7 @@ tasks:
containerImage: 123456789.dkr.ecr.eu-central-1.amazonaws.com/data-infastructure:latest
commands:
- python --version
+
extend:
title: Pull a container image from Amazon ECR registry and run a Python script
description: >-
@@ -23,7 +26,6 @@ extend:
Amazon ECR. Then, it will pull the specified image and will run a Python
script (or whichever command you wish) in a Docker container.
-
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID` and
`AWS_SECRET_ACCESS_KEY`.
tags:
diff --git a/python-csv-each-parallel.yaml b/python-csv-each-parallel.yaml
index ea26edf..24243e7 100644
--- a/python-csv-each-parallel.yaml
+++ b/python-csv-each-parallel.yaml
@@ -1,5 +1,6 @@
id: python-csv-each-parallel
namespace: company.team
+
tasks:
- id: csv
type: io.kestra.plugin.core.flow.EachParallel
@@ -18,6 +19,7 @@ tasks:
import pandas as pd
df = pd.read_csv("{{ taskrun.value }}")
df.info()
+
extend:
title: Transform data from CSV files with Pandas in Python containers (in parallel)
description: This flow reads a list of CSV files and processes each file in
diff --git a/python-docker-artifact-registry-gcp.yaml b/python-docker-artifact-registry-gcp.yaml
index 10a783c..5c3b827 100644
--- a/python-docker-artifact-registry-gcp.yaml
+++ b/python-docker-artifact-registry-gcp.yaml
@@ -1,5 +1,6 @@
id: python-docker-artifact-registry-gcp
namespace: company.team
+
tasks:
- id: wdir
type: io.kestra.plugin.core.flow.WorkingDirectory
@@ -7,10 +8,12 @@ tasks:
- id: download_csv
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/raw/main/csv/orders.csv
+
- id: fetch_auth_token
type: io.kestra.plugin.gcp.auth.OauthAccessToken
projectId: YOUR_GCP_PROJECT_NAME
serviceAccount: "{{ secret('GCP_CREDS') }}"
+
- id: analyze_sales
type: io.kestra.plugin.scripts.python.Script
inputFiles:
@@ -39,6 +42,7 @@ tasks:
}
}
containerImage: yourGcpRegion-docker.pkg.dev/YOUR_GCP_PROJECT_NAME/REPO_NAME/python:latest
+
extend:
title: Run Python script in a Docker container based on Google Artifact Registry
container image
@@ -50,23 +54,12 @@ extend:
The Docker image is stored in Google Artifact Registry.
-
To push an image to Google Artifact Registry, you need to:
-
- - Create a Google Cloud Platform service account with the `Artifact Registry
- Writer` role.
-
+ - Create a Google Cloud Platform service account with the `Artifact Registry Writer` role.
- Create a JSON key for the service account.
-
- Create a secret with the contents of the JSON key.
-
- - Build a Docker image: `docker build -t
- yourGcpRegion-docker.pkg.dev/YOUR_GCP_PROJECT_NAME/REPO_NAME/python:latest
- .`
-
- - Push the image to Google Artifact Registry: `docker push
- yourGcpRegion-docker.pkg.dev/YOUR_GCP_PROJECT_NAME/REPO_NAME/python:latest`
-
+ - Build a Docker image: `docker build -t yourGcpRegion-docker.pkg.dev/YOUR_GCP_PROJECT_NAME/REPO_NAME/python:latest .`
+ - Push the image to Google Artifact Registry: `docker push yourGcpRegion-docker.pkg.dev/YOUR_GCP_PROJECT_NAME/REPO_NAME/python:latest`
Note that the `OauthAccessToken` task is necessary to securely fetch a
short-lived [access
diff --git a/python-generate-logs.yaml b/python-generate-logs.yaml
index 8d109dc..8371d66 100644
--- a/python-generate-logs.yaml
+++ b/python-generate-logs.yaml
@@ -27,6 +27,7 @@ tasks:
time.sleep(0.5)
logger.critical("CRITICAL means a severe failure.")
+
extend:
title: Run a Python script and capture logs
description: >-
diff --git a/python-generate-output-file.yaml b/python-generate-output-file.yaml
index 8622eea..6bef2eb 100644
--- a/python-generate-output-file.yaml
+++ b/python-generate-output-file.yaml
@@ -14,6 +14,7 @@ tasks:
- id: log_file_contents
type: io.kestra.plugin.core.log.Log
message: "{{ read(outputs.generate_output_file.outputFiles['my_file.txt']) }}"
+
extend:
title: Generate an output file using Python script
description: >-
diff --git a/python-generate-outputs-simple.yaml b/python-generate-outputs-simple.yaml
index cab083c..c9cfe7a 100644
--- a/python-generate-outputs-simple.yaml
+++ b/python-generate-outputs-simple.yaml
@@ -17,6 +17,7 @@ tasks:
message:
- "Total Marks: {{ outputs.generate_output.vars.total_marks }}"
- "Average Marks: {{ outputs.generate_output.vars.average_marks }}"
+
extend:
title: Run a simple Python script to generate outputs and log them
description: >-
@@ -25,7 +26,6 @@ extend:
The flow has two tasks:
1. Generate outputs using Python script
-
2. Log the outputs generated in the prior task
tags:
- Python
diff --git a/python-generate-outputs.yaml b/python-generate-outputs.yaml
index 701eee6..b5412c2 100644
--- a/python-generate-outputs.yaml
+++ b/python-generate-outputs.yaml
@@ -14,23 +14,16 @@ tasks:
- "*.csv"
script: >
import csv
-
import random
-
import time
-
from faker import Faker
-
from kestra import Kestra
-
start_time = time.time()
fake = Faker()
-
# list of columns for the CSV file
-
columns = [
"order_id",
"customer_name",
@@ -42,14 +35,10 @@ tasks:
]
filename = "{{ vars.file }}"
-
tags = {'file': filename}
-
# Generate 100 random orders
-
orders = []
-
for i in range(100):
order_id = i + 1
customer_name = fake.name()
@@ -63,37 +52,27 @@ tasks:
)
# Write the orders to a CSV file
-
with open(filename, "w", newline="") as file:
writer = csv.writer(file)
writer.writerow(columns)
writer.writerows(orders)
# Calculate and print the sum and average of the "total" column
-
total_sum = sum(order[6] for order in orders)
-
average_order = round(total_sum / len(orders), 2)
-
print(f"Total sum: {total_sum}")
-
print(f"Average Order value: {average_order}")
-
Kestra.outputs({"total_sum": total_sum, "average_order": average_order})
-
Kestra.counter('total_sum', total_sum, tags)
-
Kestra.counter('average_order', average_order, tags)
-
end_time = time.time()
-
processing_time = end_time - start_time
-
Kestra.timer('processing_time', processing_time, tags)
print(f"The script execution took: {processing_time} seconds")
+
extend:
title: Run a Python script and generate outputs, metrics and files specified with a variable
description: >-
@@ -101,13 +80,11 @@ extend:
the sum and average of the "total" column. It then reports the results as
outputs and metrics.
-
The CSV file generated by a Python task is set as `outputFiles`, allowing
you to download the file from the UI's Execution page. It is helpful to
share the results of your workflow with business stakeholders who can
download the file from the UI and use it in their processes.
-
To avoid hardcoding values, the filename `orders.csv` is specified as a
variable.
tags:
diff --git a/python-partitions-metrics.yaml b/python-partitions-metrics.yaml
index d15d5a4..e874ccb 100644
--- a/python-partitions-metrics.yaml
+++ b/python-partitions-metrics.yaml
@@ -1,6 +1,7 @@
id: python-partitions-metrics
namespace: company.team
description: Process partitions in parallel
+
tasks:
- id: get_partitions
type: io.kestra.plugin.scripts.python.Script
@@ -11,6 +12,7 @@ tasks:
from kestra import Kestra
partitions = [f"file_{nr}.parquet" for nr in range(1, 10)]
Kestra.outputs({'partitions': partitions})
+
- id: process_partitions
type: io.kestra.plugin.core.flow.EachParallel
value: "{{ outputs.get_partitions.vars.partitions }}"
@@ -22,22 +24,14 @@ tasks:
containerImage: ghcr.io/kestra-io/pydata:latest
script: >
import random
-
import time
-
from kestra import Kestra
-
filename = '{{ taskrun.value }}'
-
print(f"Reading and processing partition {filename}")
-
nr_rows = random.randint(1, 1000)
-
processing_time = random.randint(1, 20)
-
time.sleep(processing_time)
-
Kestra.counter('nr_rows', nr_rows, {'partition': filename})
Kestra.timer('processing_time', processing_time, {'partition':
diff --git a/python-subflow-component.yaml b/python-subflow-component.yaml
index 4ee82e7..891824e 100644
--- a/python-subflow-component.yaml
+++ b/python-subflow-component.yaml
@@ -1,10 +1,12 @@
id: python-subflow-component
namespace: company.team
+
inputs:
- id: arg1
type: INT
- id: arg2
type: INT
+
tasks:
- id: python
type: io.kestra.plugin.scripts.python.Commands
@@ -14,10 +16,8 @@ tasks:
inputFiles:
main.py: >
import argparse
-
from kestra import Kestra
-
def multiply_arguments(arg1, arg2):
return arg1 * arg2
@@ -32,13 +32,13 @@ tasks:
Kestra.outputs({'result': result})
commands:
- python main.py --arg1 {{ inputs.arg1 }} --arg2 {{ inputs.arg2 }}
+
extend:
title: Create a Python subflow, acting like an abstracted component
description: >-
This flow shows how you can create a templated flow (subflow) to run a
custom script.
-
This flow can be used in another flow, acting like a separate component. We
can imagine having a complex flow with many tasks, abstracted behind
inputs and outputs, so users only deal with a simple interface.
@@ -46,12 +46,8 @@ extend:
Here is an example of calling this flow, providing inputs and retrieving the
desired outputs.
-
-
```
-
id: call_python_component
-
namespace: company.team
tasks:
diff --git a/query-clickhouse.yaml b/query-clickhouse.yaml
index 1d69f73..6c30255 100644
--- a/query-clickhouse.yaml
+++ b/query-clickhouse.yaml
@@ -1,9 +1,11 @@
id: query-clickhouse
namespace: company.team
+
tasks:
- id: create_database
type: io.kestra.plugin.jdbc.clickhouse.Query
sql: CREATE DATABASE IF NOT EXISTS helloworld
+
- id: create_table
type: io.kestra.plugin.jdbc.clickhouse.Query
sql: |
@@ -16,6 +18,7 @@ tasks:
)
ENGINE = MergeTree()
PRIMARY KEY (user_id, timestamp)
+
- id: insert_data
type: io.kestra.plugin.jdbc.clickhouse.Query
sql: >
@@ -25,15 +28,18 @@ tasks:
(102, 'Insert a lot of rows per batch', yesterday(), 1.41421 ),
(102, 'Sort your data based on your commonly-used queries', today(), 2.718 ),
(101, 'Granules are the smallest chunks of data read', now() + 5, 3.14159 )
+
- id: query_and_store_as_json
type: io.kestra.plugin.jdbc.clickhouse.Query
sql: SELECT user_id, message FROM helloworld.my_first_table
- store: true
+ fetchType: STORE
+
pluginDefaults:
- type: io.kestra.plugin.jdbc.clickhouse.Query
values:
url: jdbc:clickhouse://host.docker.internal:8123/
username: default
+
extend:
title: Ingest data to and query data from ClickHouse
description: >
@@ -41,15 +47,11 @@ extend:
already exist. It will then insert some data into the table and finally
query the table to show the data.
-
To test this flow, you can start ClickHouse in a Docker container:
-
```
-
docker run -d -p 8123:8123 -p 9000:9000 --name myclickhouse --ulimit
nofile=262144:262144 clickhouse/clickhouse-server
-
```
tags:
- Ingest
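
With `fetchType: STORE`, the query results are written to Kestra's internal storage instead of being kept inline; downstream tasks can then reference them through the task's `uri` output. A follow-up task converting the stored result to CSV could look like this (an illustrative sketch, not part of the blueprint):

```yaml
  - id: to_csv
    type: io.kestra.plugin.serdes.csv.IonToCsv
    from: "{{ outputs.query_and_store_as_json.uri }}"  # stored result of the query task
```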
diff --git a/r-script.yaml b/r-script.yaml
index 23737f1..f340ca3 100644
--- a/r-script.yaml
+++ b/r-script.yaml
@@ -1,5 +1,6 @@
id: r-script
namespace: company.team
+
tasks:
- id: r_script
type: io.kestra.plugin.scripts.r.Script
@@ -31,6 +32,7 @@ tasks:
print(df)
write_parquet(df, "women.parquet")
write_csv_arrow(df, "women.csv")
+
extend:
title: Run R script in a Docker container and output downloadable artifacts
description: >-
@@ -38,7 +40,6 @@ extend:
using the `dplyr` package. Finally, it stores the result as both CSV and
Parquet files, which both can be downloaded from the Execution Outputs tab.
-
The R script is executed in a Docker container, providing an isolated
environment for the task and avoiding any dependency conflicts. All
dependencies for the task are baked into a publicly available Docker image,
@@ -46,9 +47,7 @@ extend:
image with your own, or install custom dependencies at runtime using the
`beforeCommands` property, for example:
-
```
-
beforeCommands:
- Rscript -e "install.packages(c('httr', 'RSQLite'))" > /dev/null 2>&1
```
diff --git a/react-to-sqs-trigger.yaml b/react-to-sqs-trigger.yaml
index c2676d6..3383e55 100644
--- a/react-to-sqs-trigger.yaml
+++ b/react-to-sqs-trigger.yaml
@@ -1,5 +1,6 @@
id: react-to-sqs-trigger
namespace: company.team
+
tasks:
- id: print_message
type: io.kestra.plugin.scripts.shell.Commands
@@ -7,6 +8,7 @@ tasks:
type: io.kestra.plugin.core.runner.Process
commands:
- cat "{{ trigger.uri }}"
+
triggers:
- id: sqs
type: io.kestra.plugin.aws.sqs.Trigger
@@ -15,18 +17,17 @@ triggers:
region: "{{ secret('AWS_DEFAULT_REGION') }}"
queueUrl: https://sqs.eu-central-1.amazonaws.com/123456789/kestra
maxRecords: 1
+
extend:
title: React to an SQS trigger
description: >-
This flow reacts to an SQS trigger. Any time there is a new message in the
queue, the flow is triggered.
-
The queue URL points to an already existing queue. The `{{ trigger.uri }}`
points to a file in Kestra's internal storage containing the content of the
SQS message. You can read the contents of that file in any task.
-
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID`,
`AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`.
tags:
diff --git a/redis-key-value-store.yaml b/redis-key-value-store.yaml
index 2c5f9c0..edbe029 100644
--- a/redis-key-value-store.yaml
+++ b/redis-key-value-store.yaml
@@ -1,5 +1,6 @@
id: redis-key-value-store
namespace: company.team
+
inputs:
- id: key
type: STRING
@@ -22,6 +23,7 @@ inputs:
"isPremium": true,
"interests": ["programming", "reading", "traveling"]
}
+
tasks:
- id: set
type: io.kestra.plugin.redis.string.Set
@@ -29,25 +31,23 @@ tasks:
serdeType: JSON
key: "{{ inputs.key }}"
value: "{{ inputs.value }}"
+
- id: get
type: io.kestra.plugin.redis.string.Get
url: redis://host.docker.internal:6379/0
serdeType: JSON
key: "{{ inputs.key }}"
+
extend:
title: Store and retrieve JSON data using Redis
description: >-
This flow will set a key-value pair in Redis and then retrieve it. The
key-value pair will be set using inputs which can be provided at runtime.
-
To test this flow, you can start Redis in a Docker container:
-
```
-
docker run --name myredis -p 6379:6379 -d redis
-
```
tags:
- Ingest
diff --git a/redis-list-realtime-trigger.yaml b/redis-list-realtime-trigger.yaml
index 93ae8f3..a41a60c 100644
--- a/redis-list-realtime-trigger.yaml
+++ b/redis-list-realtime-trigger.yaml
@@ -1,5 +1,6 @@
id: redis-list-realtime-trigger
namespace: company.team
+
tasks:
- id: insert_into_cassandra
type: io.kestra.plugin.cassandra.Query
@@ -9,59 +10,45 @@ tasks:
port: 9042
localDatacenter: datacenter1
cql: >
- INSERT INTO kestra.products (product_id, product_name, product_category,
- brand)
- VALUES ({{ trigger.value | jq(".product_id") | first }}, '{{ trigger.value
- | jq(".product_name") | first }}',
- '{{ trigger.value | jq(".product_category") | first }}', '{{ trigger.value
- | jq(".brand") | first }}')
+ INSERT INTO kestra.products (product_id, product_name, product_category, brand)
+
+ VALUES ({{ trigger.value | jq(".product_id") | first }}, '{{ trigger.value | jq(".product_name") | first }}',
+ '{{ trigger.value | jq(".product_category") | first }}', '{{ trigger.value | jq(".brand") | first }}')
triggers:
- id: realtime_trigger
type: io.kestra.plugin.redis.list.RealtimeTrigger
url: redis://localhost:6379/0
key: products
+
extend:
title: Use Redis List Realtime Trigger to push events into Cassandra
description: >-
This flow will:
-
- 1. Get
- [triggered](https://kestra.io/plugins/plugin-redis/triggers/io.kestra.plugin.redis.list.realtimetrigger)
+ 1. Get [triggered](https://kestra.io/plugins/plugin-redis/triggers/io.kestra.plugin.redis.list.realtimetrigger)
every time you push data onto Redis List
2. The flow will push the data into a table in Cassandra
-
To set up a Cassandra server locally, use the following Docker command:
```
-
docker run --name my-cassandra -p 9042:9042 -d cassandra
-
```
-
- You can use the cqlsh in the Cassandra docker container, and run the
- following commands:
+ You can use cqlsh in the Cassandra Docker container and run the following commands:
```
-
# Create the keyspace
-
> create keyspace if not exists kestra with replication = {'class' :
'SimpleStrategy', 'replication_factor' : 1};
-
# Use the keyspace
-
> use kestra;
-
# Create the table
-
> CREATE TABLE kestra.products (
product_id int,
product_name text,
@@ -70,35 +57,26 @@ extend:
PRIMARY KEY (product_id));
```
-
To set up Redis locally, use the following Docker command:
```
-
docker run --name my-redis -p 6379:6379 -d redis
-
```
-
You can use redis-cli in the Redis Docker container and push data onto
Redis using:
```
-
> LPUSH products '{"product_id": 1, "product_name": "streamline turn-key
systems", "product_category": "Electronics", "brand": "gomez"}'
-
```
-
We will be using the JSON records generated from the data in
[products.csv](https://huggingface.co/datasets/kestra/datasets/raw/main/csv/products.csv).
-
Whenever you push data onto the Redis list, the flow will be triggered
immediately and will insert the data from the trigger into the Cassandra table.
- Whenever
tags:
- Realtime Trigger
- Trigger
diff --git a/redis-list.yaml b/redis-list.yaml
index 2ed12a7..061a576 100644
--- a/redis-list.yaml
+++ b/redis-list.yaml
@@ -1,13 +1,16 @@
id: redis-list
namespace: company.team
+
variables:
key: favorite_plugins
+
tasks:
- id: clear_list
type: io.kestra.plugin.redis.list.ListPop
url: redis://host.docker.internal:6379/0
key: "{{ vars.key }}"
maxRecords: 1
+
- id: publish_list
type: io.kestra.plugin.redis.list.ListPush
url: redis://host.docker.internal:6379/0
@@ -17,6 +20,7 @@ tasks:
- duckdb
- gcp
- aws
+
extend:
title: Add a list of strings to Redis
description: >-
@@ -25,14 +29,10 @@ extend:
entries. To prevent this, the `ListPop` task is used to empty the list
before `ListPush` adds the new values.
-
To test this flow, you can start Redis in a Docker container:
-
```
-
docker run --name myredis -p 6379:6379 -d redis
-
```
tags:
- Ingest
diff --git a/redis-set-parallel.yaml b/redis-set-parallel.yaml
index 98de04f..e69a0e3 100644
--- a/redis-set-parallel.yaml
+++ b/redis-set-parallel.yaml
@@ -1,5 +1,6 @@
id: redis-set-parallel
namespace: company.team
+
inputs:
- id: values
type: JSON
@@ -10,6 +11,7 @@ inputs:
{"aws": ["s3", "sqs", "sns", "athena"]},
{"gcp": ["big-query", "gcs", "cloudrun"]}
]
+
tasks:
- id: parallel
type: io.kestra.plugin.core.flow.EachParallel
@@ -22,20 +24,17 @@ tasks:
key: "{{ json(taskrun.value) | keys | first }}"
value: |
{{ taskrun.value | jq('.[]') | first }}
+
extend:
title: Add multiple Redis keys in parallel from JSON input
description: >-
This flow adds multiple keys in parallel to a Redis data store based on JSON
input provided by the user at runtime.
-
To test this flow, you can start Redis in a Docker container:
-
```
-
docker run --name myredis -p 6379:6379 -d redis
-
```
tags:
- Ingest
diff --git a/regex-input.yaml b/regex-input.yaml
index d40835e..5b8f19d 100644
--- a/regex-input.yaml
+++ b/regex-input.yaml
@@ -1,5 +1,6 @@
id: regex-input
namespace: company.team
+
inputs:
- id: age
type: INT
@@ -7,55 +8,46 @@ inputs:
required: false
min: 18
max: 64
+
- id: user
type: STRING
defaults: student
required: false
validator: ^student(\d+)?$
+
tasks:
- id: validator
type: io.kestra.plugin.core.log.Log
message: User {{ inputs.user }}, age {{ inputs.age }}
+
extend:
title: Parametrized flow with custom validators to ensure correct integer value
range and Regex-based string pattern validation
description: >
This flow uses several input validators.
-
The `age` input must be within a valid range between the `min` and `max`
integer values.
-
The Regex expression `^student(\d+)?$` is used to validate that the input
argument `user` is of type STRING and that it follows a given pattern:
- `^`: Asserts the start of the string.
-
- `student`: Matches the word "student".
-
- `\d`: Matches any digit (0-9).
-
- `+`: Asserts that there is one or more of the preceding token (i.e., one
or more digits).
-
- `()?`: The parentheses group the digits together, and the question mark
makes the entire group optional.
-
- `$`: Asserts the end of the string. This ensures that the string doesn't
contain any characters after the optional digits.
-
With this pattern:
-
- "student" would be a match.
-
- "student123" would be a match.
-
- "studentabc" would not be a match because "abc" isn't a sequence of
digits.
-
Try running this flow with various inputs or adjust the Regex pattern to see
how the input validation works.
tags:
diff --git a/request-resources.yaml b/request-resources.yaml
index 0d85dea..573f108 100644
--- a/request-resources.yaml
+++ b/request-resources.yaml
@@ -1,5 +1,6 @@
id: request-resources
namespace: company.team
+
inputs:
- id: resource_type
displayName: Resource Type
@@ -10,6 +11,7 @@ inputs:
- SaaS application
- Development tool
- Cloud VM
+
- id: access_permissions
displayName: Access Permissions
type: SELECT
@@ -19,6 +21,7 @@ inputs:
inputs:
- resource_type
condition: "{{ inputs.resource_type equals 'Access permissions' }}"
+
- id: saas_applications
displayName: SaaS Application
type: MULTISELECT
@@ -28,6 +31,7 @@ inputs:
inputs:
- resource_type
condition: "{{ inputs.resource_type equals 'SaaS application' }}"
+
- id: development_tools
displayName: Development Tool
type: SELECT
@@ -37,6 +41,7 @@ inputs:
inputs:
- resource_type
condition: "{{ inputs.resource_type equals 'Development tool' }}"
+
- id: cloud_provider
displayName: Cloud Provider
type: SELECT
@@ -48,6 +53,7 @@ inputs:
inputs:
- resource_type
condition: "{{ inputs.resource_type equals 'Cloud VM' }}"
+
- id: cloud_vms
displayName: Cloud VM
type: SELECT
@@ -58,6 +64,7 @@ inputs:
- resource_type
- cloud_provider
condition: "{{ inputs.resource_type equals 'Cloud VM' }}"
+
- id: region
displayName: Cloud Region
type: SELECT
@@ -68,6 +75,7 @@ inputs:
- cloud_provider
- cloud_vms
condition: "{{ inputs.resource_type equals 'Cloud VM' }}"
+
variables:
slack_message: >
Validate resource request.
@@ -75,6 +83,7 @@ variables:
To approve the request, click on the Resume button here
http://localhost:28080/ui/executions/{{flow.namespace}}/{{flow.id}}/{{execution.id}}.
+
tasks:
- id: send_approval_request
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
@@ -84,6 +93,7 @@ tasks:
"channel": "#devops",
"text": {{ render(vars.slack_message) | json }}
}
+
- id: wait_for_approval
type: io.kestra.plugin.core.flow.Pause
onResume:
@@ -95,132 +105,89 @@ tasks:
description: Extra comments about the provisioned resources
type: STRING
defaults: All requested resources are approved
+
- id: approve
type: io.kestra.plugin.core.http.Request
uri: https://reqres.in/api/resources
method: POST
contentType: application/json
body: "{{ inputs }}"
+
- id: log
type: io.kestra.plugin.core.log.Log
message: |
Status of the request {{ outputs.wait_for_approval.onResume.comment }}.
Process finished with {{ outputs.approve.body }}.
+
extend:
title: Use conditional inputs to request compute resources and wait for approval
description: "This flow shows how to use conditional inputs to build dynamic
approval workflows. The workflow takes user inputs and sends them in a Slack
message for approval — the execution is paused until manually resumed.
-
Using the `dependsOn` input property, you can set up a chain of
dependencies, where one input depends on other inputs or conditions. In this
example, the `access_permissions`, `saas_applications`, `development_tools`,
and `cloud_vms` inputs are conditionally displayed based on the chosen
`resource_type` input value.
-
Before running this flow, make sure to add the required KV pairs e.g. by
using the following flow:
-
```yaml
-
id: add_kv_pairs
-
namespace: company.team
-
tasks:
-
\ - id: access_permissions
-
\ type: io.kestra.plugin.core.kv.Set
-
\ key: \"{{ task.id }}\"
-
\ kvType: JSON
-
\ value: |
-
\ [\"Admin\", \"Developer\", \"Editor\", \"Launcher\", \"Viewer\"]
-
-
+ \
\ - id: saas_applications
-
\ type: io.kestra.plugin.core.kv.Set
-
\ key: \"{{ task.id }}\"
-
\ kvType: JSON
-
\ value: |
-
\ [\"Slack\", \"Notion\", \"HubSpot\", \"GitHub\", \"Jira\"]
-
-
+ \
\ - id: development_tools
-
\ type: io.kestra.plugin.core.kv.Set
-
\ key: \"{{ task.id }}\"
-
\ kvType: JSON
-
\ value: |
-
\ [\"Cursor\", \"IntelliJ IDEA\", \"PyCharm Professional\",
\"Datagrip\"]
-
-
+ \
\ - id: cloud_vms
-
\ type: io.kestra.plugin.core.kv.Set
-
\ key: \"{{ task.id }}\"
-
\ kvType: JSON
-
\ value: |
-
\ {
-
\ \"AWS\": [\"t2.micro\", \"t2.small\", \"t2.medium\", \"t2.large\"],
-
\ \"GCP\": [\"f1-micro\", \"g1-small\", \"n1-standard-1\",
\"n1-standard-2\"],
-
\ \"Azure\": [\"Standard_B1s\", \"Standard_B1ms\", \"Standard_B2s\",
\"Standard_B2ms\"]
-
\ }
-
-
+ \
\ - id: cloud_regions
-
\ type: io.kestra.plugin.core.kv.Set
-
\ key: \"{{ task.id }}\"
-
\ kvType: JSON
-
\ value: |
-
\ {
-
\ \"AWS\": [\"us-east-1\", \"us-west-1\", \"us-west-2\",
\"eu-west-1\"],
-
\ \"GCP\": [\"us-central1\", \"us-east1\", \"us-west1\",
\"europe-west1\"],
-
\ \"Azure\": [\"eastus\", \"westus\", \"centralus\",
\"northcentralus\"]
-
\ }
-
```
-
\ "
tags:
- Inputs
diff --git a/retries.yaml b/retries.yaml
index a35552e..f95c35e 100644
--- a/retries.yaml
+++ b/retries.yaml
@@ -1,5 +1,6 @@
id: retries
namespace: company.team
+
tasks:
- id: fail_4_times
type: io.kestra.plugin.scripts.shell.Commands
@@ -13,10 +14,12 @@ tasks:
maxAttempt: 5
maxDuration: PT1M
warningOnRetry: false
+
errors:
- id: will_never_happen
type: io.kestra.plugin.core.debug.Return
format: This will never be executed as retries will fix the issue
+
extend:
title: Retry a failing task up to 4 times (allowing up to 5 attempts with up to
4 retries)
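
For context, only `maxAttempt`, `maxDuration`, and `warningOnRetry` are visible in the hunk above; a full `retry` block on such a task could look like the sketch below (the `constant` type and `PT10S` interval are assumptions):

```yaml
    retry:
      type: constant        # assumed retry behaviour; exponential and random types also exist
      interval: PT10S       # assumed delay between attempts
      maxAttempt: 5
      maxDuration: PT1M
      warningOnRetry: false
```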
diff --git a/run-airflow-dag-from-kestra.yaml b/run-airflow-dag-from-kestra.yaml
index 1c8d54b..fb77736 100644
--- a/run-airflow-dag-from-kestra.yaml
+++ b/run-airflow-dag-from-kestra.yaml
@@ -1,5 +1,6 @@
id: run-airflow-dag-from-kestra
namespace: company.team
+
tasks:
- id: run_dag
type: io.kestra.plugin.airflow.dags.TriggerDagRun
@@ -17,6 +18,7 @@ tasks:
flow: "{{ flow.id }}"
task: "{{ task.id }}"
execution: "{{ execution.id }}"
+
extend:
title: Trigger an Apache Airflow DAG run from Kestra and wait for its completion
description: >-
@@ -24,11 +26,9 @@ extend:
waits for its completion. Under the hood, the plugin uses the Airflow REST
API to trigger the DAG run and check its status.
-
The `conf` field in the request body can be used to pass extra metadata
about the execution that triggered the Airflow DAG run.
-
The flow can be useful for users migrating to Kestra from Airflow or
orchestrating workflows across both platforms.
tags:
diff --git a/run-tasks-on-databricks.yaml b/run-tasks-on-databricks.yaml
index 5251c81..b0e7feb 100644
--- a/run-tasks-on-databricks.yaml
+++ b/run-tasks-on-databricks.yaml
@@ -1,5 +1,6 @@
id: run-tasks-on-databricks
namespace: company.team
+
tasks:
- id: submit_run
type: io.kestra.plugin.databricks.job.SubmitRun
@@ -13,9 +14,11 @@ tasks:
pythonFile: /Shared/hello.py
sparkPythonTaskSource: WORKSPACE
waitForCompletion: PT5M
+
- id: log_status
type: io.kestra.plugin.core.log.Log
message: The job finished, all done!
+
extend:
title: Execute a Spark or Python script on an existing Databricks cluster and
wait for its completion
diff --git a/s3-map-over-objects.yaml b/s3-map-over-objects.yaml
index 0bd482d..5530e66 100644
--- a/s3-map-over-objects.yaml
+++ b/s3-map-over-objects.yaml
@@ -1,9 +1,11 @@
id: s3-map-over-objects
namespace: company.team
+
inputs:
- id: bucket
type: STRING
defaults: declarative-data-orchestration
+
tasks:
- id: list_objects
type: io.kestra.plugin.aws.s3.List
@@ -12,24 +14,25 @@ tasks:
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
region: "{{ secret('AWS_DEFAULT_REGION') }}"
+
- id: print_objects
type: io.kestra.plugin.core.log.Log
- message: found objects {{ outputs.list_objects.objects }}
+ message: "Found objects {{ outputs.list_objects.objects }}"
+
- id: map_over_s3_objects
type: io.kestra.plugin.core.flow.EachParallel
value: "{{ outputs.list_objects.objects }}"
tasks:
- id: filename
type: io.kestra.plugin.core.log.Log
- message: filename {{ json(taskrun.value).key }} with size {{
- json(taskrun.value).size }}
+ message: "Filename {{ json(taskrun.value).key }} with size {{ json(taskrun.value).size }}"
+
extend:
title: List objects in an S3 bucket and process them in parallel
description: >
This flow lists objects with a specific prefix in an S3 bucket and then
processes each object in parallel.
-
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID`,
`AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`.
tags:
diff --git a/s3-parallel-uploads.yaml b/s3-parallel-uploads.yaml
index cea6e09..bc63c06 100644
--- a/s3-parallel-uploads.yaml
+++ b/s3-parallel-uploads.yaml
@@ -1,17 +1,21 @@
id: s3-parallel-uploads
namespace: company.team
+
inputs:
- id: bucket
type: STRING
defaults: declarative-data-orchestration
+
tasks:
- id: get_zip_file
type: io.kestra.plugin.core.http.Download
uri: https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip
+
- id: unzip
type: io.kestra.plugin.compress.ArchiveDecompress
algorithm: ZIP
from: "{{ outputs.get_zip_file.uri }}"
+
- id: parallel_upload_to_s3
type: io.kestra.plugin.core.flow.Parallel
tasks:
@@ -19,16 +23,17 @@ tasks:
type: io.kestra.plugin.aws.s3.Upload
from: "{{ outputs.unzip.files['global_power_plant_database.csv'] }}"
key: powerplant/global_power_plant_database.csv
+
- id: pdf
type: io.kestra.plugin.aws.s3.Upload
- from: "{{
- outputs.unzip.files['Estimating_Power_Plant_Generation_in_the_Global_P\
- ower_Plant_Database.pdf'] }}"
+ from: "{{ outputs.unzip.files['Estimating_Power_Plant_Generation_in_the_Global_Power_Plant_Database.pdf'] }}"
key: powerplant/Estimating_Power_Plant_Generation_in_the_Global_Power_Plant_Database.pdf
+
- id: txt
type: io.kestra.plugin.aws.s3.Upload
from: "{{ outputs.unzip.files['RELEASE_NOTES.txt'] }}"
key: powerplant/RELEASE_NOTES.txt
+
pluginDefaults:
- type: io.kestra.plugin.aws.s3.Upload
values:
@@ -36,19 +41,17 @@ pluginDefaults:
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
region: "{{ secret('AWS_DEFAULT_REGION') }}"
bucket: "{{ inputs.bucket }}"
+
extend:
title: Download a zip file, unzip it and upload all files in parallel to AWS S3
- using pluginDefaults to avoid boilerplate code
description: >-
- This flow downloads a zip file, unzips it, and uploads all files to S3 in
- parallel.
-
+ This flow downloads a zip file, unzips it, and uploads all files to S3 in parallel.
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID`,
`AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION` configured using
`pluginDefaults` to avoid boilerplate configuration.
-
The flow does not create the S3 bucket, and assumes that the bucket name
provided in the inputs already exists in the `AWS_DEFAULT_REGION`.
tags:
diff --git a/s3-trigger-duckdb.yaml b/s3-trigger-duckdb.yaml
index 35bc54a..799cc62 100644
--- a/s3-trigger-duckdb.yaml
+++ b/s3-trigger-duckdb.yaml
@@ -1,35 +1,32 @@
id: s3-trigger-duckdb
namespace: company.team
+
variables:
bucket: kestraio
source_prefix: monthly_orders
destination_prefix: stage_orders
+
tasks:
- id: query
type: io.kestra.plugin.jdbc.duckdb.Query
description: Validate new file for anomalies
sql: >
INSTALL httpfs;
-
LOAD httpfs;
-
SET s3_region='{{ secret('AWS_DEFAULT_REGION') }}';
-
SET s3_access_key_id='{{ secret('AWS_ACCESS_KEY_ID') }}';
-
SET s3_secret_access_key='{{ secret('AWS_SECRET_ACCESS_KEY') }}';
-
SELECT *
-
FROM read_csv_auto('s3://{{ vars.bucket }}/{{ vars.destination_prefix
}}/{{ trigger.objects | jq('.[].key') | first }}')
-
WHERE price * quantity != total;
- store: true
+ fetchType: STORE
+
- id: csv
type: io.kestra.plugin.serdes.csv.IonToCsv
description: Create CSV file from query results
from: "{{ outputs.query.uri }}"
+
- id: if_anomalies_detected
type: io.kestra.plugin.core.flow.If
condition: "{{ outputs.query.size }}"
@@ -58,6 +55,7 @@ tasks:
Best regards,
Data Team
+
triggers:
- id: poll_for_new_s3_files
type: io.kestra.plugin.aws.s3.Trigger
@@ -72,6 +70,7 @@ triggers:
region: "{{ secret('AWS_DEFAULT_REGION') }}"
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
+
extend:
title: Anomaly detection using DuckDB SQL query and S3 file event trigger,
sending a CSV file attachment via email if anomalies are detected
@@ -79,24 +78,20 @@ extend:
This flow will be triggered any time a new file arrives in a given S3
`bucket` and `source_prefix` folder.
-
The flow will check for anomalies in the data from that file using a DuckDB
query, and will move the file to the same S3 bucket below the
`destination_prefix` folder.
-
If anomalies are detected, the flow will send an email to the recipients
specified on the `to` property, and will send anomalous rows as a CSV file
attachment in the same email.
-
If you use [MotherDuck](https://motherduck.com/), use Kestra Secret to store
the [MotherDuck service
token](https://motherduck.com/docs/authenticating-to-motherduck). Then,
modify the `query` task as follows to point the task to your MotherDuck
database:
-
```yaml
- id: query
type: io.kestra.plugin.jdbc.duckdb.Query
@@ -105,10 +100,9 @@ extend:
SELECT *
FROM read_csv_auto('s3://{{ vars.bucket }}/{{ vars.destination_prefix }}/{{ trigger.objects | jq('.[].key') | first }}')
WHERE price * quantity != total;
- store: true
+ fetchType: STORE
url: "jdbc:duckdb:md:my_db?motherduck_token={{ secret('MOTHERDUCK_TOKEN') }}"
```
-
tags:
- S3
- Trigger
diff --git a/s3-trigger-python.yaml b/s3-trigger-python.yaml
index a099e99..8369a32 100644
--- a/s3-trigger-python.yaml
+++ b/s3-trigger-python.yaml
@@ -33,6 +33,7 @@ triggers:
moveTo:
key: archive/
maxKeys: 1
+
extend:
title: "Detect New Files in S3 and process them in Python"
description: >-
@@ -44,11 +45,9 @@ extend:
The Python code will read the file as an `inputFile` called `input.csv`
and process it to generate a new file called `data.csv`.
-
It's recommended to set the `accessKeyId` and `secretKeyId` properties as
secrets.
-
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID`,
`AWS_SECRET_ACCESS_KEY`.
tags:
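
A possible shape for that Python task, sketched under assumptions (the task ID, container image, pandas-based transformation, and trigger expression are illustrative; only the `input.csv` and `data.csv` file names come from the description above):

```yaml
  - id: process
    type: io.kestra.plugin.scripts.python.Script
    containerImage: ghcr.io/kestra-io/pydata:latest  # assumed image with pandas preinstalled
    inputFiles:
      input.csv: "{{ trigger.objects | jq('.[].uri') | first }}"  # URI of the object detected by the S3 trigger
    outputFiles:
      - data.csv
    script: |
      import pandas as pd

      df = pd.read_csv("input.csv")
      # placeholder transformation; replace with real processing logic
      df.to_csv("data.csv", index=False)
```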
diff --git a/s3-trigger.yaml b/s3-trigger.yaml
index 279a751..465056f 100644
--- a/s3-trigger.yaml
+++ b/s3-trigger.yaml
@@ -1,5 +1,6 @@
id: s3-trigger
namespace: company.team
+
tasks:
- id: each
type: io.kestra.plugin.core.flow.EachParallel
@@ -8,6 +9,7 @@ tasks:
type: io.kestra.plugin.core.debug.Return
format: "{{ taskrun.value }}"
value: "{{ trigger.objects | jq('.[].uri') }}"
+
triggers:
- id: wait_for_s3_object
type: io.kestra.plugin.aws.s3.Trigger
@@ -21,6 +23,7 @@ triggers:
region: "{{ secret('AWS_DEFAULT_REGION') }}"
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
+
extend:
title: "AWS S3 Event Trigger "
description: >-
@@ -29,13 +32,9 @@ extend:
internal storage and move the S3 objects to an `archive` folder (i.e. S3
object prefix with the name `archive`).
-
The `EachParallel` task will iterate over the objects and print their URIs.
-
- It's recommended to set the `accessKeyId` and `secretKeyId` properties as
- secrets.
-
+ It's recommended to set the `accessKeyId` and `secretKeyId` properties as secrets.
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID`,
`AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`.
diff --git a/scan-dynamodb-table.yaml b/scan-dynamodb-table.yaml
index 7bceaeb..9480736 100644
--- a/scan-dynamodb-table.yaml
+++ b/scan-dynamodb-table.yaml
@@ -1,5 +1,6 @@
id: scan-dynamodb-table
namespace: company.team
+
tasks:
- id: extract_data
type: io.kestra.plugin.aws.dynamodb.Scan
@@ -8,25 +9,23 @@ tasks:
region: "{{ secret('AWS_DEFAULT_REGION') }}"
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
+
- id: process_data
type: io.kestra.plugin.scripts.shell.Commands
taskRunner:
type: io.kestra.plugin.core.runner.Process
commands:
- echo {{ outputs.extract_data.rows }}
+
extend:
title: Extract and process data from DynamoDB
description: >-
This flow scans a DynamoDB table and outputs the extracted data as a JSON
string. The subsequent task processes that data.
-
The `tableName` property must point to an already existing DynamoDB table.
-
- It's recommended to set the `accessKeyId` and `secretKeyId` properties as
- secrets.
-
+ It's recommended to set the `accessKeyId` and `secretKeyId` properties as secrets.
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID`,
`AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`.
diff --git a/send-email-with-attachment.yaml b/send-email-with-attachment.yaml
index 47d6e49..0e24980 100644
--- a/send-email-with-attachment.yaml
+++ b/send-email-with-attachment.yaml
@@ -1,9 +1,11 @@
id: send-email-with-attachment
namespace: company.team
+
tasks:
- id: dataset1
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/raw/main/csv/products.csv
+
- id: send_email
type: io.kestra.plugin.notifications.mail.MailSend
from: onboardin@resend.dev
@@ -17,6 +19,7 @@ tasks:
- name: data.csv
uri: "{{ outputs.dataset1.uri }}"
htmlTextContent: Please find attached your dataset as a CSV file
+
extend:
title: Send an email and corresponding attachments with Resend
description: >-
diff --git a/send-sms.yaml b/send-sms.yaml
index e39acd6..e8b53d7 100644
--- a/send-sms.yaml
+++ b/send-sms.yaml
@@ -1,9 +1,11 @@
id: send-sms
namespace: company.team
+
inputs:
- id: sms_text
type: STRING
defaults: Hello from Kestra and AWS SNS!
+
tasks:
- id: send_sms
type: io.kestra.plugin.aws.sns.Publish
@@ -14,6 +16,7 @@ tasks:
from:
data: |
{{ inputs.sms_text }}
+
extend:
title: Send an SMS message using AWS SNS based on a runtime-specific input
description: >-
@@ -21,7 +24,6 @@ extend:
number must be registered when creating an AWS SNS topic. You can override
the SMS message text at runtime by leveraging the input argument `sms_text`.
-
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID`,
`AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`.
tags:
diff --git a/sengrid-notify-on-failure.yaml b/sengrid-notify-on-failure.yaml
index caaff8c..4e3f90e 100644
--- a/sengrid-notify-on-failure.yaml
+++ b/sengrid-notify-on-failure.yaml
@@ -1,5 +1,6 @@
id: sengrid-notify-on-failure
namespace: company.team
+
tasks:
- id: send_email_notification
type: io.kestra.plugin.notifications.sendgrid.SendGridMailExecution
@@ -10,6 +11,7 @@ tasks:
taskrun.startDate }}"
sendgridApiKey: "{{ secret('SENDGRID_API_KEY') }}"
executionId: "{{ trigger.executionId }}"
+
triggers:
- id: on_failure
type: io.kestra.plugin.core.trigger.Flow
@@ -21,40 +23,31 @@ triggers:
- type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
namespace: company
comparison: PREFIX
+
extend:
title: Send a SendGrid email notification when a workflow fails
description: >-
This system flow will send a SendGrid email notification anytime a workflow
in a `company` namespace (or any nested child namespace) fails.
-
Using this pattern, you can send email notifications for Kestra workflow
execution failures alongside other notifications.
-
You can customize that system flow by modifying the task, adding more tasks
to the flow or adjusting the trigger conditions. Read more about that
- pattern in the [Administrator
- Guide](https://kestra.io/docs/administrator-guide/monitoring).
-
-
- Let's create a flow in the namespace with prefix `company` that will always
- fail.
+ pattern in the [Administrator Guide](https://kestra.io/docs/administrator-guide/monitoring).
+ Let's create a flow in the namespace with prefix `company` that will always fail.
```yaml
-
id: failure_flow
-
namespace: company.team
-
tasks:
- id: always_fails
type: io.kestra.plugin.core.execution.Fail
```
-
Whenever you run the `failure_flow`, it will trigger an execution of the
`sengrid_notify_on_failure` flow. As a result, an email notification will be
sent using SendGrid so that prompt action can be taken.
diff --git a/sensitive-data.yaml b/sensitive-data.yaml
index 41db322..4932fca 100644
--- a/sensitive-data.yaml
+++ b/sensitive-data.yaml
@@ -1,9 +1,11 @@
id: sensitive-data
namespace: company.team
+
tasks:
- id: extract
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/raw/main/csv/orders.csv
+
- id: transform
type: io.kestra.plugin.jdbc.duckdb.Query
inputFiles:
@@ -24,6 +26,7 @@ tasks:
DELIMITER ',');
outputFiles:
- "*.csv"
+
- id: load
type: io.kestra.plugin.gcp.bigquery.Load
from: "{{ outputs.transform.outputFiles.csv }}"
@@ -34,12 +37,12 @@ tasks:
autodetect: true
csvOptions:
fieldDelimiter: ","
+
extend:
title: Extract data, mask sensitive columns using DuckDB and load it to BigQuery
description: >-
This flow has three tasks: `extract`, `transform` and `load`.
-
1. The `extract` task here is a simple HTTP Download task, but you can
replace it with any task or custom script that extracts data.
@@ -49,13 +52,11 @@ extend:
3. The `load` task loads that extracted and transformed data to BigQuery.
-
If you use [MotherDuck](https://motherduck.com/), use Kestra Secret to store
the [MotherDuck service
token](https://motherduck.com/docs/authenticating-to-motherduck). Then, add
the `url` property to point the task to your MotherDuck database.
-
```yaml
- id: transform
type: io.kestra.plugin.jdbc.duckdb.Query
diff --git a/sentry-alert.yaml b/sentry-alert.yaml
index 4761d47..4c762d4 100644
--- a/sentry-alert.yaml
+++ b/sentry-alert.yaml
@@ -1,8 +1,10 @@
id: sentry-alert
namespace: company.team
+
tasks:
- id: fail
type: io.kestra.plugin.core.execution.Fail
+
errors:
- id: alert_on_failure
type: io.kestra.plugin.notifications.sentry.SentryAlert
@@ -21,31 +23,28 @@ errors:
"Link": "http://localhost:8080/ui/executions/{{flow.namespace}}/{{flow.id}}/{{execution.id}}"
}
}
+
extend:
title: Send an alert to Sentry when a flow fails
description: >-
This flow shows how to send an alert to Sentry when a flow fails.
-
The only required input is a DSN string value, which you can find in your
Sentry project settings under the section "Client Keys
(DSN)". You can find a more detailed description of how to find your DSN in
the [following Sentry
documentation](https://docs.sentry.io/product/sentry-basics/concepts/dsn-explainer/#where-to-find-your-dsn).
-
You can customize the alert `payload`, which is a JSON object, or you can
skip it and use the default payload created by Kestra. For more information
about the payload, check the [Sentry Event Payloads
documentation](https://develop.sentry.dev/sdk/event-payloads/).
-
The `event_id` is an optional payload attribute that you can use to override
the default event ID. If you don't specify it (recommended), Kestra will
generate a random UUID. You can use this attribute to group events together,
but note that this must be a UUID type. For more information, check the
- [Sentry
- documentation](https://docs.sentry.io/product/issues/grouping-and-fingerprints/).
+ [Sentry documentation](https://docs.sentry.io/product/issues/grouping-and-fingerprints/).
tags:
- Notifications
ee: false
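If you do choose to set the `event_id` yourself, a minimal sketch of such a payload could look like the following. The `dsn` property name, the payload fields, and the hard-coded UUID are illustrative assumptions based on the Sentry event payload format, not this blueprint's exact configuration:

```yaml
errors:
  - id: alert_on_failure
    type: io.kestra.plugin.notifications.sentry.SentryAlert
    dsn: "{{ secret('SENTRY_DSN') }}" # assumption: DSN stored as a Kestra secret
    payload: |
      {
        "event_id": "bea4fcbbc19d4758b1f392bb8e04de53",
        "level": "error",
        "message": "Failure alert for flow {{ flow.namespace }}.{{ flow.id }}"
      }
```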
diff --git a/shell-scripts.yaml b/shell-scripts.yaml
index b0ad121..c6ce2b1 100644
--- a/shell-scripts.yaml
+++ b/shell-scripts.yaml
@@ -1,5 +1,6 @@
id: shell-scripts
namespace: company.team
+
tasks:
- id: working_directory
type: io.kestra.plugin.core.flow.WorkingDirectory
@@ -15,12 +16,14 @@ tasks:
do
echo "$i,$RANDOM,$RANDOM" >> file.csv
done
+
- id: inspect_file
type: io.kestra.plugin.scripts.shell.Commands
taskRunner:
type: io.kestra.plugin.core.runner.Process
commands:
- cat file.csv
+
- id: filter_file
type: io.kestra.plugin.scripts.shell.Commands
description: select only the first five rows of the second column
@@ -28,6 +31,7 @@ tasks:
type: io.kestra.plugin.core.runner.Process
commands:
- cut -d ',' -f 2 file.csv | head -n 6
+
extend:
title: Run Shell Scripts and Shell commands in a working directory using a
Process Task Runner
diff --git a/slack-failure-alert.yaml b/slack-failure-alert.yaml
index 5f18e57..179b559 100644
--- a/slack-failure-alert.yaml
+++ b/slack-failure-alert.yaml
@@ -1,11 +1,13 @@
id: slack-failure-alert
namespace: company.monitoring
+
tasks:
- id: send
type: io.kestra.plugin.notifications.slack.SlackExecution
url: "{{ secret('SLACK_WEBHOOK') }}"
channel: "#general"
executionId: "{{ trigger.executionId }}"
+
triggers:
- id: listen
type: io.kestra.plugin.core.trigger.Flow
@@ -17,6 +19,7 @@ triggers:
- type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
namespace: company.analytics
prefix: true
+
extend:
title: Failure notifications to Slack to monitor the health of production workflows
description: >-
@@ -24,7 +27,6 @@ extend:
namespace finishes with errors or warnings. Thanks to the `executionId`
variable, the alert includes a link to the failed flow's execution page.
-
Given that this flow runs on a Flow trigger, there is no need for
boilerplate code to define alert logic in each flow separately. Instead, the
Flow trigger allows you to define that logic only once. The trigger will
@@ -32,7 +34,6 @@ extend:
namespace, including all child namespaces, and will automatically send Slack
messages on failure.
-
This flow assumes that you stored the Slack webhook URL as a secret.
tags:
- Notifications
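For reference, a Flow trigger like this is typically scoped by both an execution status condition and the namespace condition visible in the diff. A sketch of how that combination usually looks; the exact conditions in this blueprint may differ from what is shown here:

```yaml
triggers:
  - id: listen
    type: io.kestra.plugin.core.trigger.Flow
    conditions:
      # assumption: alert on executions that finished in a failed or warning state
      - type: io.kestra.plugin.core.condition.ExecutionStatusCondition
        in:
          - FAILED
          - WARNING
      - type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
        namespace: company.analytics
        prefix: true
```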
diff --git a/slack-incoming-webhook.yaml b/slack-incoming-webhook.yaml
index 59ee115..27b9814 100644
--- a/slack-incoming-webhook.yaml
+++ b/slack-incoming-webhook.yaml
@@ -1,5 +1,6 @@
id: slack-incoming-webhook
namespace: company.team
+
tasks:
- id: slack
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
@@ -9,10 +10,10 @@ tasks:
"channel": "#alerts",
"text": "Flow {{ flow.namespace }}.{{ flow.id }} started with execution {{ execution.id }}"
}
+
extend:
title: Send a Slack message via incoming webhook
- description: Send messages through [Slack Incoming
- Webhook](https://api.slack.com/messaging/webhooks).
+ description: Send messages through [Slack Incoming Webhook](https://api.slack.com/messaging/webhooks).
tags:
- Notifications
- Software Engineering
diff --git a/snowflake-query.yaml b/snowflake-query.yaml
index b4cf9f1..dc5b929 100644
--- a/snowflake-query.yaml
+++ b/snowflake-query.yaml
@@ -1,23 +1,24 @@
id: snowflake-query
namespace: company.team
+
tasks:
- id: query
type: io.kestra.plugin.jdbc.snowflake.Query
url: jdbc:snowflake://accountID.snowflakecomputing.com?warehouse=COMPUTE_WH
username: yourSnowflakeUser
password: "{{ secret('SNOWFLAKE_PASSWORD') }}"
- fetchOne: true
+ fetchType: FETCH_ONE
sql: |
SELECT * FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.CUSTOMER
+
extend:
title: Query data in Snowflake
description: >-
- This flow runs a query within a Snowflake data warehouse. The `fetchOne`
- property will retrieve only the first row, while using the `fetch` property
- will retrieve all rows. Setting `store: true` will provide the results as a
+ This flow runs a query within a Snowflake data warehouse. The `fetchType`
+ property value `FETCH_ONE` will retrieve only the first row, while using the `FETCH` value
+ will retrieve all rows. Setting `fetchType: STORE` will provide the results as a
downloadable file.
-
The flow assumes the password is stored as a secret.
tags:
- Snowflake
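As a side note to this blueprint: with `fetchType: FETCH`, the rows are returned directly in the task output instead of as a stored file. A minimal sketch, assuming the fetched rows are exposed to downstream tasks as `outputs.query.rows`:

```yaml
tasks:
  - id: query
    type: io.kestra.plugin.jdbc.snowflake.Query
    url: jdbc:snowflake://accountID.snowflakecomputing.com?warehouse=COMPUTE_WH
    username: yourSnowflakeUser
    password: "{{ secret('SNOWFLAKE_PASSWORD') }}"
    fetchType: FETCH # return all rows in the execution context rather than a file
    sql: SELECT * FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.CUSTOMER LIMIT 10

  - id: log_rows
    type: io.kestra.plugin.core.log.Log
    message: "{{ outputs.query.rows }}" # assumption: FETCH exposes a `rows` output
```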
diff --git a/snowflake.yaml b/snowflake.yaml
index 783122c..d531aa4 100644
--- a/snowflake.yaml
+++ b/snowflake.yaml
@@ -1,9 +1,11 @@
id: snowflake
namespace: company.team
+
tasks:
- id: create_database
type: io.kestra.plugin.jdbc.snowflake.Query
sql: CREATE OR REPLACE DATABASE kestra;
+
- id: create_table
type: io.kestra.plugin.jdbc.snowflake.Query
sql: |
@@ -15,9 +17,11 @@ tasks:
city STRING ,
start_date DATE
);
+
- id: extract
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/raw/main/employees/employees00.csv
+
- id: load_to_internal_stage
type: io.kestra.plugin.jdbc.snowflake.Upload
from: "{{ outputs.extract.uri }}"
@@ -25,46 +29,43 @@ tasks:
prefix: raw
stageName: "@kestra.public.%employees"
compress: true
+
- id: load_from_stage_to_table
type: io.kestra.plugin.jdbc.snowflake.Query
sql: >
COPY INTO KESTRA.PUBLIC.EMPLOYEES
-
FROM @kestra.public.%employees
-
- FILE_FORMAT = (type = csv field_optionally_enclosed_by='"' skip_header =
- 1)
-
+ FILE_FORMAT = (type = csv field_optionally_enclosed_by='"' skip_header = 1)
PATTERN = '.*employees0[0-9].csv.gz'
-
ON_ERROR = 'skip_file';
+
- id: analyze
type: io.kestra.plugin.jdbc.snowflake.Query
description: Growth of new hires per month
sql: >
- SELECT year(START_DATE) as year, monthname(START_DATE) as month, count(*)
- as nr_employees
-
+ SELECT year(START_DATE) as year, monthname(START_DATE) as month, count(*) as nr_employees
FROM kestra.public.EMPLOYEES
-
GROUP BY year(START_DATE), monthname(START_DATE)
-
ORDER BY nr_employees desc;
- store: true
+ fetchType: STORE
+
- id: csv_report
type: io.kestra.plugin.serdes.csv.IonToCsv
from: "{{ outputs.analyze.uri }}"
+
pluginDefaults:
- type: io.kestra.plugin.jdbc.snowflake.Query
values:
url: jdbc:snowflake://accountID.snowflakecomputing.com?warehouse=COMPUTE_WH
username: yourSnowflakeUser
password: "{{ secret('SNOWFLAKE_PASSWORD') }}"
+
- type: io.kestra.plugin.jdbc.snowflake.Upload
values:
url: jdbc:snowflake://accountID.snowflakecomputing.com?warehouse=COMPUTE_WH
username: yourSnowflakeUser
password: "{{ secret('SNOWFLAKE_PASSWORD') }}"
+
extend:
title: "Snowflake ETL: load files to internal stage, copy from stage to a table
and run analytical SQL queries "
@@ -73,10 +74,8 @@ extend:
and a table. It then extracts data from an external source, and loads that
data as a CSV file into Snowflake's internal stage.
-
The CSV file uploaded to stage is then loaded into the table.
-
Finally, we do some analytics by aggregating (imaginary) new hires at Kestra
    over time. The final result is fetched into Kestra's internal storage
and converted to a CSV file that you can download from the Outputs tab on
diff --git a/sqlmesh.yaml b/sqlmesh.yaml
index a2ee1a8..6beeb30 100644
--- a/sqlmesh.yaml
+++ b/sqlmesh.yaml
@@ -1,6 +1,7 @@
id: sqlmesh
namespace: company.team
description: Clone SQLMesh project and run the project, and query with DuckDB
+
tasks:
- id: working_dir
type: io.kestra.plugin.core.flow.WorkingDirectory
@@ -9,6 +10,7 @@ tasks:
type: io.kestra.plugin.git.Clone
url: https://github.com/TobikoData/sqlmesh-examples.git
branch: main
+
- id: sqlmesh
type: io.kestra.plugin.sqlmesh.cli.SQLMeshCLI
beforeCommands:
@@ -17,16 +19,16 @@ tasks:
- sqlmesh plan --auto-apply
outputFiles:
- 001_sushi/1_simple/db/sushi-example.db
+
- id: query
type: io.kestra.plugin.jdbc.duckdb.Query
inputFiles:
- data.db: "{{
- outputs.sqlmesh.outputFiles['001_sushi/1_simple/db/sushi-example.db']
- }}"
+ data.db: "{{ outputs.sqlmesh.outputFiles['001_sushi/1_simple/db/sushi-example.db'] }}"
sql: |
ATTACH '{{ workingDir }}/data.db';
SELECT * FROM sushisimple.top_waiters;
- store: true
+ fetchType: STORE
+
extend:
title: Orchestrate SQLMesh and DuckDB engine
description: This blueprint shows how you can pull a SQLMesh project from a Git
diff --git a/sqs-publish-message.yaml b/sqs-publish-message.yaml
index a4b7f00..790c8aa 100644
--- a/sqs-publish-message.yaml
+++ b/sqs-publish-message.yaml
@@ -1,9 +1,11 @@
id: sqs-publish-message
namespace: company.team
+
inputs:
- id: message
type: STRING
defaults: Hi from Kestra!
+
tasks:
- id: publish_message
type: io.kestra.plugin.aws.sqs.Publish
@@ -13,13 +15,13 @@ tasks:
queueUrl: https://sqs.eu-central-1.amazonaws.com/123456789/kestra
from:
data: "{{ inputs.message }}"
+
extend:
title: Publish a message to an SQS queue
description: >-
This flow publishes a message to an SQS queue. The queue URL points to an
already existing queue.
-
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID`,
`AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`.
tags:
diff --git a/sqs-realtime-trigger.yaml b/sqs-realtime-trigger.yaml
index 7e63ca4..373d7e8 100644
--- a/sqs-realtime-trigger.yaml
+++ b/sqs-realtime-trigger.yaml
@@ -1,5 +1,6 @@
id: sqs-realtime-trigger
namespace: company.team
+
tasks:
- id: insert_into_dynamoDB
type: io.kestra.plugin.aws.dynamodb.PutItem
@@ -15,6 +16,7 @@ tasks:
price: "{{ trigger.data | jq('.price') | first }}"
quantity: "{{ trigger.data | jq('.quantity') | first }}"
total: "{{ trigger.data | jq('.total') | first }}"
+
triggers:
- id: realtime_trigger
type: io.kestra.plugin.aws.sqs.RealtimeTrigger
@@ -23,41 +25,33 @@ triggers:
region: eu-central-1
queueUrl: https://sqs.eu-central-1.amazonaws.com/000000000000/orders
serdeType: JSON
+
extend:
title: Use AWS SQS Realtime Trigger to push events into DynamoDB
description: >-
This flow will:
-
- 1. Get
- [triggered](https://kestra.io/plugins/plugin-aws/triggers/io.kestra.plugin.aws.sqs.realtimetrigger)
+ 1. Get [triggered](https://kestra.io/plugins/plugin-aws/triggers/io.kestra.plugin.aws.sqs.realtimetrigger)
every time the event lands in the AWS SQS queue
2. The flow will push the data into a table in DynamoDB
-
    For this, create an SQS queue named `orders`. We will be producing JSON
    messages into the queue, generated from
    [orders.csv](https://huggingface.co/datasets/kestra/datasets/raw/main/csv/orders.csv).
    One sample message could be:
-
```
-
{"order_id": "1", "customer_name": "Kelly Olsen", "customer_email":
"jenniferschneider@example.com", "product_id": "20", "price": "166.89",
"quantity": "1", "total": "166.89"}
-
```
-
    Create an `orders` table in DynamoDB.
-
We get the AWS access key and secret key from the secrets
`AWS_ACCESS_KEY_ID` and `AWS_SECRET_KEY_ID`.
-
    When you produce a message onto the SQS queue, the flow will get triggered,
    and you will see a corresponding new record appear in the DynamoDB
    table.
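To test the setup end to end, you could publish one sample order to the queue with the SQS `Publish` task from the `sqs-publish-message` blueprint above. A sketch, reusing the queue URL, region, and secret names from this blueprint:

```yaml
id: publish_sample_order
namespace: company.team

tasks:
  - id: publish_message
    type: io.kestra.plugin.aws.sqs.Publish
    accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
    secretKeyId: "{{ secret('AWS_SECRET_KEY_ID') }}"
    region: eu-central-1
    queueUrl: https://sqs.eu-central-1.amazonaws.com/000000000000/orders
    from:
      # one sample order in the same JSON shape as the messages described above
      data: |
        {"order_id": "1", "customer_name": "Kelly Olsen", "customer_email": "jenniferschneider@example.com", "product_id": "20", "price": "166.89", "quantity": "1", "total": "166.89"}
```

Running this flow once should trigger `sqs-realtime-trigger` and insert one new item into the `orders` DynamoDB table.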
diff --git a/start-job-on-existing-cluster.yaml b/start-job-on-existing-cluster.yaml
index 515efa9..53e8594 100644
--- a/start-job-on-existing-cluster.yaml
+++ b/start-job-on-existing-cluster.yaml
@@ -1,5 +1,6 @@
id: start-job-on-existing-cluster
namespace: company.team
+
tasks:
- id: create_job
type: io.kestra.plugin.databricks.job.CreateJob
@@ -13,9 +14,11 @@ tasks:
pythonFile: /Shared/hello.py
sparkPythonTaskSource: WORKSPACE
waitForCompletion: PT5H
+
- id: log_status
type: io.kestra.plugin.core.log.Log
message: The job finished, all done!
+
extend:
title: Create a Spark job on a Databricks cluster and wait for its completion
description: This flow will start a job on an existing Databricks cluster. The
diff --git a/subflow-for-each-value.yaml b/subflow-for-each-value.yaml
index 447f4ba..020ca32 100644
--- a/subflow-for-each-value.yaml
+++ b/subflow-for-each-value.yaml
@@ -1,5 +1,6 @@
id: subflow-for-each-value
namespace: company.team
+
tasks:
- id: parallel
type: io.kestra.plugin.core.flow.ForEach
@@ -12,20 +13,17 @@ tasks:
namespace: company.team
inputs:
my_input: "{{ taskrun.value }}"
+
extend:
title: Run a subflow for each value in parallel and wait for their completion —
recommended pattern to iterate over hundreds or thousands of list items
description: >-
First, create the following flow that we'll use as a parametrized subflow:
-
```yaml
-
id: my_subflow
-
namespace: company.team
-
inputs:
- id: my_input
type: STRING
@@ -37,17 +35,14 @@ extend:
format: hi from {{ flow.id }} using input {{ inputs.my_input }}
```
-
Then, create the flow `subflow_for_each_value`.
-
This flow will trigger multiple executions of the flow `my_subflow`. Each
execution will be triggered in parallel using input from the list of values.
In this example, you should see three executions of the subflow, one with
the input user1, another with the input user2 and yet another execution with
the input user3.
-
This pattern is particularly useful if the list of values you iterate over
is large. As explained in the [Flow best practices
documentation](https://kestra.io/docs/developer-guide/best-practice), it's
diff --git a/surreal-db-slack.yaml b/surreal-db-slack.yaml
index bab7f86..dd0586b 100644
--- a/surreal-db-slack.yaml
+++ b/surreal-db-slack.yaml
@@ -1,5 +1,6 @@
id: surreal-db-slack
namespace: company.team
+
tasks:
- id: company
type: io.kestra.plugin.surrealdb.Query
@@ -7,9 +8,11 @@ tasks:
CREATE company SET
name = 'Kestra',
created_at = time::now()
+
- id: delete_anna
type: io.kestra.plugin.surrealdb.Query
query: DELETE author:anna;
+
- id: add_author_tbl
type: io.kestra.plugin.surrealdb.Query
disabled: true
@@ -19,29 +22,30 @@ tasks:
name.last = 'Geller',
name.full = string::join(' ', name.first, name.last),
admin = true
+
- id: fix_admin_permission
type: io.kestra.plugin.surrealdb.Query
query: UPDATE author:anna SET admin = false WHERE name.last = 'Geller';
+
- id: create_article_tbl
type: io.kestra.plugin.surrealdb.Query
query: >
CREATE article SET
-
created_at = time::now(),
-
author = author:anna,
-
title = 'Kestra 0.12 simplifies building modular, event-driven and
containerized workflows',
-
company = (SELECT VALUE id FROM company WHERE name = 'Kestra' LIMIT 1)[0]
+
- id: query
type: io.kestra.plugin.surrealdb.Query
query: SELECT title FROM article;
fetchType: FETCH_ONE
+
- id: log_query_results
type: io.kestra.plugin.core.log.Log
message: "{{ outputs.query.row }}"
+
- id: slack
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
url: "{{ secret('SLACK_WEBHOOK') }}"
@@ -50,6 +54,7 @@ tasks:
"channel": "#general",
"text": "{{ outputs.query.row.title }}"
}
+
pluginDefaults:
- type: io.kestra.plugin.surrealdb.Query
values:
@@ -58,6 +63,7 @@ pluginDefaults:
namespace: test
username: root
password: root
+
extend:
title: "CRUD operations in SurrealQL: run multiple SurrealDB queries and send
the query results via Slack"
@@ -66,7 +72,6 @@ extend:
as well as insert, update and delete data. The flow parses the final query
result and sends it in a Slack message.
-
This flow assumes that the Slack Incoming Webhook URL is stored as a secret
named `SLACK_WEBHOOK`.
tags: []
diff --git a/surreal-db.yaml b/surreal-db.yaml
index 9a48880..1f2db89 100644
--- a/surreal-db.yaml
+++ b/surreal-db.yaml
@@ -1,5 +1,6 @@
id: surreal-db
namespace: company.team
+
tasks:
- id: article
type: io.kestra.plugin.surrealdb.Query
@@ -11,6 +12,7 @@ tasks:
text = 'Donec eleifend, nunc vitae commodo accumsan, mauris est fringilla.',
account = (SELECT VALUE id FROM account WHERE name = 'ACME Inc' LIMIT 1)[0]
;
+
- id: account
type: io.kestra.plugin.surrealdb.Query
query: |
@@ -18,14 +20,17 @@ tasks:
name = 'ACME Inc',
created_at = time::now()
;
+
- id: query
type: io.kestra.plugin.surrealdb.Query
query: SELECT * FROM article, account;
fetchType: STORE
+
- id: query_condition
type: io.kestra.plugin.surrealdb.Query
query: SELECT * FROM article WHERE author.age < 30 FETCH author, account;
fetchType: STORE
+
pluginDefaults:
- type: io.kestra.plugin.surrealdb.Query
values:
@@ -34,6 +39,7 @@ pluginDefaults:
namespace: test
username: root
password: root
+
extend:
title: Query SurrealDB and store the result as a downloadable artifact
description: >-
@@ -41,7 +47,6 @@ extend:
storage. The result of the query can be used by other tasks in the same flow
using the syntax `{{ outputs.query.uri }}`.
-
To install and run SurrealDB, follow the instructions in the [SurrealDB
documentation](https://surrealdb.com/docs/introduction/start).
tags: []
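Because the `STORE` results live in internal storage as Ion files, a follow-up task can turn them into a downloadable CSV report, the same pattern used in the Snowflake and Trino blueprints. A minimal sketch; the extra task is not part of this flow:

```yaml
  - id: csv_report
    type: io.kestra.plugin.serdes.csv.IonToCsv
    from: "{{ outputs.query.uri }}" # the stored result of the `query` task above
```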
diff --git a/switch.yaml b/switch.yaml
index 64f25b6..02bd6de 100644
--- a/switch.yaml
+++ b/switch.yaml
@@ -1,8 +1,10 @@
id: switch
namespace: company.team
+
inputs:
- id: string
type: STRING
+
tasks:
- id: switch
type: io.kestra.plugin.core.flow.Switch
@@ -20,6 +22,7 @@ tasks:
- id: default
type: io.kestra.plugin.core.debug.Return
format: This is the default case
+
extend:
title: Switch tasks depending on a specific value
  description: The `switch` task will drive the flow depending on the input value.
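For reference, a minimal sketch of how the `cases` branches of such a Switch task are usually laid out; the `FIRST` case and its task are illustrative placeholders, not this blueprint's actual cases:

```yaml
  - id: switch
    type: io.kestra.plugin.core.flow.Switch
    value: "{{ inputs.string }}"
    cases:
      # each key is a candidate value of `value`, mapped to the tasks to run
      FIRST:
        - id: first
          type: io.kestra.plugin.core.debug.Return
          format: This is the FIRST case
    defaults:
      - id: default
        type: io.kestra.plugin.core.debug.Return
        format: This is the default case
```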
diff --git a/sync-from-git.yaml b/sync-from-git.yaml
index 68bc6e6..5efc13b 100644
--- a/sync-from-git.yaml
+++ b/sync-from-git.yaml
@@ -29,12 +29,12 @@ triggers:
- id: every_full_hour
type: io.kestra.plugin.core.trigger.Schedule
cron: "*/15 * * * *"
+
extend:
title: Sync code from Git at regular intervals
description: >-
This flow will sync code from Git every 15 minutes.
-
    We will be using the SyncFlows and SyncNamespaceFiles tasks to sync flows and
    namespace files, respectively.
tags:
diff --git a/task-outputs.yaml b/task-outputs.yaml
index 5b49267..5e65311 100644
--- a/task-outputs.yaml
+++ b/task-outputs.yaml
@@ -1,12 +1,15 @@
id: task-outputs
namespace: company.team
+
tasks:
- id: task1
type: io.kestra.plugin.core.debug.Return
format: Hello
+
- id: task2
type: io.kestra.plugin.core.log.Log
message: "{{ outputs.task1.value }} World!"
+
extend:
title: Task outputs
description: This blueprint shows how to use outputs from one task to another.
diff --git a/taxi-trip-data.yaml b/taxi-trip-data.yaml
index 2b9e475..d43fc8b 100644
--- a/taxi-trip-data.yaml
+++ b/taxi-trip-data.yaml
@@ -1,11 +1,13 @@
id: taxi-trip-data
namespace: company.team
+
tasks:
- id: log
type: io.kestra.plugin.core.log.Log
message: running backfill for file
https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_{{
trigger.date ?? execution.startDate | date("yyyy-MM") }}.parquet
+
triggers:
- id: schedule
type: io.kestra.plugin.core.trigger.Schedule
@@ -13,13 +15,13 @@ triggers:
timezone: US/Eastern
backfill:
start: 2023-01-01T00:00:00Z
+
extend:
title: Backfill a flow for the Yellow Taxi Trip dataset
description: >-
To backfill a flow, add a `start` date from which Kestra should backfill the
past runs before running the regular schedule.
-
The expression `{{ trigger.date ?? execution.startDate | date("yyyy-MM") }}`
will always give you the execution date regardless of whether you triggered
the flow ad-hoc or whether it ran based on a schedule. The date function
diff --git a/telegram-notify-on-failure.yaml b/telegram-notify-on-failure.yaml
index ad8d625..0bcfe28 100644
--- a/telegram-notify-on-failure.yaml
+++ b/telegram-notify-on-failure.yaml
@@ -1,13 +1,14 @@
id: telegram-notify-on-failure
namespace: company.team
+
tasks:
- id: send_notification
type: io.kestra.plugin.notifications.telegram.TelegramExecution
token: "{{ secret('TELEGRAM_TOKEN') }}"
channel: "2072728690"
- payload: "Kestra Workflow Failure: {{ trigger.executionId }} has failed on {{
- taskrun.startDate }}"
+ payload: "Kestra Workflow Failure: {{ trigger.executionId }} has failed on {{ taskrun.startDate }}"
executionId: "{{ trigger.executionId }}"
+
triggers:
- id: on_failure
type: io.kestra.plugin.core.trigger.Flow
@@ -19,40 +20,31 @@ triggers:
- type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
namespace: company
comparison: PREFIX
+
extend:
title: Send a Telegram notification when a workflow fails
description: >-
This system flow will send a Telegram notification anytime a workflow in a
`company` namespace (or any nested child namespace) fails.
-
Using this pattern, you can send Telegram notifications for Kestra workflow
execution failures alongside other notifications.
-
You can customize that system flow by modifying the task, adding more tasks
to the flow or adjusting the trigger conditions. Read more about that
- pattern in the [Administrator
- Guide](https://kestra.io/docs/administrator-guide/monitoring).
-
-
- Let's create a flow in the namespace with prefix `company` that will always
- fail.
+ pattern in the [Administrator Guide](https://kestra.io/docs/administrator-guide/monitoring).
+ Let's create a flow in the namespace with prefix `company` that will always fail.
```yaml
-
id: failure_flow
-
namespace: company.team
-
tasks:
- id: always_fails
type: io.kestra.plugin.core.execution.Fail
```
-
Whenever you run the `failure_flow`, it will trigger an execution of the
`telegram_notify_on_failure` flow. As a result, a Telegram notification will
be sent so that prompt action can be taken.
diff --git a/trigger-subflow.yaml b/trigger-subflow.yaml
index f030346..b5925f4 100644
--- a/trigger-subflow.yaml
+++ b/trigger-subflow.yaml
@@ -1,9 +1,11 @@
id: trigger-subflow
namespace: company.team
+
tasks:
- id: task_a
type: io.kestra.plugin.core.debug.Return
format: "{{ task.id }} - flow_a"
+
- id: flow_b
type: io.kestra.plugin.core.flow.Subflow
description: This task triggers the flow `subflow` with corresponding inputs.
@@ -11,6 +13,7 @@ tasks:
flowId: subflow
inputs:
data: "{{ outputs.task_a.value }}"
+
extend:
  title: Trigger a subflow
description: >-
@@ -18,16 +21,12 @@ extend:
    great pattern to reuse common flows. Thanks to inputs, you can parameterize
    your flows and reuse them easily.
-
You can create this subflow first:
- ```
-
+ ```yaml
id: subflow
-
namespace: company.team
-
inputs:
- id: data
type: STRING
@@ -38,7 +37,6 @@ extend:
format: "{{ task.id }} - subflow - {{ inputs.data }}"
```
-
The flow will trigger this subflow in the task `flow_b`.
tags:
- Trigger
diff --git a/trino-query.yaml b/trino-query.yaml
index 7380a1f..915500e 100644
--- a/trino-query.yaml
+++ b/trino-query.yaml
@@ -1,5 +1,6 @@
id: trino-query
namespace: company.team
+
tasks:
- id: analyze_orders
type: io.kestra.plugin.jdbc.trino.Query
@@ -10,24 +11,22 @@ tasks:
from tpch.tiny.orders
group by orderpriority
order by orderpriority
- store: true
+ fetchType: STORE
+
- id: csv_report
type: io.kestra.plugin.serdes.csv.IonToCsv
from: "{{ outputs.analyze_orders.uri }}"
+
extend:
title: Generate a CSV file report from a SQL query using Trino
description: >-
This flow queries data using Trino SQL and generates a downloadable CSV
report.
-
To test this integration, you can start Trino in a Docker container:
-
```bash
-
docker run -d -p 8090:8080 --name trino trinodb/trino
-
```
tags:
- SQL
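With the container started as above, the `analyze_orders` task would point at the locally exposed port. A sketch of the connection settings, where the `tpch` catalog in the URL and the username are assumptions (Trino accepts any username when authentication is disabled), and the query is an abbreviated version of the one above:

```yaml
  - id: analyze_orders
    type: io.kestra.plugin.jdbc.trino.Query
    url: jdbc:trino://localhost:8090/tpch # port 8090 as mapped by the docker run command
    username: kestra
    fetchType: STORE
    sql: |
      select orderpriority, count(*) as cnt
      from tpch.tiny.orders
      group by orderpriority
      order by orderpriority
```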
diff --git a/twilio-notify-on-failure.yaml b/twilio-notify-on-failure.yaml
index 110d3c5..54aff15 100644
--- a/twilio-notify-on-failure.yaml
+++ b/twilio-notify-on-failure.yaml
@@ -1,5 +1,6 @@
id: twilio-notify-on-failure
namespace: company.team
+
tasks:
- id: send_twilio_notification
type: io.kestra.plugin.notifications.twilio.TwilioExecution
@@ -10,6 +11,7 @@ tasks:
authToken: "{{ secret('TWILIO_AUTH_TOKEN') }}"
body: "Kestra Workflow Failure: {{ trigger.executionId }} has failed on {{
taskrun.startDate }}"
+
triggers:
- id: on_failure
type: io.kestra.plugin.core.trigger.Flow
@@ -21,40 +23,31 @@ triggers:
- type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
namespace: company
comparison: PREFIX
+
extend:
  title: Send a Twilio notification when a workflow fails
description: >-
This system flow will send a notification anytime a workflow in a `company`
namespace (or any nested child namespace) fails.
-
    Using this pattern, you can get notifications for Kestra workflow execution
    failures alongside other notifications.
-
You can customize that system flow by modifying the task, adding more tasks
to the flow or adjusting the trigger conditions. Read more about that
- pattern in the [Administrator
- Guide](https://kestra.io/docs/administrator-guide/monitoring).
-
-
- Let's create a flow in the namespace with prefix `company` that will always
- fail.
+ pattern in the [Administrator Guide](https://kestra.io/docs/administrator-guide/monitoring).
+ Let's create a flow in the namespace with prefix `company` that will always fail.
```yaml
-
id: failure_flow
-
namespace: company.team
-
tasks:
- id: always_fails
type: io.kestra.plugin.core.execution.Fail
```
-
Whenever you run the `failure_flow`, it will trigger an execution of the
`twilio_notify_on_failure` flow. As a result, a notification will be sent
using Twilio so that prompt action can be taken.
diff --git a/unreliable-flow.yaml b/unreliable-flow.yaml
index c99c65a..654f3b5 100644
--- a/unreliable-flow.yaml
+++ b/unreliable-flow.yaml
@@ -1,10 +1,12 @@
id: unreliable-flow
namespace: company.team
+
tasks:
- id: fail
type: io.kestra.plugin.scripts.shell.Commands
commands:
- exit 1
+
errors:
- id: alert_on_failure
type: io.kestra.plugin.notifications.zenduty.ZendutyAlert
@@ -28,6 +30,7 @@ errors:
}
]
}
+
extend:
title: Send an alert to Zenduty when a flow fails
description: >-
diff --git a/upload-file-to-s3.yaml b/upload-file-to-s3.yaml
index b53f5e4..ec04ec5 100644
--- a/upload-file-to-s3.yaml
+++ b/upload-file-to-s3.yaml
@@ -1,16 +1,20 @@
id: upload-file-to-s3
namespace: company.team
+
inputs:
- id: bucket
type: STRING
defaults: declarative-data-orchestration
+
- id: file_url
type: STRING
defaults: https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip
+
tasks:
- id: download_file
type: io.kestra.plugin.core.http.Download
uri: "{{ inputs.file_url }}"
+
- id: upload_to_s3
type: io.kestra.plugin.aws.s3.Upload
from: "{{ outputs.download_file.uri }}"
@@ -19,12 +23,12 @@ tasks:
region: "{{ secret('AWS_DEFAULT_REGION') }}"
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
+
extend:
title: Download a file and upload it to S3
description: >
This flow downloads a single file and uploads it to an S3 bucket.
-
This flow assumes AWS credentials stored as secrets `AWS_ACCESS_KEY_ID`,
`AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`.
tags:
diff --git a/upload-google-drive.yaml b/upload-google-drive.yaml
index 0eb107a..dded72a 100644
--- a/upload-google-drive.yaml
+++ b/upload-google-drive.yaml
@@ -1,9 +1,11 @@
id: upload-google-drive
namespace: company.team
+
tasks:
- id: download
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/raw/main/csv/orders.csv
+
- id: upload
type: io.kestra.plugin.googleworkspace.drive.Upload
from: "{{ outputs.download.uri }}"
@@ -12,18 +14,17 @@ tasks:
name: Orders
contentType: text/csv
mimeType: application/vnd.google-apps.spreadsheet
+
extend:
title: Upload file to Google Drive
description: >-
In this blueprint, we upload a file to Google Drive.
-
> Note: The `parents` property here refers to an existing Google Drive
directory. To be able to use Google Drive you will have to enable the API in
your Google Cloud Platform project and share the folder with your service
account email address.
-
Make sure to add the `GOOGLE_APPLICATION_CREDENTIALS` environment variable
with a value of the path to the JSON keyfile (note this must be a path to a
file, not the contents of the Service Account).
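One common way to provide that variable is through the Kestra deployment configuration itself, for example in Docker Compose. A sketch, where the service name and file paths are assumptions about your setup:

```yaml
services:
  kestra:
    environment:
      # path to the keyfile inside the container, not the JSON contents
      GOOGLE_APPLICATION_CREDENTIALS: /app/sa.json
    volumes:
      - ./sa.json:/app/sa.json
```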
diff --git a/upload-parquet-to-databricks.yaml b/upload-parquet-to-databricks.yaml
index 6f2d960..46749f7 100644
--- a/upload-parquet-to-databricks.yaml
+++ b/upload-parquet-to-databricks.yaml
@@ -1,8 +1,10 @@
id: upload-parquet-to-databricks
namespace: company.team
+
inputs:
- id: my_file
type: FILE
+
tasks:
- id: upload_file
type: io.kestra.plugin.databricks.dbfs.Upload
@@ -11,6 +13,7 @@ tasks:
host: "{{ secret('DATABRICKS_HOST') }}"
from: "{{ inputs.my_file }}"
to: /Shared/myFile.parquet
+
extend:
title: Upload a Parquet file to Databricks
description: This flow will upload a local Parquet file to Databricks File System (DBFS).
diff --git a/upload-to-s3.yaml b/upload-to-s3.yaml
index 8c5935c..720c941 100644
--- a/upload-to-s3.yaml
+++ b/upload-to-s3.yaml
@@ -1,27 +1,32 @@
id: upload-to-s3
namespace: company.team
+
inputs:
- id: bucket
type: STRING
defaults: declarative-data-orchestration
+
tasks:
- id: get_zip_file
type: io.kestra.plugin.core.http.Download
uri: https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip
+
- id: unzip
type: io.kestra.plugin.compress.ArchiveDecompress
algorithm: ZIP
- from: "{{outputs.get_zip_file.uri}}"
+ from: "{{ outputs.get_zip_file.uri }}"
+
- id: csv_upload
type: io.kestra.plugin.aws.s3.Upload
from: "{{ outputs.unzip.files['global_power_plant_database.csv'] }}"
bucket: "{{ inputs.bucket }}"
- key: powerplant/{{ trigger.date ?? execution.startDate |
- date('yyyy_MM_dd__HH_mm_ss') }}.csv
+ key: powerplant/{{ trigger.date ?? execution.startDate | date('yyyy_MM_dd__HH_mm_ss') }}.csv
+
triggers:
- id: hourly
type: io.kestra.plugin.core.trigger.Schedule
cron: "@hourly"
+
extend:
title: Extract a CSV file via HTTP API and upload it to S3 by using scheduled
date as a filename
diff --git a/wdir-pandas-python-outputs.yaml b/wdir-pandas-python-outputs.yaml
index ee95fba..c0b02fb 100644
--- a/wdir-pandas-python-outputs.yaml
+++ b/wdir-pandas-python-outputs.yaml
@@ -1,5 +1,6 @@
id: wdir-pandas-python-outputs
namespace: company.team
+
tasks:
- id: etl
type: io.kestra.plugin.core.flow.WorkingDirectory
@@ -20,6 +21,7 @@ tasks:
df = pd.DataFrame(data)
print(df.head())
df.to_csv("raw_data.csv", index=False)
+
- id: transform_and_load_csv
type: io.kestra.plugin.scripts.python.Script
warningOnStdErr: false
@@ -34,6 +36,7 @@ tasks:
df['Column4'] = df['Column2'] + df['Column3']
print(df.head())
df.to_csv("final.csv", index=False)
+
extend:
title: Pandas ETL - passing data between Python script tasks running in separate
containers
@@ -41,12 +44,10 @@ extend:
This flow demonstrates how to use the `WorkingDirectory` task to persist
data between multiple Python script tasks running in separate containers.
-
The first task stores the data as a CSV file called "raw_data.csv". The
second task loads the CSV file, transforms it and outputs the final CSV file
to Kestra's internal storage. You can download that file from the Outputs
- tab on the Execution's page.
-
+ tab on the Execution's page.
Kestra's internal storage allows you to use the output in other tasks in the
flow, even if those tasks are processed in different containers. The final
diff --git a/weaviate-csv.yaml b/weaviate-csv.yaml
index ad72fd2..a557d78 100644
--- a/weaviate-csv.yaml
+++ b/weaviate-csv.yaml
@@ -1,21 +1,26 @@
id: weaviate-csv
namespace: company.team
+
variables:
host: https://demo-ito81rf6.weaviate.network
secret: YOUR_WEAVIATE_API_KEY
+
tasks:
- id: csv
type: io.kestra.plugin.core.http.Download
uri: https://huggingface.co/datasets/kestra/datasets/raw/main/csv/trivia_questions.csv
+
- id: csv_to_ion
type: io.kestra.plugin.serdes.csv.CsvToIon
from: "{{ outputs.csv.uri }}"
+
- id: batch_load
type: io.kestra.plugin.weaviate.BatchCreate
url: "{{ vars.host }}"
apiKey: "{{ vars.secret }}"
className: QuestionsCsv
objects: "{{ outputs.csv_to_ion.uri }}"
+
- id: query
type: io.kestra.plugin.weaviate.Query
url: "{{ vars.host }}"
@@ -30,6 +35,7 @@ tasks:
}
}
}
+
extend:
title: Extract data from a CSV file, load it in batch to Weaviate and query it
with GraphQL
@@ -37,14 +43,12 @@ extend:
This flow shows how to extract data from a CSV file using the HTTP API, load
it to a Weaviate cluster and query it with GraphQL.
-
This flow assumes that you have a [Weaviate
cluster](https://console.weaviate.cloud/) running, and that you created an
API key. Make sure to replace the `url` and `apiKey` values in the tasks
with your Weaviate credentials. It's recommended to use Secrets to store
your API key.
-
Once you've configured the Weaviate secret, you can reproduce this flow
without any changes. It will load the data from the Kaggle Jeopardy dataset
to Weaviate, and then query it with GraphQL.
diff --git a/weaviate-load-and-query.yaml b/weaviate-load-and-query.yaml
index 9c61fb2..b56289c 100644
--- a/weaviate-load-and-query.yaml
+++ b/weaviate-load-and-query.yaml
@@ -1,19 +1,23 @@
id: weaviate-load-and-query
namespace: company.team
+
tasks:
- id: json
type: io.kestra.plugin.core.http.Download
uri: https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json
+
- id: json_to_ion
type: io.kestra.plugin.serdes.json.JsonToIon
from: "{{ outputs.json.uri }}"
newLine: false
+
- id: batch_load
type: io.kestra.plugin.weaviate.BatchCreate
url: https://demo-oczq9ryw.weaviate.network
apiKey: "{{ secret('WEAVIATE_API_KEY') }}"
className: Questions
objects: "{{ outputs.json_to_ion.uri }}"
+
- id: batch_load_map
type: io.kestra.plugin.weaviate.BatchCreate
url: demo-oczq9ryw.weaviate.network
@@ -32,6 +36,7 @@ tasks:
- company: initech
user: Bob Slydell
city: Austin
+
- id: query_users
type: io.kestra.plugin.weaviate.Query
url: demo-oczq9ryw.weaviate.network
@@ -46,6 +51,7 @@ tasks:
}
}
}
+
- id: generative_search
type: io.kestra.plugin.weaviate.Query
disabled: true
@@ -63,26 +69,24 @@ tasks:
}
}
}
+
extend:
title: Extract data from a REST API, load it to Weaviate and query it with GraphQL
description: >
This flow shows how to extract data from an HTTP API, load it to a Weaviate
cluster and query it with GraphQL.
-
This flow assumes that you have a [Weaviate
cluster](https://console.weaviate.cloud/) running, and that you created an
API key. Make sure to replace the `url` and `apiKey` values in the tasks
with your Weaviate credentials. It's recommended to use Secrets to store
your API key.
-
Once you've configured the Weaviate secret, you can reproduce this flow
without any changes. It will load the data from the [Jeopardy
dataset](https://www.kaggle.com/tunguz/200000-jeopardy-questions) to
Weaviate, and then query it with GraphQL.
-
You can ingest data to Weaviate from a Kestra flow using one of the
following options:
@@ -93,7 +97,6 @@ extend:
is recommended when you want to load data from a previous task in the same
flow, e.g. after extracting it from a database or a file.
-
The last task performing a [Generative
Search](https://weaviate.io/developers/weaviate/starter-guides/generative#what-is-generative-search)
is currently disabled, as it requires an OpenAI API key and following the
diff --git a/whatsapp-notify-on-failure.yaml b/whatsapp-notify-on-failure.yaml
index a1f9690..af6bd43 100644
--- a/whatsapp-notify-on-failure.yaml
+++ b/whatsapp-notify-on-failure.yaml
@@ -1,5 +1,6 @@
id: whatsapp-notify-on-failure
namespace: company.team
+
tasks:
- id: send_notification
type: io.kestra.plugin.notifications.whatsapp.WhatsAppExecution
@@ -12,6 +13,7 @@ tasks:
payload: "Kestra Workflow Failure: {{ trigger.executionId }} has failed on {{
taskrun.startDate }}"
executionId: "{{ trigger.executionId }}"
+
triggers:
- id: on_failure
type: io.kestra.plugin.core.trigger.Flow
@@ -23,40 +25,31 @@ triggers:
- type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
namespace: company
comparison: PREFIX
+
extend:
title: Send a WhatsApp notification when a workflow fails
description: >-
This system flow will send a WhatsApp notification anytime a workflow in a
`company` namespace (or any nested child namespace) fails.
-
Using this pattern, you can send WhatsApp notifications for Kestra workflow
execution failures alongside other notifications.
-
You can customize that system flow by modifying the task, adding more tasks
to the flow or adjusting the trigger conditions. Read more about that
- pattern in the [Administrator
- Guide](https://kestra.io/docs/administrator-guide/monitoring).
-
-
- Let's create a flow in the namespace with prefix `company` that will always
- fail.
+ pattern in the [Administrator Guide](https://kestra.io/docs/administrator-guide/monitoring).
+ Let's create a flow in the namespace with prefix `company` that will always fail.
```yaml
-
id: failure_flow
-
namespace: company.team
-
tasks:
- id: always_fails
type: io.kestra.plugin.core.execution.Fail
```
-
Whenever you run the `failure_flow`, it will trigger an execution of the
`whatsapp_notify_on_failure` flow. As a result, a WhatsApp notification will
be sent so that prompt action can be taken.
diff --git a/wikipedia-top10-python-pandas.yaml b/wikipedia-top10-python-pandas.yaml
index 8abb23b..a2de1aa 100644
--- a/wikipedia-top10-python-pandas.yaml
+++ b/wikipedia-top10-python-pandas.yaml
@@ -1,24 +1,24 @@
id: wikipedia-top10-python-pandas
namespace: company.team
description: Analyze top 10 Wikipedia pages
+
tasks:
- id: query
type: io.kestra.plugin.gcp.bigquery.Query
sql: >
SELECT DATETIME(datehour) as date, title, views FROM
`bigquery-public-data.wikipedia.pageviews_2024`
-
WHERE DATE(datehour) = current_date() and wiki = 'en'
-
ORDER BY datehour desc, views desc
-
LIMIT 10
store: true
projectId: test-project
serviceAccount: "{{ secret('GCP_SERVICE_ACCOUNT_JSON') }}"
+
- id: write_csv
type: io.kestra.plugin.serdes.csv.IonToCsv
from: "{{ outputs.query.uri }}"
+
- id: pandas
type: io.kestra.plugin.scripts.python.Script
warningOnStdErr: false
@@ -35,39 +35,30 @@ tasks:
df.head(10)
views = df['views'].max()
Kestra.outputs({'views': int(views)})
+
extend:
title: Use BigQuery and Python script running in Docker to analyze Wikipedia
page views
description: >-
This flow will do the following:
-
1. Use `bigquery.Query` task to query the top 10 wikipedia pages for the
current day
-
2. Use `IonToCsv` to store the results in a CSV file.
-
3. Use `python.Script` task to read the CSV file and use pandas to find the
maximum number of views.
-
4. Use Kestra `outputs` to track the maximum number of views over time.
-
The Python script will run in a Docker container based on the public image
`ghcr.io/kestra-io/pydata:latest`.
-
The BigQuery task exposes (by default) a variety of **metrics** such as:
- total.bytes.billed
-
- total.partitions.processed
-
- number of rows processed
-
- query duration
-
You can view those metrics on the Execution page in the Metrics tab.
tags:
- Python
diff --git a/write-mongo.yaml b/write-mongo.yaml
index 6c10802..d070262 100644
--- a/write-mongo.yaml
+++ b/write-mongo.yaml
@@ -1,5 +1,6 @@
id: write-mongo
namespace: company.team
+
tasks:
- id: write
type: io.kestra.plugin.mongodb.InsertOne
@@ -12,6 +13,7 @@ tasks:
$oid: 60930c39a982931c20ef6cd6
name: John Doe
city: Paris
+
extend:
title: Write data in MongoDB
  description: This blueprint shows how to insert a document into a MongoDB database.
diff --git a/zenduty-failure-alert.yaml b/zenduty-failure-alert.yaml
index 3c56735..b435182 100644
--- a/zenduty-failure-alert.yaml
+++ b/zenduty-failure-alert.yaml
@@ -1,13 +1,14 @@
id: zenduty-failure-alert
namespace: system
+
tasks:
- id: send_alert
type: io.kestra.plugin.notifications.zenduty.ZendutyExecution
url: https://www.zenduty.com/api/events/{{ secret('ZENDUTY_INTEGRATION_KEY') }}/
executionId: "{{ trigger.executionId }}"
- message: Kestra workflow execution {{ trigger.executionId }} of a flow {{
- trigger.flowId }} in the namespace {{ trigger.namespace }} changed status
- to {{ trigger.state }}
+ message: Kestra workflow execution {{ trigger.executionId }} of a flow {{ trigger.flowId }}
+ in the namespace {{ trigger.namespace }} changed status to {{ trigger.state }}
+
triggers:
- id: failed_prod_workflows
type: io.kestra.plugin.core.trigger.Flow
@@ -19,6 +20,7 @@ triggers:
- type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
namespace: company
prefix: true
+
extend:
title: Send an alert to Zenduty when any flow fails in the company namespace
description: >-
@@ -28,24 +30,17 @@ extend:
API integration and generate the key. The API integration will send an API
call that follows the format:
-
-
```bash
-
curl -X POST https://www.zenduty.com/api/events/[integration-key]/ -H
'Content-Type: application/json' -d '{"alert_type":"critical",
"message":"Some message", "summary":"some summary",
"entity_id":"some_entity_id"}'
-
```
-
-
The `message` and `summary` parameters are required. The `alert_type`
parameter is the severity of the issue, including `info`, `warning`,
`error`, or `critical`.
-
    This Kestra task abstracts away raw API calls and only requires the
    integration key, which you can store as a Secret. The default value of
    `alert_type` is `error`. Visit the Zenduty [Events API
diff --git a/zip-to-parquet.yaml b/zip-to-parquet.yaml
index a9736d0..66a061a 100644
--- a/zip-to-parquet.yaml
+++ b/zip-to-parquet.yaml
@@ -1,16 +1,19 @@
id: zip-to-parquet
namespace: company.team
+
variables:
file_id: "{{ execution.startDate | dateAdd(-3, 'MONTHS') | date('yyyyMM') }}"
+
tasks:
- id: get_zipfile
type: io.kestra.plugin.core.http.Download
- uri: https://divvy-tripdata.s3.amazonaws.com/{{ render(vars.file_id)
- }}-divvy-tripdata.zip
+ uri: https://divvy-tripdata.s3.amazonaws.com/{{ render(vars.file_id) }}-divvy-tripdata.zip
+
- id: unzip
type: io.kestra.plugin.compress.ArchiveDecompress
algorithm: ZIP
from: "{{ outputs.get_zipfile.uri }}"
+
- id: parquet_output
type: io.kestra.plugin.scripts.python.Script
warningOnStdErr: false
@@ -31,6 +34,7 @@ tasks:
df.to_parquet(f"{file_id}.parquet")
outputFiles:
- "*.parquet"
+
extend:
title: Extract a zip file, decompress it, and convert CSV to parquet format in
Python