
Commit

Merge pull request #14 from shrutimantri/add-new-lines3
wrussell1999 authored Nov 11, 2024
2 parents d3f7e1f + b234ad9 commit 9d1a0d1
Showing 127 changed files with 543 additions and 627 deletions.
2 changes: 1 addition & 1 deletion api-python-sql.yaml
@@ -32,7 +32,7 @@ tasks:
FROM read_csv_auto('{{ workingDir }}/in.csv', header=True)
GROUP BY brand
ORDER BY avg_price DESC;
store: true
fetchType: STORE

extend:
title: Extract data from a REST API, process it in Python with Polars in a
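Several of the flows touched by this commit replace the boolean `store: true` flag on query tasks with the explicit `fetchType: STORE` property, as seen above. For reference, a minimal sketch of a query task after the change — the flow id and the DuckDB task type are assumptions for illustration, mirroring the query shown in this file:

```yaml
id: fetchtype_store_sketch      # hypothetical flow id, for illustration only
namespace: company.team

tasks:
  - id: query
    type: io.kestra.plugin.jdbc.duckdb.Query   # assumed task type, matching the CSV aggregation above
    sql: |
      SELECT brand, AVG(price) AS avg_price
      FROM read_csv_auto('{{ workingDir }}/in.csv', header=True)
      GROUP BY brand
      ORDER BY avg_price DESC;
    fetchType: STORE   # replaces the former `store: true` flag; results are written to internal storage
```

With `fetchType: STORE`, downstream tasks can reference the stored result file through the task's output, e.g. `{{ outputs.query.uri }}`.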
2 changes: 1 addition & 1 deletion business-automation.yaml
@@ -39,7 +39,7 @@ tasks:
- id: query
type: io.kestra.plugin.jdbc.sqlite.Query
url: jdbc:sqlite:kestra.db
store: true
fetchType: STORE
sql: |
SELECT * FROM features
ORDER BY release_version;
2 changes: 1 addition & 1 deletion data-engineering-pipeline.yaml
@@ -53,7 +53,7 @@ tasks:
FROM read_json_auto('{{ workingDir }}/products.json')
GROUP BY brand
ORDER BY avg_price DESC;
store: true
fetchType: STORE

extend:
title: Getting started with Kestra — a Data Engineering Pipeline example
5 changes: 2 additions & 3 deletions dremio-sql-python.yaml
@@ -11,9 +11,8 @@ tasks:
url: jdbc:dremio:direct=sql.dremio.cloud:443;ssl=true;PROJECT_ID={{vars.project_id}};schema=postgres.public
username: $token
password: "{{ secret('DREMIO_TOKEN') }}"
sql: SELECT first_name, last_name, hire_date, salary FROM
postgres.public.employees LIMIT 100;
store: true
sql: SELECT first_name, last_name, hire_date, salary FROM postgres.public.employees LIMIT 100;
fetchType: STORE

- id: python
type: io.kestra.plugin.scripts.python.Script
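The `python` task that follows the query is collapsed in this view. As a purely hypothetical sketch of how a downstream task could consume a result stored with `fetchType: STORE` (the input file name and the script body are illustrative, not this flow's actual code):

```yaml
  - id: python
    type: io.kestra.plugin.scripts.python.Script
    inputFiles:
      employees.ion: "{{ outputs.query.uri }}"   # stored query result from the task above
    script: |
      import os
      # hypothetical check: confirm the stored result file was passed in
      print("stored Dremio result:", os.path.getsize("employees.ion"), "bytes")
```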
6 changes: 6 additions & 0 deletions http-check.yaml
@@ -1,20 +1,24 @@
id: http-check
namespace: company.team

inputs:
- id: uri
type: URI
defaults: https://kestra.io

tasks:
- id: api
type: io.kestra.plugin.core.http.Request
uri: "{{ inputs.uri }}"

- id: check_status
type: io.kestra.plugin.core.flow.If
condition: "{{ outputs.api.code != 200 }}"
then:
- id: unhealthy
type: io.kestra.plugin.core.log.Log
message: Server unhealthy!!! Response {{ outputs.api.body }}

- id: send_slack_alert
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
url: "{{ secret('SLACK_WEBHOOK') }}"
@@ -27,10 +31,12 @@ tasks:
- id: healthy
type: io.kestra.plugin.core.log.Log
message: Everything is fine!

triggers:
- id: daily
type: io.kestra.plugin.core.trigger.Schedule
cron: 0 9 * * *

extend:
title: Monitor availability of an HTTP endpoint and send a Slack alert if a
service is unhealthy
8 changes: 4 additions & 4 deletions hubspot-to-bigquery.yaml
@@ -1,5 +1,6 @@
id: hubspot-to-bigquery
namespace: company.team

tasks:
- id: sync
type: io.kestra.plugin.cloudquery.Sync
@@ -32,32 +33,31 @@ tasks:
- "*"
spec:
max_requests_per_second: 5

triggers:
- id: schedule
type: io.kestra.plugin.core.trigger.Schedule
cron: 0 6 * * *

extend:
title: Sync Hubspot CRM data to BigQuery on a schedule
description: >-
This flow will sync data from Hubspot CRM to BigQuery on a schedule. The
`sync` task from the CloudQuery plugin uses the `hubspot` source and the
`bigquery` destination.
Note how we use the `sa.json` credentials file to authenticate with GCP and
the `HUBSPOT_APP_TOKEN` environment variable to authenticate with Hubspot
CRM.
To avoid rate limiting issues, you can set the `max_requests_per_second` parameter in the `hubspot` source configuration. In this example, we set it to 5 requests per second.
The `schedule` trigger runs the flow every day at 6:00 AM.
Additionally, you can [generate an API
key](https://docs.cloudquery.io/docs/deployment/generate-api-key) to use
premium plugins. You can add the API key as an environment variable:
```yaml
- id: hn_to_duckdb
type: io.kestra.plugin.cloudquery.Sync
```
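For the API key mentioned above, a rough sketch of how it could be passed — assuming the `Sync` task accepts an `env` map and using a hypothetical `CLOUDQUERY_API_KEY` secret name:

```yaml
tasks:
  - id: hn_to_duckdb
    type: io.kestra.plugin.cloudquery.Sync
    env:
      # hypothetical secret holding the CloudQuery API key generated per the docs linked above
      CLOUDQUERY_API_KEY: "{{ secret('CLOUDQUERY_API_KEY') }}"
```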
19 changes: 7 additions & 12 deletions infrastructure-automation.yaml
@@ -1,10 +1,12 @@
id: infrastructure-automation
namespace: tutorial
description: Infrastructure Automation

inputs:
- id: docker_image
type: STRING
defaults: kestra/myimage:latest

tasks:
- id: build_image
type: io.kestra.plugin.docker.Build
@@ -18,6 +20,7 @@ tasks:
registry: https://index.docker.io/v1/
username: "{{ secret('DOCKERHUB_USERNAME') }}"
password: "{{ secret('DOCKERHUB_PASSWORD') }}"

- id: run_container
type: io.kestra.plugin.docker.Run
pullPolicy: NEVER
@@ -26,6 +29,7 @@ tasks:
- pip
- show
- kestra

- id: run_terraform
type: io.kestra.plugin.terraform.cli.TerraformCLI
beforeCommands:
@@ -48,24 +52,19 @@ tasks:
}
}
provider "http" {}
provider "local" {}
variable "pokemon_names" {
type = list(string)
default = ["pikachu", "psyduck", "charmander", "bulbasaur"]
}
data "http" "pokemon" {
count = length(var.pokemon_names)
url = "https://pokeapi.co/api/v2/pokemon/${var.pokemon_names[count.index]}"
}
locals {
pokemon_details = [for i in range(length(var.pokemon_names)) : {
name = jsondecode(data.http.pokemon[i].response_body)["name"]
@@ -75,19 +74,19 @@ tasks:
file_content = join("\n\n", [for detail in local.pokemon_details : "Name: ${detail.name}\nTypes: ${detail.types}"])
}
resource "local_file" "pokemon_details_file" {
filename = "${path.module}/pokemon.txt"
content = local.file_content
}
output "file_path" {
value = local_file.pokemon_details_file.filename
}
- id: log_pokemon
type: io.kestra.plugin.core.log.Log
message: "{{ read(outputs.run_terraform.outputFiles['pokemon.txt']) }}"

extend:
title: Getting started with Kestra — an Infrastructure Automation workflow example
description: >-
@@ -98,12 +97,8 @@ extend:
The flow has four tasks:
1. The first task builds a Docker image.
2. The second task runs a container using the image.
3. The third task uses Terraform to create a file with details about
Pokémon.
3. The third task uses Terraform to create a file with details about Pokémon.
4. The fourth task logs the details about Pokémon.
tags:
- Getting Started
5 changes: 4 additions & 1 deletion ingest-to-datalake-event-driven.yaml
@@ -14,6 +14,7 @@ tasks:
- id: clone_repository
type: io.kestra.plugin.git.Clone
url: https://github.com/kestra-io/scripts

- id: etl
type: io.kestra.plugin.scripts.python.Commands
warningOnStdErr: false
@@ -27,6 +28,7 @@ tasks:
commands:
- python etl/aws_iceberg_fruit.py {{ vars.destination_prefix }}/{{
trigger.objects | jq('.[].key') | first }}

- id: merge_query
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -43,6 +45,7 @@ tasks:
WHEN NOT MATCHED
THEN INSERT (id, fruit, berry, update_timestamp)
VALUES(r.id, r.fruit, r.berry, current_timestamp);
- id: optimize
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -68,13 +71,13 @@ triggers:
region: "{{ secret('AWS_DEFAULT_REGION') }}"
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"

extend:
title: Event-driven data ingestion to AWS S3 data lake managed by Apache
Iceberg, AWS Glue and Amazon Athena
description: >-
This workflow ingests data to an S3 data lake using a Python script.
This script is stored in a public GitHub repository so you can directly use
this workflow as long as you adjust your AWS credentials, S3 bucket name and
the Amazon Athena table name. The script takes the detected S3 object key
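The S3 event trigger that starts this flow is collapsed except for its credentials. As a hedged sketch — the trigger id, bucket, prefix, polling interval, and MOVE action are assumptions for illustration — it could look like the following, producing the `trigger.objects` list used by the `etl` task above:

```yaml
triggers:
  - id: wait_for_new_objects                     # hypothetical trigger id
    type: io.kestra.plugin.aws.s3.Trigger        # polls S3 and fires when matching objects appear
    interval: PT1S
    bucket: kestraio                             # assumed bucket name
    prefix: inbox                                # assumed prefix where new files land
    action: MOVE                                 # move detected objects out of the inbox after triggering
    moveTo:
      key: archive/inbox
    region: "{{ secret('AWS_DEFAULT_REGION') }}"
    accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
    secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
```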
9 changes: 9 additions & 0 deletions ingest-to-datalake-git.yaml
@@ -1,9 +1,11 @@
id: ingest-to-datalake-git
namespace: company.team

variables:
bucket: kestraio
prefix: inbox
database: default

tasks:
- id: list_objects
type: io.kestra.plugin.aws.s3.List
@@ -12,6 +14,7 @@ tasks:
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
region: "{{ secret('AWS_DEFAULT_REGION') }}"
bucket: "{{ vars.bucket }}"

- id: check
type: io.kestra.plugin.core.flow.If
condition: "{{ outputs.list_objects.objects }}"
@@ -23,6 +26,7 @@ tasks:
type: io.kestra.plugin.git.Clone
url: https://github.com/kestra-io/scripts
branch: main

- id: ingest_to_datalake
type: io.kestra.plugin.scripts.python.Commands
warningOnStdErr: false
@@ -35,6 +39,7 @@ tasks:
containerImage: ghcr.io/kestra-io/aws:latest
commands:
- python etl/aws_iceberg_fruit.py

- id: merge_query
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -51,6 +56,7 @@ tasks:
WHEN NOT MATCHED
THEN INSERT (id, fruit, berry, update_timestamp)
VALUES(r.id, r.fruit, r.berry, current_timestamp);
- id: optimize
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -60,6 +66,7 @@ tasks:
outputLocation: s3://{{ vars.bucket }}/query_results/
query: |
OPTIMIZE fruits REWRITE DATA USING BIN_PACK;
- id: move_to_archive
type: io.kestra.plugin.aws.cli.AwsCLI
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -68,11 +75,13 @@ tasks:
commands:
- aws s3 mv s3://{{ vars.bucket }}/{{ vars.prefix }}/ s3://{{
vars.bucket }}/archive/{{ vars.prefix }}/ --recursive

triggers:
- id: hourly_schedule
type: io.kestra.plugin.core.trigger.Schedule
disabled: true
cron: "@hourly"

extend:
title: Ingest data to AWS S3 with Git, Python, Apache Iceberg, AWS Glue and
Amazon Athena
