
Commit

Merge pull request #14 from shrutimantri/add-new-lines3
wrussell1999 authored Nov 11, 2024
2 parents d3f7e1f + b234ad9 commit 9d1a0d1
Showing 127 changed files with 543 additions and 627 deletions.
2 changes: 1 addition & 1 deletion api-python-sql.yaml
@@ -32,7 +32,7 @@ tasks:
FROM read_csv_auto('{{ workingDir }}/in.csv', header=True)
GROUP BY brand
ORDER BY avg_price DESC;
store: true
fetchType: STORE

extend:
title: Extract data from a REST API, process it in Python with Polars in a
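Several of the flows touched by this commit replace the boolean `store: true` flag on query tasks with the explicit `fetchType: STORE` property, as seen above. For reference, a minimal sketch of a query task after the change — the flow id and the DuckDB task type are assumptions for illustration, mirroring the query shown in this file:

```yaml
id: fetchtype_store_sketch      # hypothetical flow id, for illustration only
namespace: company.team

tasks:
  - id: query
    type: io.kestra.plugin.jdbc.duckdb.Query   # assumed task type, matching the CSV aggregation above
    sql: |
      SELECT brand, AVG(price) AS avg_price
      FROM read_csv_auto('{{ workingDir }}/in.csv', header=True)
      GROUP BY brand
      ORDER BY avg_price DESC;
    fetchType: STORE   # replaces the former `store: true` flag; results are written to internal storage
```

With `fetchType: STORE`, downstream tasks can reference the stored result file through the task's output, e.g. `{{ outputs.query.uri }}`.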
2 changes: 1 addition & 1 deletion business-automation.yaml
@@ -39,7 +39,7 @@ tasks:
- id: query
type: io.kestra.plugin.jdbc.sqlite.Query
url: jdbc:sqlite:kestra.db
store: true
fetchType: STORE
sql: |
SELECT * FROM features
ORDER BY release_version;
2 changes: 1 addition & 1 deletion data-engineering-pipeline.yaml
@@ -53,7 +53,7 @@ tasks:
FROM read_json_auto('{{ workingDir }}/products.json')
GROUP BY brand
ORDER BY avg_price DESC;
store: true
fetchType: STORE

extend:
title: Getting started with Kestra — a Data Engineering Pipeline example
5 changes: 2 additions & 3 deletions dremio-sql-python.yaml
@@ -11,9 +11,8 @@ tasks:
url: jdbc:dremio:direct=sql.dremio.cloud:443;ssl=true;PROJECT_ID={{vars.project_id}};schema=postgres.public
username: $token
password: "{{ secret('DREMIO_TOKEN') }}"
sql: SELECT first_name, last_name, hire_date, salary FROM
postgres.public.employees LIMIT 100;
store: true
sql: SELECT first_name, last_name, hire_date, salary FROM postgres.public.employees LIMIT 100;
fetchType: STORE

- id: python
type: io.kestra.plugin.scripts.python.Script
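The `python` task that follows the query is collapsed in this view. As a purely hypothetical sketch of how a downstream task could consume a result stored with `fetchType: STORE` (the input file name and the script body are illustrative, not this flow's actual code):

```yaml
  - id: python
    type: io.kestra.plugin.scripts.python.Script
    inputFiles:
      employees.ion: "{{ outputs.query.uri }}"   # stored query result from the task above
    script: |
      import os
      # hypothetical check: confirm the stored result file was passed in
      print("stored Dremio result:", os.path.getsize("employees.ion"), "bytes")
```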
6 changes: 6 additions & 0 deletions http-check.yaml
@@ -1,20 +1,24 @@
id: http-check
namespace: company.team

inputs:
- id: uri
type: URI
defaults: https://kestra.io

tasks:
- id: api
type: io.kestra.plugin.core.http.Request
uri: "{{ inputs.uri }}"

- id: check_status
type: io.kestra.plugin.core.flow.If
condition: "{{ outputs.api.code != 200 }}"
then:
- id: unhealthy
type: io.kestra.plugin.core.log.Log
message: Server unhealthy!!! Response {{ outputs.api.body }}

- id: send_slack_alert
type: io.kestra.plugin.notifications.slack.SlackIncomingWebhook
url: "{{ secret('SLACK_WEBHOOK') }}"
@@ -27,10 +31,12 @@ tasks:
- id: healthy
type: io.kestra.plugin.core.log.Log
message: Everything is fine!

triggers:
- id: daily
type: io.kestra.plugin.core.trigger.Schedule
cron: 0 9 * * *

extend:
title: Monitor availability of an HTTP endpoint and send a Slack alert if a
service is unhealthy
8 changes: 4 additions & 4 deletions hubspot-to-bigquery.yaml
@@ -1,5 +1,6 @@
id: hubspot-to-bigquery
namespace: company.team

tasks:
- id: sync
type: io.kestra.plugin.cloudquery.Sync
@@ -32,32 +33,31 @@ tasks:
- "*"
spec:
max_requests_per_second: 5

triggers:
- id: schedule
type: io.kestra.plugin.core.trigger.Schedule
cron: 0 6 * * *

extend:
title: Sync Hubspot CRM data to BigQuery on a schedule
description: >-
This flow will sync data from Hubspot CRM to BigQuery on a schedule. The
`sync` task from the CloudQuery plugin uses the `hubspot` source and the
`bigquery` destination.
Note how we use the `sa.json` credentials file to authenticate with GCP and
the `HUBSPOT_APP_TOKEN` environment variable to authenticate with Hubspot
CRM.
To avoid rate limiting issues, you can set the `max_requests_per_second` parameter in the `hubspot` source configuration. In this example, we set it to 5 requests per second.
The `schedule` trigger runs the flow every day at 6:00 AM.
Additionally, you can [generate an API
key](https://docs.cloudquery.io/docs/deployment/generate-api-key) to use
premium plugins. You can add the API key as an environment variable:
```yaml
- id: hn_to_duckdb
type: io.kestra.plugin.cloudquery.Sync
```
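For the API key mentioned above, a rough sketch of how it could be passed — assuming the `Sync` task accepts an `env` map and using a hypothetical `CLOUDQUERY_API_KEY` secret name:

```yaml
tasks:
  - id: hn_to_duckdb
    type: io.kestra.plugin.cloudquery.Sync
    env:
      # hypothetical secret holding the CloudQuery API key generated per the docs linked above
      CLOUDQUERY_API_KEY: "{{ secret('CLOUDQUERY_API_KEY') }}"
```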
19 changes: 7 additions & 12 deletions infrastructure-automation.yaml
@@ -1,10 +1,12 @@
id: infrastructure-automation
namespace: tutorial
description: Infrastructure Automation

inputs:
- id: docker_image
type: STRING
defaults: kestra/myimage:latest

tasks:
- id: build_image
type: io.kestra.plugin.docker.Build
@@ -18,6 +20,7 @@ tasks:
registry: https://index.docker.io/v1/
username: "{{ secret('DOCKERHUB_USERNAME') }}"
password: "{{ secret('DOCKERHUB_PASSWORD') }}"

- id: run_container
type: io.kestra.plugin.docker.Run
pullPolicy: NEVER
@@ -26,6 +29,7 @@ tasks:
- pip
- show
- kestra

- id: run_terraform
type: io.kestra.plugin.terraform.cli.TerraformCLI
beforeCommands:
@@ -48,24 +52,19 @@ tasks:
}
}
provider "http" {}
provider "local" {}
variable "pokemon_names" {
type = list(string)
default = ["pikachu", "psyduck", "charmander", "bulbasaur"]
}
data "http" "pokemon" {
count = length(var.pokemon_names)
url = "https://pokeapi.co/api/v2/pokemon/${var.pokemon_names[count.index]}"
}
locals {
pokemon_details = [for i in range(length(var.pokemon_names)) : {
name = jsondecode(data.http.pokemon[i].response_body)["name"]
@@ -75,19 +74,19 @@ tasks:
file_content = join("\n\n", [for detail in local.pokemon_details : "Name: ${detail.name}\nTypes: ${detail.types}"])
}
resource "local_file" "pokemon_details_file" {
filename = "${path.module}/pokemon.txt"
content = local.file_content
}
output "file_path" {
value = local_file.pokemon_details_file.filename
}
- id: log_pokemon
type: io.kestra.plugin.core.log.Log
message: "{{ read(outputs.run_terraform.outputFiles['pokemon.txt']) }}"

extend:
title: Getting started with Kestra — an Infrastructure Automation workflow example
description: >-
@@ -98,12 +97,8 @@ extend:
The flow has four tasks:
1. The first task builds a Docker image.
2. The second task runs a container using the image.
3. The third task uses Terraform to create a file with details about
Pokémon.
3. The third task uses Terraform to create a file with details about Pokémon.
4. The fourth task logs the details about Pokémon.
tags:
- Getting Started
5 changes: 4 additions & 1 deletion ingest-to-datalake-event-driven.yaml
@@ -14,6 +14,7 @@ tasks:
- id: clone_repository
type: io.kestra.plugin.git.Clone
url: https://github.com/kestra-io/scripts

- id: etl
type: io.kestra.plugin.scripts.python.Commands
warningOnStdErr: false
@@ -27,6 +28,7 @@ tasks:
commands:
- python etl/aws_iceberg_fruit.py {{ vars.destination_prefix }}/{{
trigger.objects | jq('.[].key') | first }}

- id: merge_query
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -43,6 +45,7 @@ tasks:
WHEN NOT MATCHED
THEN INSERT (id, fruit, berry, update_timestamp)
VALUES(r.id, r.fruit, r.berry, current_timestamp);
- id: optimize
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -68,13 +71,13 @@ triggers:
region: "{{ secret('AWS_DEFAULT_REGION') }}"
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"

extend:
title: Event-driven data ingestion to AWS S3 data lake managed by Apache
Iceberg, AWS Glue and Amazon Athena
description: >-
This workflow ingests data to an S3 data lake using a Python script.
This script is stored in a public GitHub repository so you can directly use
this workflow as long as you adjust your AWS credentials, S3 bucket name and
the Amazon Athena table name. The script takes the detected S3 object key
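The S3 event trigger that starts this flow is collapsed except for its credentials. As a hedged sketch — the trigger id, bucket, prefix, polling interval, and MOVE action are assumptions for illustration — it could look like the following, producing the `trigger.objects` list used by the `etl` task above:

```yaml
triggers:
  - id: wait_for_new_objects                     # hypothetical trigger id
    type: io.kestra.plugin.aws.s3.Trigger        # polls S3 and fires when matching objects appear
    interval: PT1S
    bucket: kestraio                             # assumed bucket name
    prefix: inbox                                # assumed prefix where new files land
    action: MOVE                                 # move detected objects out of the inbox after triggering
    moveTo:
      key: archive/inbox
    region: "{{ secret('AWS_DEFAULT_REGION') }}"
    accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
    secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
```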
9 changes: 9 additions & 0 deletions ingest-to-datalake-git.yaml
@@ -1,9 +1,11 @@
id: ingest-to-datalake-git
namespace: company.team

variables:
bucket: kestraio
prefix: inbox
database: default

tasks:
- id: list_objects
type: io.kestra.plugin.aws.s3.List
@@ -12,6 +14,7 @@ tasks:
secretKeyId: "{{ secret('AWS_SECRET_ACCESS_KEY') }}"
region: "{{ secret('AWS_DEFAULT_REGION') }}"
bucket: "{{ vars.bucket }}"

- id: check
type: io.kestra.plugin.core.flow.If
condition: "{{ outputs.list_objects.objects }}"
@@ -23,6 +26,7 @@ tasks:
type: io.kestra.plugin.git.Clone
url: https://github.com/kestra-io/scripts
branch: main

- id: ingest_to_datalake
type: io.kestra.plugin.scripts.python.Commands
warningOnStdErr: false
@@ -35,6 +39,7 @@ tasks:
containerImage: ghcr.io/kestra-io/aws:latest
commands:
- python etl/aws_iceberg_fruit.py

- id: merge_query
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -51,6 +56,7 @@ tasks:
WHEN NOT MATCHED
THEN INSERT (id, fruit, berry, update_timestamp)
VALUES(r.id, r.fruit, r.berry, current_timestamp);
- id: optimize
type: io.kestra.plugin.aws.athena.Query
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -60,6 +66,7 @@ tasks:
outputLocation: s3://{{ vars.bucket }}/query_results/
query: |
OPTIMIZE fruits REWRITE DATA USING BIN_PACK;
- id: move_to_archive
type: io.kestra.plugin.aws.cli.AwsCLI
accessKeyId: "{{ secret('AWS_ACCESS_KEY_ID') }}"
@@ -68,11 +75,13 @@ tasks:
commands:
- aws s3 mv s3://{{ vars.bucket }}/{{ vars.prefix }}/ s3://{{
vars.bucket }}/archive/{{ vars.prefix }}/ --recursive

triggers:
- id: hourly_schedule
type: io.kestra.plugin.core.trigger.Schedule
disabled: true
cron: "@hourly"

extend:
title: Ingest data to AWS S3 with Git, Python, Apache Iceberg, AWS Glue and
Amazon Athena
