cloudquery-sync-hn-to-parquet.yaml

id: cloudquery-sync-hn-to-parquet
namespace: company.team

tasks:
  - id: hn_to_parquet
    type: io.kestra.plugin.cloudquery.CloudQueryCLI
    inputFiles:
      config.yml: |
        kind: source
        spec:
          name: hackernews
          path: cloudquery/hackernews
          version: v3.1.5
          tables: ["*"]
          destinations:
            - file
          spec:
            item_concurrency: 100
            start_time: "{{ execution.startDate | dateAdd(-1, 'DAYS') }}"
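            # The start_time above is rendered by Kestra's Pebble templating before the
            # sync runs: it resolves to 24 hours before the execution start, so each run
            # only ingests roughly the last day of HackerNews items.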
        ---
        kind: destination
        spec:
          name: file
          path: cloudquery/file
          version: v4.1.1
          spec:
            path: "{% raw %}{{TABLE}}/{{UUID}}.{{FORMAT}}{% endraw %}"
            format: csv
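            # Sketch/assumption: the CloudQuery file destination also supports other
            # formats such as parquet or json. To write Parquet instead of CSV, you
            # could swap the format above and adjust outputFiles to match, e.g.:
            # format: parquet   # with outputFiles: "**/*.parquet"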
    outputFiles:
      - "**/*.csv"
    env:
      CLOUDQUERY_API_KEY: 9ITIyNYb8s3Cr8nSiV4KcKVPGJNSd6u8
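      # Sketch (assumes a Kestra secret named CLOUDQUERY_API_KEY has been configured):
      # instead of hardcoding the key above, you could reference it with Kestra's
      # secret() function:
      # CLOUDQUERY_API_KEY: "{{ secret('CLOUDQUERY_API_KEY') }}"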
    commands:
      - cloudquery sync config.yml --log-console --log-level=warn
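
# Optional sketch, not part of the original flow: because the source only pulls the
# last day of data, this flow pairs naturally with a daily Schedule trigger
# (trigger type name assumed from recent Kestra versions):
#
# triggers:
#   - id: daily
#     type: io.kestra.plugin.core.trigger.Schedule
#     cron: "0 9 * * *"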

extend:
  title: Extract data from HackerNews and store it in a local CSV file using CloudQuery CLI
  description: >-
    This flow shows how to extract data from HackerNews using CloudQuery and
    store it in a local CSV file. You can use the same format to ingest data
    from any other source and load it into any destination supported by
    CloudQuery.

    To avoid parsing the CloudQuery TABLE and UUID variables as Kestra's native
    Pebble expressions, we wrap them in the `{% raw %}{{TABLE}}{% endraw %}` syntax.

    Make sure to replace the API key with your own API key as shown in [this blog
    post](https://kestra.io/blogs/2024-03-12-introduction-to-cloudquery#how-cloudquery-works).
  tags:
    - Ingest
    - CLI
  ee: false
  demo: true
  meta_description: "This flow shows how to extract data from HackerNews using
    CloudQuery and store it in a local CSV file."