-
Notifications
You must be signed in to change notification settings - Fork 1
/
dlt-elt-chess-api-to-duckdb.yaml
49 lines (45 loc) · 1.42 KB
/
dlt-elt-chess-api-to-duckdb.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Flow identifier.
id: dlt-elt-chess-api-to-duckdb
# Namespace the flow belongs to.
# NOTE(review): `company.team` looks like a placeholder — confirm before deploying.
namespace: company.team
tasks:
  # Single task: runs an inline Python script in a Docker container that
  # fetches player profiles from the Chess.com public API and loads them into
  # DuckDB through a dlt pipeline.
  - id: chess_api_to_duckdb
    type: io.kestra.plugin.scripts.python.Script
    taskRunner:
      type: io.kestra.plugin.scripts.runner.docker.Docker
    containerImage: python:slim
    beforeCommands:
      # Quoted so the shell cannot interpret the [duckdb] extra as a glob.
      - pip install "dlt[duckdb]"
    # NOTE(review): presumably set so stderr output (e.g. from pip) does not
    # flag the run as a warning — confirm against the plugin documentation.
    warningOnStdErr: false
    script: |
      import dlt
      import requests

      # requests has no default timeout; without one a stalled connection
      # would block the task forever.
      REQUEST_TIMEOUT_SECONDS = 30

      pipeline = dlt.pipeline(
          pipeline_name='chess_pipeline',
          destination='duckdb',
          dataset_name='player_data'
      )

      data = []
      for player in ['magnuscarlsen', 'rpragchess']:
          response = requests.get(
              f'https://api.chess.com/pub/player/{player}',
              timeout=REQUEST_TIMEOUT_SECONDS,
          )
          # Fail the task on an HTTP error instead of loading an error payload.
          response.raise_for_status()
          data.append(response.json())

      # Extract, normalize, and load the data
      pipeline.run(data, table_name='player')
triggers:
  # Cron schedule for 09:00 every day. The trigger is declared with
  # `disabled: true`, so it has to be switched to `false` to take effect.
  - id: daily
    type: io.kestra.plugin.core.trigger.Schedule
    disabled: true
    cron: "0 9 * * *"
# Catalogue metadata for this flow: title, descriptions, and tags.
extend:
  # Folded scalars (`>-`) join the wrapped lines with single spaces and strip
  # the trailing newline, so each value is one line of text.
  title: >-
    Schedule a Python data ingestion job to extract data from an API and load
    it to DuckDB using dltHub (data load tool)
  description: >-
    This flow loads data from the Chess.com API into DuckDB
    destination. The flow is scheduled to run daily at 9 AM.
  tags:
    - Ingest
    - Schedule
    - Trigger
    - Data
  ee: false
  demo: true
  meta_description: >-
    This flow loads data from the Chess.com API into DuckDB
    destination. The flow is scheduled to run daily at 9 AM.