# infra_sync.py
"""DAG to sync data from Victor Ops via Airbyte and transform it with DBT."""
import sys
import logging
from os import path
from datetime import datetime, timedelta

from airflow.decorators import dag
from airflow.operators.bash import BashOperator

# HACK: Fix for loading relative modules.
sys.path.append(path.dirname(path.realpath(__file__)))

from tasks.airbyte import fetch_airbyte_connections_tg
from tasks.alerting import send_alert_discord
from providers.airbyte.operator import AirbyteTriggerSyncOperator
from tasks.config import INTERNAL_ENV

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
ARGS = {
    'owner': 'apentori',
    'depends_on_past': False,
    'start_date': datetime(2024, 4, 30),
    'email': ['alexis@status.im'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=10),
    'on_failure_callback': send_alert_discord,
}
airbyte_connections = ['victor_ops']


@dag('infra_sync',
     default_args=ARGS,
     schedule_interval='0 3 * * *')
def infra_sync():
    connections_id = fetch_airbyte_connections_tg(airbyte_connections)
    # Trigger the Airbyte sync that fetches data from Victor Ops.
    gh_sync_victor_ops = AirbyteTriggerSyncOperator(
        task_id='airbyte_fetch_victor_ops',
        airbyte_conn_id='airbyte_conn',
        connection_id=connections_id['victor_ops'],
        asynchronous=False,
        wait_seconds=3
    )
    # Seed the static reference data used by the infra DBT models.
    dbt_seed = BashOperator(
        task_id='dbt_seed_models_infra',
        bash_command='dbt seed --select infra',
        env=INTERNAL_ENV,
        append_env=True
    )
    # Launch the DBT transformations on the data previously fetched.
    dbt_transform = BashOperator(
        task_id='dbt_run_models_infra',
        bash_command='dbt run --select infra',
        env=INTERNAL_ENV,
        append_env=True
    )
    connections_id >> gh_sync_victor_ops >> dbt_seed >> dbt_transform
infra_sync()
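
# A minimal sketch for exercising this DAG outside the scheduler, assuming an
# Airflow version >= 2.5 where DAG.test() runs every task in a single process.
# The Airbyte instance behind 'airbyte_conn' and the DBT project configured
# through INTERNAL_ENV would still need to be reachable from wherever this is
# run; treat it as an illustration rather than part of the deployed DAG.
if __name__ == '__main__':
    infra_sync().test()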