Skip to content

Commit

Permalink
airflow dag 파일 추가 (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
heehehe authored Nov 10, 2023
1 parent b74240c commit 13186d8
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions airflow/deploy_daily.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/python

import os
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.bash_operator import BashOperator

# Job sites to crawl; the DAG creates one crawling task per entry.
SITE_LIST = {"jumpit"}

# Arguments applied to every task of the DAG: each task is retried up to
# twice, waiting five minutes between attempts.
DEFAULT_ARGS = {
    'owner': 'DE4E',
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
}

# Directory that contains this DAG file.  `os.path.abspath(__file__)` alone is
# the path of the file itself, so appending "/../script" to it yields a path
# that goes *through* a regular file and cannot be resolved by the OS.
DIR_PATH = os.path.dirname(os.path.abspath(__file__))

# NOTE(review): assumes `script/` and `data/` are siblings of the directory
# holding this file (i.e. live one level up) -- confirm against the repo layout.
SCRIPT_PATH = os.path.normpath(os.path.join(DIR_PATH, "..", "script"))
DATA_PATH = os.path.normpath(os.path.join(DIR_PATH, "..", "data"))


# Daily pipeline: run crawling.py once per site in SITE_LIST, then run
# upload_to_bigquery.py after all crawling tasks have finished.
with DAG(
    dag_id='job_trend_daily',
    default_args=DEFAULT_ARGS,
    schedule_interval='@daily',
    # A start_date is required for the tasks to be attached and scheduled.
    # NOTE(review): the date below is the day this DAG was added; adjust it
    # if a different first run is wanted.
    start_date=datetime(2023, 11, 10),
    # Do not backfill one run per day between start_date and deployment.
    catchup=False,
) as dag:
    # One independent crawling task per site; paths are quoted so that a
    # directory name containing whitespace does not split the command.
    crawling_tasks = [
        BashOperator(
            task_id=f'crawling_{site}',
            bash_command=(
                f'python3 "{SCRIPT_PATH}/crawling.py" '
                f'-s "{site}" -d "{DATA_PATH}"'
            ),
        )
        for site in SITE_LIST
    ]

    upload_task = BashOperator(
        task_id='upload_to_bigquery',
        bash_command=f'python3 "{SCRIPT_PATH}/upload_to_bigquery.py"',
    )

    # The upload runs only after every crawling task is upstream-complete.
    crawling_tasks >> upload_task

0 comments on commit 13186d8

Please sign in to comment.