44from copy import deepcopy
55import smart_open
66from airflow .models .variable import Variable
7+ from airflow .models .xcom import LazyXComSelectSequence
78from airflow .decorators import task
89from airflow .datasets import Dataset , DatasetAlias
910from airflow .datasets .metadata import Metadata
@@ -88,7 +89,7 @@ def build_stac_task(payload, ti=None):
8889 DatasetAlias ("VEDA-Datasets" )
8990 ],
9091)
91- def post_ingest_report (ti , logical_date ): # params are Airflow kwargs - use this task without input
92+ def post_ingest_dataset_event (ti , logical_date , built_items = {} ): # params are Airflow kwargs - use this task without input
9293 """
9394 Logs a Dataset event, saving the config used as a versioned object in s3, and creating a Metadata object visible in Airflow.
9495
@@ -113,8 +114,22 @@ def post_ingest_report(ti, logical_date): # params are Airflow kwargs - use thi
113114 json .dump (payload , f , indent = 2 )
114115 log_task (f"Payload written to { key } " )
115116
117+ # built_items may be a LazyXComSelectSequence, a list of dicts, or a single dict; normalize it to a list
118+ if isinstance (built_items , LazyXComSelectSequence ):
119+ built_items = list (built_items )
120+ elif not isinstance (built_items , list ):
121+ built_items = [built_items ]
122+ print (f"Built items: { built_items } " )
123+ success_count = sum (item .get ("payload" , {}).get ("status" , {}).get ("successes" , 0 ) for item in built_items )
124+ failure_count = sum (item .get ("payload" , {}).get ("status" , {}).get ("failures" , 0 ) for item in built_items )
125+
116126 yield Metadata (
117127 Dataset (f"{ collection } " ),
118- extra = {"ingest_datetime" : str (logical_date ) }, # extra has to be provided, can be {}
128+ extra = {
129+ "ingest_datetime" : str (logical_date ),
130+ "ingest_configuration" : key ,
131+ "successful_items" : success_count ,
132+ "failed_items" : failure_count ,
133+ }, # extra has to be provided, can be {}
119134 alias = "VEDA-Datasets" ,
120135 )
0 commit comments