2
2
import random
3
3
import string
4
4
import logging
5
+
6
+ from airflow .utils .db import provide_session
7
+ from airflow .models import Connection
5
8
from airflow .utils .decorators import apply_defaults
9
+
6
10
from airflow .models import BaseOperator
7
11
from airflow .hooks .S3_hook import S3Hook
8
12
from airflow .hooks .postgres_hook import PostgresHook
9
- from airflow .utils .db import provide_session
10
- from airflow .models import Connection
11
13
12
14
13
15
class S3ToRedshiftOperator (BaseOperator ):
@@ -46,9 +48,12 @@ class S3ToRedshiftOperator(BaseOperator):
46
48
possible values include "mysql".
47
49
:type origin_datatype: string
48
50
:param load_type: The method of loading into Redshift that
49
- should occur. Options are "append",
50
- "rebuild", and "upsert". Defaults to
51
- "append."
51
+ should occur. Options:
52
+ - "append"
53
+ - "rebuild"
54
+ - "truncate"
55
+ - "upsert"
56
+ Defaults to "append."
52
57
:type load_type: string
53
58
:param primary_key: *(optional)* The primary key for the
54
59
destination table. Not enforced by redshift
@@ -128,10 +133,10 @@ def __init__(self,
128
133
self .sortkey = sortkey
129
134
self .sort_type = sort_type
130
135
131
- if self .load_type .lower () not in [ "append" , "rebuild" , "upsert" ] :
136
+ if self .load_type .lower () not in ( "append" , "rebuild" , "truncate" , " upsert") :
132
137
raise Exception ('Please choose "append", "rebuild", or "upsert".' )
133
138
134
- if self .schema_location .lower () not in [ 's3' , 'local' ] :
139
+ if self .schema_location .lower () not in ( 's3' , 'local' ) :
135
140
raise Exception ('Valid Schema Locations are "s3" or "local".' )
136
141
137
142
if not (isinstance (self .sortkey , str ) or isinstance (self .sortkey , list )):
@@ -152,9 +157,12 @@ def execute(self, context):
152
157
letters = string .ascii_lowercase
153
158
random_string = '' .join (random .choice (letters ) for _ in range (7 ))
154
159
self .temp_suffix = '_tmp_{0}' .format (random_string )
160
+
155
161
if self .origin_schema :
156
162
schema = self .read_and_format ()
163
+
157
164
pg_hook = PostgresHook (self .redshift_conn_id )
165
+
158
166
self .create_if_not_exists (schema , pg_hook )
159
167
self .reconcile_schemas (schema , pg_hook )
160
168
self .copy_data (pg_hook , schema )
@@ -221,7 +229,6 @@ def read_and_format(self):
221
229
if i ['type' ] == e ['avro' ]:
222
230
i ['type' ] = e ['redshift' ]
223
231
224
- print (schema )
225
232
return schema
226
233
227
234
def reconcile_schemas (self , schema , pg_hook ):
@@ -277,7 +284,7 @@ def getS3Conn():
277
284
elif aws_role_arn :
278
285
creds = ("aws_iam_role={0}"
279
286
.format (aws_role_arn ))
280
-
287
+
281
288
return creds
282
289
283
290
# Delete records from the destination table where the incremental_key
@@ -331,6 +338,11 @@ def getS3Conn():
331
338
FILLTARGET
332
339
''' .format (self .redshift_schema , self .table , self .temp_suffix )
333
340
341
+ drop_sql = \
342
+ '''
343
+ DROP TABLE IF EXISTS "{0}"."{1}"
344
+ ''' .format (self .redshift_schema , self .table )
345
+
334
346
drop_temp_sql = \
335
347
'''
336
348
DROP TABLE IF EXISTS "{0}"."{1}{2}"
@@ -366,6 +378,13 @@ def getS3Conn():
366
378
base_sql )
367
379
if self .load_type == 'append' :
368
380
pg_hook .run (load_sql )
381
+ elif self .load_type == 'rebuild' :
382
+ pg_hook .run (drop_sql )
383
+ self .create_if_not_exists (schema , pg_hook )
384
+ pg_hook .run (load_sql )
385
+ elif self .load_type == 'truncate' :
386
+ pg_hook .run (truncate_sql )
387
+ pg_hook .run (load_sql )
369
388
elif self .load_type == 'upsert' :
370
389
self .create_if_not_exists (schema , pg_hook , temp = True )
371
390
load_temp_sql = \
@@ -378,9 +397,6 @@ def getS3Conn():
378
397
pg_hook .run (delete_confirm_sql )
379
398
pg_hook .run (append_sql , autocommit = True )
380
399
pg_hook .run (drop_temp_sql )
381
- elif self .load_type == 'rebuild' :
382
- pg_hook .run (truncate_sql )
383
- pg_hook .run (load_sql )
384
400
385
401
def create_if_not_exists (self , schema , pg_hook , temp = False ):
386
402
output = ''
0 commit comments