From 3af7c11e8e34636f87c6a10212803e5c802b1278 Mon Sep 17 00:00:00 2001 From: mbergeron Date: Wed, 26 Sep 2018 15:41:50 -0400 Subject: [PATCH 1/3] fix the target-postgres to work with zendesk - fix an error when an empty type declaration is found i.e. "custom": {} - fix a problem in string sanitization for \u0000 --- target_postgres/db_sync.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/target_postgres/db_sync.py b/target_postgres/db_sync.py index 4fba051..89cf28d 100644 --- a/target_postgres/db_sync.py +++ b/target_postgres/db_sync.py @@ -39,6 +39,13 @@ def safe_column_name(name): return '"{}"'.format(name) +def sanitize(value): + if not isinstance(value, str): + return value + + return value.replace('\u0000', '') + + def column_clause(name, schema_property): return '{} {}'.format(safe_column_name(name), column_type(schema_property)) @@ -58,20 +65,27 @@ def flatten_key(k, parent_key, sep): def flatten_schema(d, parent_key=[], sep='__'): items = [] + print("{}\n".format(d['properties'])) for k, v in d['properties'].items(): new_key = flatten_key(k, parent_key, sep) + + if not v: + logger.warn("Empty definition for {}.".format(new_key)) + continue + if 'type' in v.keys(): if 'object' in v['type']: items.extend(flatten_schema(v, parent_key + [k], sep=sep).items()) else: items.append((new_key, v)) else: - if list(v.values())[0][0]['type'] == 'string': - list(v.values())[0][0]['type'] = ['null', 'string'] - items.append((new_key, list(v.values())[0][0])) - elif list(v.values())[0][0]['type'] == 'array': - list(v.values())[0][0]['type'] = ['null', 'array'] - items.append((new_key, list(v.values())[0][0])) + property = list(v.values())[0][0] + if property['type'] == 'string': + property['type'] = ['null', 'string'] + items.append((new_key, property)) + elif property['type'] == 'array': + property['type'] = ['null', 'array'] + items.append((new_key, property)) key_func = lambda item: item[0] sorted_items = sorted(items, key=key_func) @@ -89,7 +103,8 @@ def flatten_record(d, parent_key=[], sep='__'): if isinstance(v, collections.MutableMapping): items.extend(flatten_record(v, parent_key + [k], sep=sep).items()) else: - items.append((new_key, json.dumps(v) if type(v) is list else v)) + sanitized = sanitize(json.dumps(v) if type(v) is list else v) + items.append((new_key, sanitized)) return dict(items) From 02e1073d8245e877b4d65f6acb8fbbe701cd1d74 Mon Sep 17 00:00:00 2001 From: mbergeron Date: Thu, 27 Sep 2018 13:08:43 -0400 Subject: [PATCH 2/3] sanitize csv values to remove \u0000 --- target_postgres/db_sync.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/target_postgres/db_sync.py b/target_postgres/db_sync.py index 89cf28d..d41eea2 100644 --- a/target_postgres/db_sync.py +++ b/target_postgres/db_sync.py @@ -39,15 +39,16 @@ def safe_column_name(name): return '"{}"'.format(name) +def column_clause(name, schema_property): + return '{} {}'.format(safe_column_name(name), column_type(schema_property)) + + def sanitize(value): if not isinstance(value, str): return value - return value.replace('\u0000', '') - - -def column_clause(name, schema_property): - return '{} {}'.format(safe_column_name(name), column_type(schema_property)) + # this sequence will cause the CSV load to fail + return value.replace("\\u0000", '') def flatten_key(k, parent_key, sep): @@ -103,8 +104,7 @@ def flatten_record(d, parent_key=[], sep='__'): if isinstance(v, collections.MutableMapping): items.extend(flatten_record(v, parent_key + [k], sep=sep).items()) else: - sanitized = sanitize(json.dumps(v) if type(v) is list else v) - items.append((new_key, sanitized)) + items.append((new_key, json.dumps(v) if type(v) is list else v)) return dict(items) @@ -165,7 +165,7 @@ def record_to_csv_line(self, record): flatten = flatten_record(record) return ','.join( [ - json.dumps(flatten[name]) if name in flatten and flatten[name] else '' + json.dumps(sanitize(flatten[name])) if name in flatten and flatten[name] else '' for name in self.flatten_schema ] ) From f0a6ce7ecfbe40d72139c61301858118339693c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mica=C3=ABl=20Bergeron?= Date: Thu, 27 Sep 2018 13:17:23 -0400 Subject: [PATCH 3/3] Update db_sync.py Remove a print statement --- target_postgres/db_sync.py | 1 - 1 file changed, 1 deletion(-) diff --git a/target_postgres/db_sync.py b/target_postgres/db_sync.py index d41eea2..e6aef05 100644 --- a/target_postgres/db_sync.py +++ b/target_postgres/db_sync.py @@ -66,7 +66,6 @@ def flatten_key(k, parent_key, sep): def flatten_schema(d, parent_key=[], sep='__'): items = [] - print("{}\n".format(d['properties'])) for k, v in d['properties'].items(): new_key = flatten_key(k, parent_key, sep)