Skip to content

Commit 9490fec

Browse files
author
Ruslan Bergenov
authored
Merge pull request #33 from adswerve/hotfix/issue32
Issue32 fix
2 parents 29128d3 + 243e4a7 commit 9490fec

File tree

2 files changed

+77
-8
lines changed

2 files changed

+77
-8
lines changed

target_bigquery/simplify_json_schema.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,8 @@ def is_iterable(schema):
189189
"""
190190

191191
return not _is_ref(schema) \
192-
and ARRAY in get_type(schema) \
193-
and 'items' in schema
192+
and ARRAY in get_type(schema)
193+
# and 'items' in schema # commented out to allow "members": {"type": "array"}
194194

195195

196196
def is_nullable(schema):
@@ -217,21 +217,31 @@ def is_literal(schema):
217217
def is_datetime(schema):
218218
"""
219219
Given a JSON Schema compatible dict, returns True when schema's type allows being a date-time
220+
Two cases make a datetime type:
221+
a) "string" is in type and format is "date-time" (this follows the JSON Schema standard)
222+
b) "date-time" itself is in type and no format is given (a non-standard shorthand, accepted for simplicity)
220223
:param schema: dict, JSON Schema
221224
:return: Boolean
222225
"""
223226

224-
return STRING in get_type(schema) and schema.get('format') == DATE_TIME_FORMAT
227+
return \
228+
(STRING in get_type(schema) and schema.get('format') == DATE_TIME_FORMAT) \
229+
or (DATE_TIME_FORMAT in get_type(schema) and schema.get('format') is None)
225230

226231

227232
def is_date(schema):
228233
"""
229-
Given a JSON Schema compatible dict, returns True when schema's type allows being a date-time
234+
Given a JSON Schema compatible dict, returns True when schema's type allows being a date
235+
Two cases make a date type:
236+
a) "string" is in type and format is "date" (this follows the JSON Schema standard)
237+
b) "date" itself is in type and no format is given (a non-standard shorthand, accepted for simplicity)
230238
:param schema: dict, JSON Schema
231239
:return: Boolean
232240
"""
233241

234-
return STRING in get_type(schema) and schema.get('format') == DATE_FORMAT
242+
return \
243+
(STRING in get_type(schema) and schema.get('format') == DATE_FORMAT) \
244+
or (DATE_FORMAT in get_type(schema) and schema.get('format') is None)
235245

236246

237247
def is_bq_geography(schema):
@@ -417,15 +427,21 @@ def _simplify__implicit_anyof(root_schema, schema):
417427
'format': DATE_TIME_FORMAT
418428
}))
419429

420-
types.remove(STRING)
430+
if DATE_TIME_FORMAT in types:
431+
types.remove(DATE_TIME_FORMAT)
432+
else:
433+
types.remove(STRING)
421434

422435
if is_date(schema):
423436
schemas.append(Cachable({
424437
'type': [STRING],
425438
'format': DATE_FORMAT
426439
}))
427440

428-
types.remove(STRING)
441+
if DATE_FORMAT in types:
442+
types.remove(DATE_FORMAT)
443+
else:
444+
types.remove(STRING)
429445

430446
if is_bq_geography(schema):
431447
schemas.append(Cachable({
@@ -485,7 +501,7 @@ def _simplify__implicit_anyof(root_schema, schema):
485501
if is_iterable(schema):
486502
schemas.append({
487503
'type': [ARRAY],
488-
'items': _helper_simplify(root_schema, schema.get('items', {}))
504+
'items': _helper_simplify(root_schema, schema.get('items', {"type": STRING}))
489505
})
490506

491507
types.remove(ARRAY)

tests/test_schema_conversion.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,59 @@ class TestSchemaConversion(unittestcore.BaseUnitTest):
235235
def setUp(self):
236236
super(TestSchemaConversion, self).setUp()
237237

238+
def test_flat_simplify_and_build(self):
239+
schema = {
240+
"properties": {
241+
"new_status": {
242+
"type": ["string", "null"]
243+
},
244+
"previous_status": {
245+
"type": ["number", "null"]
246+
},
247+
"new_assignee": {
248+
"type": ["integer", "null"]
249+
},
250+
"previous_assignee": {
251+
"type": ["boolean", "null"]
252+
},
253+
"new_due_date": {
254+
"type": ["date", "null"]
255+
},
256+
"previous_due_date": {
257+
"type": ["date-time", "null"]
258+
},
259+
"members": {
260+
"type": "array" # shorthand array definition; by default we treat this as an array of strings
261+
}
262+
}
263+
}
264+
265+
schema_simplified = simplify(schema)
266+
schema_bq = build_schema(schema_simplified, key_properties={}, add_metadata=False)
267+
268+
for f in schema_bq:
269+
if f.name == "new_status":
270+
self.assertEqual(f.field_type.upper(), "STRING")
271+
272+
elif f.name == "previous_status":
273+
self.assertEqual(f.field_type.upper(), "FLOAT")
274+
275+
elif f.name == "new_assignee":
276+
self.assertEqual(f.field_type.upper(), "INTEGER")
277+
278+
elif f.name == "previous_assignee":
279+
self.assertEqual(f.field_type.upper(), "BOOLEAN")
280+
281+
elif f.name == "new_due_date":
282+
self.assertEqual(f.field_type.upper(), "DATE")
283+
284+
elif f.name == "previous_due_date":
285+
self.assertEqual(f.field_type.upper(), "TIMESTAMP")
286+
287+
elif f.name == "members":
288+
self.assertEqual(f.field_type.upper(), "STRING")
289+
self.assertEqual(f.mode, "REPEATED")
290+
238291
def test_flat_schema(self):
239292

240293
schema_0_input = schema_simple_1

0 commit comments

Comments
 (0)