Skip to content

Commit ef621c6

Browse files
author
RuslanBergenov
committed
docs: docstring in functions check_schema_for_dupes_in_field_names & build_field_list
1 parent 705311f commit ef621c6

File tree

1 file changed

+26
-2
lines changed

1 file changed

+26
-2
lines changed

target_bigquery/validate_json_schema.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,39 @@ def check_schema_for_dupes_in_field_names(stream_name, schema):
7070
Alerts user if there are duplicate field names in JSON schema.
7171
7272
For example, if JSON schema contains:
73-
"Name" and "name"
73+
"Name" and "name" (BigQuery treats these as duplicate field names and will throw an error)
7474
or
7575
"first name" and "first_name" (this example is also a dupe because "first name" will be converted to "first_name" by schema.py)
7676
:param stream_name: name of stream
7777
:param schema: JSON schema of the stream
7878
:return:
7979
"""
8080
def build_field_list(schema):
81+
"""
82+
83+
:param schema: JSON schema to build the field list from
84+
:return: a dictionary, where:
85+
86+
every key is the uppercased BigQuery-transformed field name (the field name after it has been cleaned up for loading into BigQuery)
87+
88+
every value is a list of the original field names from the JSON schema that map to that key
89+
90+
This dictionary is flat, not nested.
91+
92+
Nested JSON fields are represented in the dictionary by joining parent and child names with a dot (e.g. 'PERSON.NAME').
93+
94+
This dict makes it easy to detect dupes and tell the user exactly where the dupe is located
95+
(what its parent field is).
96+
97+
Sample output:
98+
99+
f_dict / fields = {'OBJECT': ['object'], 'ID': ['id'],
100+
'PERSON._SOURCE': ['person.$source'],
101+
'PERSON.NAME': ['person.name', 'person.Name']}
102+
103+
dupes = {'PERSON.NAME': ['person.name', 'person.Name']}
104+
105+
"""
81106
f_dict = {}
82107
for field_name, field_property in schema.get("properties", schema.get("items", {}).get("properties", {})).items():
83108
if not ("items" in field_property and "properties" in field_property["items"]) \
@@ -98,7 +123,6 @@ def build_field_list(schema):
98123
else:
99124
f_dict[f"{key}.{k}"].extend([f"{field_name}.{i}" for i in v])
100125

101-
# sample f_dict: {"BQ_FIELD_NAME.BQ_NESTED_FIELD": ["json_schema_field_name.$nested_name", "json_schema_field_name.nested name"]}
102126
return f_dict
103127

104128
fields = build_field_list(schema)

0 commit comments

Comments
 (0)