Skip to content

Commit 3fd9fec

Browse files
author
RuslanBergenov
committed
Merge branch 'development-refactor-bigquery-field-name-function' into development
# Conflicts: # .github/workflows/python-package.yml
2 parents 5f6e6f2 + 178247e commit 3fd9fec

File tree

3 files changed

+23
-46
lines changed

3 files changed

+23
-46
lines changed

.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ env:
2424

2525
on:
2626
push:
27-
branches: [ development-update-requirements, development, master ]
27+
branches: [ development, master ]
2828
pull_request:
2929
branches: [ master ]
3030

target_bigquery/schema.py

Lines changed: 19 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,15 @@ def cleanup_record(schema, record):
3131
elif isinstance(record, dict):
3232
nr = {}
3333
for key, value in record.items():
34-
nkey = bigquery_transformed_key(key)
34+
nkey = create_valid_bigquery_field_name(key)
3535
nr[nkey] = cleanup_record(schema, value)
3636
return nr
3737

3838
else:
3939
raise Exception(f"unhandled instance of record: {record}")
4040

4141

42-
def bigquery_transformed_key(key):
42+
def create_valid_bigquery_field_name(field_name):
4343
"""
4444
Clean up / prettify field names, make sure they match BigQuery naming conventions.
4545
@@ -56,45 +56,22 @@ def bigquery_transformed_key(key):
5656
:param field_name: JSON field name
5757
:return: cleaned up JSON field name
5858
"""
59-
remove_list = [" ",
60-
"!",
61-
"\"",
62-
"#",
63-
"$",
64-
"%",
65-
"&",
66-
"'",
67-
"(",
68-
")",
69-
"*",
70-
"+",
71-
",",
72-
"-",
73-
".",
74-
"/",
75-
":",
76-
";",
77-
"<",
78-
"=",
79-
">",
80-
"?",
81-
"@",
82-
"\\",
83-
"]",
84-
"^",
85-
"`",
86-
"|",
87-
"}",
88-
"~"]
89-
90-
for c in remove_list:
91-
key = key.replace(c, "_")
92-
93-
if re.match(r"^\d", key):
94-
key = "_" + key
95-
96-
return key
9759

60+
cleaned_up_field_name = ""
61+
62+
# if char is alphanumeric (either letters or numbers), append char to our string
63+
for char in field_name:
64+
if char.isalnum():
65+
cleaned_up_field_name += char
66+
else:
67+
# otherwise, replace it with underscore
68+
cleaned_up_field_name += "_"
69+
70+
# if the field starts with a digit, prepend an underscore
# NOTE(review): indexing [0] below raises IndexError when field_name is an empty string
71+
if cleaned_up_field_name[0].isdigit():
72+
cleaned_up_field_name = "_%s" % cleaned_up_field_name
73+
74+
return cleaned_up_field_name[:300] # trim the string to the first 300 chars (presumably BigQuery's column-name length limit; confirm)
9875

9976
def prioritize_one_data_type_from_multiple_ones_in_any_of(field_property):
10077
"""
@@ -248,7 +225,7 @@ def build_field(field_name, field_property):
248225
if not ("items" in field_property and "properties" in field_property["items"]) and not (
249226
"properties" in field_property):
250227

251-
return (SchemaField(name=bigquery_transformed_key(field_name),
228+
return (SchemaField(name=create_valid_bigquery_field_name(field_name),
252229
field_type=convert_field_type(field_property),
253230
mode=determine_field_mode(field_name, field_property),
254231
description=None,
@@ -266,7 +243,7 @@ def build_field(field_name, field_property):
266243
).items():
267244
processed_subfields.append(build_field(subfield_name, subfield_property))
268245

269-
return (SchemaField(name=bigquery_transformed_key(field_name),
246+
return (SchemaField(name=create_valid_bigquery_field_name(field_name),
270247
field_type=convert_field_type(field_property),
271248
mode=determine_field_mode(field_name, field_property),
272249
description=None,

target_bigquery/validate_json_schema.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import re
22
import singer
3-
from target_bigquery.schema import bigquery_transformed_key
3+
from target_bigquery.schema import create_valid_bigquery_field_name
44

55
LOGGER = singer.get_logger()
66

@@ -107,7 +107,7 @@ def build_field_list(schema):
107107
for field_name, field_property in schema.get("properties", schema.get("items", {}).get("properties", {})).items():
108108
if not ("items" in field_property and "properties" in field_property["items"]) \
109109
and not ("properties" in field_property):
110-
key = bigquery_transformed_key(field_name.upper())
110+
key = create_valid_bigquery_field_name(field_name.upper())
111111
if not f_dict.get(key):
112112
f_dict[key] = [field_name]
113113
else:
@@ -116,7 +116,7 @@ def build_field_list(schema):
116116
elif ("items" in field_property and "properties" in field_property["items"]) \
117117
or ("properties" in field_property):
118118
nd = build_field_list(field_property)
119-
key = bigquery_transformed_key(field_name.upper())
119+
key = create_valid_bigquery_field_name(field_name.upper())
120120
for k, v in nd.items():
121121
if not f_dict.get(f"{key}.{k}"):
122122
f_dict[f"{key}.{k}"] = [f"{field_name}.{i}" for i in v]

0 commit comments

Comments
 (0)