@@ -31,15 +31,15 @@ def cleanup_record(schema, record):
3131 elif isinstance (record , dict ):
3232 nr = {}
3333 for key , value in record .items ():
34- nkey = bigquery_transformed_key (key )
34+ nkey = create_valid_bigquery_field_name (key )
3535 nr [nkey ] = cleanup_record (schema , value )
3636 return nr
3737
3838 else :
3939 raise Exception (f"unhandled instance of record: { record } " )
4040
4141
42- def bigquery_transformed_key ( key ):
42+ def create_valid_bigquery_field_name ( field_name ):
4343 """
4444 Clean up / prettify field names, make sure they match BigQuery naming conventions.
4545
@@ -56,45 +56,22 @@ def bigquery_transformed_key(key):
5656 :param field_name: JSON field name
5757 :return: cleaned up JSON field name
5858 """
59- remove_list = [" " ,
60- "!" ,
61- "\" " ,
62- "#" ,
63- "$" ,
64- "%" ,
65- "&" ,
66- "'" ,
67- "(" ,
68- ")" ,
69- "*" ,
70- "+" ,
71- "," ,
72- "-" ,
73- "." ,
74- "/" ,
75- ":" ,
76- ";" ,
77- "<" ,
78- "=" ,
79- ">" ,
80- "?" ,
81- "@" ,
82- "\\ " ,
83- "]" ,
84- "^" ,
85- "`" ,
86- "|" ,
87- "}" ,
88- "~" ]
89-
90- for c in remove_list :
91- key = key .replace (c , "_" )
92-
93- if re .match (r"^\d" , key ):
94- key = "_" + key
95-
96- return key
9759
60+ cleaned_up_field_name = ""
61+
62+ # if char is alphanumeric (either letters or numbers), append char to our string
63+ for char in field_name :
64+ if char .isalnum ():
65+ cleaned_up_field_name += char
66+ else :
67+ # otherwise, replace it with underscore
68+ cleaned_up_field_name += "_"
69+
70+ # if field starts with digit, prepend it with underscore (NOTE: indexing [0] raises IndexError when field_name is empty)
71+ if cleaned_up_field_name [0 ].isdigit ():
72+ cleaned_up_field_name = "_%s" % cleaned_up_field_name
73+
74+ return cleaned_up_field_name [:300 ] # trim the string to the first 300 chars (BigQuery column-name length limit)
9875
9976def prioritize_one_data_type_from_multiple_ones_in_any_of (field_property ):
10077 """
@@ -248,7 +225,7 @@ def build_field(field_name, field_property):
248225 if not ("items" in field_property and "properties" in field_property ["items" ]) and not (
249226 "properties" in field_property ):
250227
251- return (SchemaField (name = bigquery_transformed_key (field_name ),
228+ return (SchemaField (name = create_valid_bigquery_field_name (field_name ),
252229 field_type = convert_field_type (field_property ),
253230 mode = determine_field_mode (field_name , field_property ),
254231 description = None ,
@@ -266,7 +243,7 @@ def build_field(field_name, field_property):
266243 ).items ():
267244 processed_subfields .append (build_field (subfield_name , subfield_property ))
268245
269- return (SchemaField (name = bigquery_transformed_key (field_name ),
246+ return (SchemaField (name = create_valid_bigquery_field_name (field_name ),
270247 field_type = convert_field_type (field_property ),
271248 mode = determine_field_mode (field_name , field_property ),
272249 description = None ,
0 commit comments