22
22
import io .cdap .cdap .api .data .schema .Schema ;
23
23
24
24
import java .util .ArrayList ;
25
+ import java .util .Arrays ;
26
+ import java .util .HashSet ;
25
27
import java .util .List ;
28
+ import java .util .Set ;
26
29
import javax .annotation .Nullable ;
27
30
28
31
/**
29
32
* Utilities around BigQuery schemas.
30
33
*/
31
34
public class Schemas {
32
35
36
+ // Set of BigQuery types supported for clustering per definition
37
+ // https://cloud.google.com/bigquery/docs/creating-clustered-tables#limitations
38
+ private static final Set <StandardSQLTypeName > CLUSTERING_SUPPORTED_TYPES
39
+ = new HashSet <>(Arrays .asList (StandardSQLTypeName .DATE , StandardSQLTypeName .BOOL , StandardSQLTypeName .GEOGRAPHY ,
40
+ StandardSQLTypeName .INT64 , StandardSQLTypeName .NUMERIC , StandardSQLTypeName .STRING ,
41
+ StandardSQLTypeName .TIMESTAMP , StandardSQLTypeName .DATETIME ));
42
+
33
43
private Schemas() {
  // Intentionally empty: this class only exposes static helpers and is never instantiated.
}
36
46
47
+
37
48
public static com .google .cloud .bigquery .Schema convert (Schema schema ) {
38
49
return com .google .cloud .bigquery .Schema .of (convertFields (schema .getFields ()));
39
50
}
40
51
41
52
private static List <Field > convertFields (List <Schema .Field > fields ) {
42
53
List <Field > output = new ArrayList <>();
43
54
for (Schema .Field field : fields ) {
44
- String name = field .getName ();
45
- boolean isNullable = field .getSchema ().isNullable ();
46
- Schema fieldSchema = field .getSchema ();
47
- fieldSchema = isNullable ? fieldSchema .getNonNullable () : fieldSchema ;
48
- Schema .LogicalType logicalType = fieldSchema .getLogicalType ();
49
- Field .Mode fieldMode = isNullable ? Field .Mode .NULLABLE : Field .Mode .REQUIRED ;
50
- if (logicalType != null ) {
51
- StandardSQLTypeName bqType = convertLogicalType (logicalType );
52
- // TODO: figure out what the correct behavior should be
53
- if (bqType == null ) {
54
- throw new IllegalArgumentException (
55
- String .format ("Field '%s' is of type '%s', which is not supported in BigQuery." ,
56
- name , logicalType .getToken ()));
57
- }
58
- output .add (Field .newBuilder (name , bqType ).setMode (fieldMode ).build ());
59
- continue ;
60
- }
61
-
62
- Schema .Type type = isNullable ? field .getSchema ().getNonNullable ().getType () : field .getSchema ().getType ();
63
- if (type == Schema .Type .ARRAY ) {
64
- Schema componentSchema = fieldSchema .getComponentSchema ();
65
- componentSchema = componentSchema .isNullable () ? componentSchema .getNonNullable () : componentSchema ;
66
- StandardSQLTypeName bqType = convertType (componentSchema .getType ());
67
- if (bqType == null ) {
68
- throw new IllegalArgumentException (
69
- String .format ("Field '%s' is an array of '%s', which is not supported in BigQuery." ,
70
- name , logicalType .getToken ()));
71
- }
72
- output .add (Field .newBuilder (name , bqType ).setMode (Field .Mode .REPEATED ).build ());
73
- } else if (type == Schema .Type .RECORD ) {
74
- List <Field > subFields = convertFields (fieldSchema .getFields ());
75
- output .add (Field .newBuilder (name , StandardSQLTypeName .STRUCT , FieldList .of (subFields )).build ());
76
- } else {
77
- StandardSQLTypeName bqType = convertType (type );
78
- if (bqType == null ) {
79
- throw new IllegalArgumentException (
80
- String .format ("Field '%s' is of type '%s', which is not supported in BigQuery." ,
81
- name , type .name ().toLowerCase ()));
82
- }
83
- output .add (Field .newBuilder (name , bqType ).setMode (fieldMode ).build ());
84
- }
55
+ output .add (convertToBigQueryField (field ));
85
56
}
86
57
return output ;
87
58
}
@@ -96,13 +67,12 @@ private static StandardSQLTypeName convertType(Schema.Type type) {
96
67
case DOUBLE :
97
68
return StandardSQLTypeName .FLOAT64 ;
98
69
case STRING :
70
+ case ENUM :
99
71
return StandardSQLTypeName .STRING ;
100
72
case BOOLEAN :
101
73
return StandardSQLTypeName .BOOL ;
102
74
case BYTES :
103
75
return StandardSQLTypeName .BYTES ;
104
- case ENUM :
105
- return StandardSQLTypeName .STRING ;
106
76
}
107
77
return null ;
108
78
}
@@ -125,4 +95,58 @@ private static StandardSQLTypeName convertLogicalType(Schema.LogicalType logical
125
95
}
126
96
return null ;
127
97
}
98
+
99
+ /**
100
+ * Check if the BigQuery data type associated with the {@link Schema.Field} can be added
101
+ * as a clustering column while creating BigQuery table.
102
+ */
103
+ public static boolean isClusteringSupported (Schema .Field field ) {
104
+ Field bigQueryField = convertToBigQueryField (field );
105
+ return CLUSTERING_SUPPORTED_TYPES .contains (bigQueryField .getType ().getStandardType ());
106
+ }
107
+
108
+ private static Field convertToBigQueryField (Schema .Field field ) {
109
+ String name = field .getName ();
110
+ boolean isNullable = field .getSchema ().isNullable ();
111
+ Schema fieldSchema = field .getSchema ();
112
+ fieldSchema = isNullable ? fieldSchema .getNonNullable () : fieldSchema ;
113
+ Schema .LogicalType logicalType = fieldSchema .getLogicalType ();
114
+ Field .Mode fieldMode = isNullable ? Field .Mode .NULLABLE : Field .Mode .REQUIRED ;
115
+ if (logicalType != null ) {
116
+ StandardSQLTypeName bqType = convertLogicalType (logicalType );
117
+ // TODO: figure out what the correct behavior should be
118
+ if (bqType == null ) {
119
+ throw new IllegalArgumentException (
120
+ String .format ("Field '%s' is of type '%s', which is not supported in BigQuery." ,
121
+ name , logicalType .getToken ()));
122
+ }
123
+ return Field .newBuilder (name , bqType ).setMode (fieldMode ).build ();
124
+ }
125
+
126
+ Field output ;
127
+ Schema .Type type = isNullable ? field .getSchema ().getNonNullable ().getType () : field .getSchema ().getType ();
128
+ if (type == Schema .Type .ARRAY ) {
129
+ Schema componentSchema = fieldSchema .getComponentSchema ();
130
+ componentSchema = componentSchema .isNullable () ? componentSchema .getNonNullable () : componentSchema ;
131
+ StandardSQLTypeName bqType = convertType (componentSchema .getType ());
132
+ if (bqType == null ) {
133
+ throw new IllegalArgumentException (
134
+ String .format ("Field '%s' is an array of '%s', which is not supported in BigQuery." ,
135
+ name , logicalType .getToken ()));
136
+ }
137
+ output = Field .newBuilder (name , bqType ).setMode (Field .Mode .REPEATED ).build ();
138
+ } else if (type == Schema .Type .RECORD ) {
139
+ List <Field > subFields = convertFields (fieldSchema .getFields ());
140
+ output = Field .newBuilder (name , StandardSQLTypeName .STRUCT , FieldList .of (subFields )).build ();
141
+ } else {
142
+ StandardSQLTypeName bqType = convertType (type );
143
+ if (bqType == null ) {
144
+ throw new IllegalArgumentException (
145
+ String .format ("Field '%s' is of type '%s', which is not supported in BigQuery." ,
146
+ name , type .name ().toLowerCase ()));
147
+ }
148
+ output = Field .newBuilder (name , bqType ).setMode (fieldMode ).build ();
149
+ }
150
+ return output ;
151
+ }
128
152
}
0 commit comments