Commit c3daf84

[FLINK-36269][python] Remove usage about TableEnvironmentInternal#fromTableSource in python module (#25322)
1 parent: b700f1c

9 files changed: +214 -48 lines changed

docs/content.zh/docs/dev/python/table/table_environment.md

Lines changed: 0 additions & 11 deletions
@@ -206,17 +206,6 @@ TableEnvironment API
       </tr>
     </thead>
     <tbody>
-    <tr>
-      <td>
-        <strong>from_table_source(table_source)</strong>
-      </td>
-      <td>
-        通过 table source 创建一张表。
-      </td>
-      <td class="text-center">
-        {{< pythondoc file="pyflink.table.html#pyflink.table.TableEnvironment.from_table_source" name="链接">}}
-      </td>
-    </tr>
     <tr>
       <td>
        <strong>scan(*table_path)</strong>

docs/content/docs/dev/python/table/table_environment.md

Lines changed: 0 additions & 11 deletions
@@ -206,17 +206,6 @@ These APIs are used to create/remove Table API/SQL Tables and write queries:
       </tr>
     </thead>
     <tbody>
-    <tr>
-      <td>
-        <strong>from_table_source(table_source)</strong>
-      </td>
-      <td>
-        Creates a table from a table source.
-      </td>
-      <td class="text-center">
-        {{< pythondoc file="pyflink.table.html#pyflink.table.TableEnvironment.from_table_source" name="link">}}
-      </td>
-    </tr>
     <tr>
       <td>
        <strong>scan(*table_path)</strong>

flink-python/pyflink/table/table_environment.py

Lines changed: 3 additions & 3 deletions
@@ -1368,10 +1368,10 @@ def from_pandas(self, pdf,
                 data_type = data_type.bridgedTo(
                     load_java_class('org.apache.flink.table.data.RowData'))
 
-                j_arrow_table_source = \
-                    jvm.org.apache.flink.table.runtime.arrow.ArrowUtils.createArrowTableSource(
+                j_arrow_table_source_descriptor = \
+                    jvm.org.apache.flink.table.runtime.arrow.ArrowUtils.createArrowTableSourceDesc(
                         data_type, temp_file.name)
-                return Table(self._j_tenv.fromTableSource(j_arrow_table_source), self)
+                return Table(getattr(self._j_tenv, "from")(j_arrow_table_source_descriptor), self)
             finally:
                 os.unlink(temp_file.name)
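
The Python change above replaces the internal TableEnvironmentInternal#fromTableSource call with the public TableEnvironment#from(TableDescriptor) API (reached via getattr because "from" is a reserved word in Python). As a rough illustration of that public API, here is a minimal Java sketch that builds a descriptor and hands it to from(); the 'datagen' connector and column names are stand-ins, not part of this commit:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableDescriptor;
import org.apache.flink.table.api.TableEnvironment;

public class FromDescriptorExample {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        // An anonymous (inline) table backed by a connector, described entirely in code.
        TableDescriptor descriptor =
                TableDescriptor.forConnector("datagen")
                        .schema(
                                Schema.newBuilder()
                                        .column("id", DataTypes.BIGINT())
                                        .column("name", DataTypes.STRING())
                                        .build())
                        .option("number-of-rows", "10")
                        .build();

        // The public replacement for the removed fromTableSource() path.
        Table table = tEnv.from(descriptor);
        table.execute().print();
    }
}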

flink-python/src/main/java/org/apache/flink/table/runtime/arrow/ArrowUtils.java

Lines changed: 30 additions & 4 deletions
@@ -24,15 +24,18 @@
 import org.apache.flink.configuration.ExecutionOptions;
 import org.apache.flink.core.memory.ByteArrayOutputStreamWithPos;
 import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.TableDescriptor;
 import org.apache.flink.table.api.TableEnvironment;
+import org.apache.flink.table.api.TableException;
 import org.apache.flink.table.api.internal.TableEnvironmentImpl;
 import org.apache.flink.table.api.internal.TableImpl;
 import org.apache.flink.table.data.ArrayData;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.data.columnar.vector.ColumnVector;
 import org.apache.flink.table.data.util.DataFormatConverters;
 import org.apache.flink.table.operations.OutputConversionModifyOperation;
-import org.apache.flink.table.runtime.arrow.sources.ArrowTableSource;
+import org.apache.flink.table.runtime.arrow.sources.ArrowTableSourceFactory;
+import org.apache.flink.table.runtime.arrow.sources.ArrowTableSourceOptions;
 import org.apache.flink.table.runtime.arrow.vectors.ArrowArrayColumnVector;
 import org.apache.flink.table.runtime.arrow.vectors.ArrowBigIntColumnVector;
 import org.apache.flink.table.runtime.arrow.vectors.ArrowBinaryColumnVector;
@@ -159,6 +162,8 @@
 import java.util.List;
 import java.util.stream.Collectors;
 
+import static org.apache.flink.table.types.DataType.getFieldNames;
+
 /** Utilities for Arrow. */
 @Internal
 public final class ArrowUtils {
@@ -475,10 +480,31 @@ public static ColumnVector createColumnVector(ValueVector vector, LogicalType fi
         }
     }
 
-    public static ArrowTableSource createArrowTableSource(DataType dataType, String fileName)
-            throws IOException {
+    public static TableDescriptor createArrowTableSourceDesc(DataType dataType, String fileName) {
+        List<String> fieldNames = getFieldNames(dataType);
+        List<DataType> fieldTypes = dataType.getChildren();
+        org.apache.flink.table.api.Schema.Builder schemaBuilder =
+                org.apache.flink.table.api.Schema.newBuilder();
+        for (int i = 0; i < fieldNames.size(); i++) {
+            schemaBuilder.column(fieldNames.get(i), fieldTypes.get(i));
+        }
+
+        try {
+            byte[][] data = readArrowBatches(fileName);
+            return TableDescriptor.forConnector(ArrowTableSourceFactory.IDENTIFIER)
+                    .option(
+                            ArrowTableSourceOptions.DATA,
+                            ByteArrayUtils.twoDimByteArrayToString(data))
+                    .schema(schemaBuilder.build())
+                    .build();
+        } catch (Throwable e) {
+            throw new TableException("Failed to read the arrow data from " + fileName, e);
+        }
+    }
+
+    public static byte[][] readArrowBatches(String fileName) throws IOException {
         try (FileInputStream fis = new FileInputStream(fileName)) {
-            return new ArrowTableSource(dataType, readArrowBatches(fis.getChannel()));
+            return readArrowBatches(fis.getChannel());
         }
     }
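
In the new createArrowTableSourceDesc, the descriptor schema is rebuilt from the row DataType by pairing DataType.getFieldNames with the type's children. A self-contained sketch of that pattern follows; the row type below is hypothetical, and the static getFieldNames helper is assumed to be available as imported in this commit:

import java.util.List;

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.types.DataType;

import static org.apache.flink.table.types.DataType.getFieldNames;

public class SchemaFromRowTypeExample {
    public static void main(String[] args) {
        // Stand-in for the DataType that from_pandas() derives from a pandas DataFrame.
        DataType rowType =
                DataTypes.ROW(
                        DataTypes.FIELD("id", DataTypes.BIGINT()),
                        DataTypes.FIELD("name", DataTypes.STRING()));

        // Same loop as createArrowTableSourceDesc: one physical column per field.
        List<String> fieldNames = getFieldNames(rowType);
        List<DataType> fieldTypes = rowType.getChildren();

        Schema.Builder builder = Schema.newBuilder();
        for (int i = 0; i < fieldNames.size(); i++) {
            builder.column(fieldNames.get(i), fieldTypes.get(i));
        }
        System.out.println(builder.build());
    }
}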

flink-python/src/main/java/org/apache/flink/table/runtime/arrow/ByteArrayUtils.java

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.runtime.arrow;
+
+import org.apache.flink.annotation.Internal;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.Base64;
+
+/** A utility class for converting byte[][] to String and String to byte[][]. */
+@Internal
+public class ByteArrayUtils {
+
+    /** Convert byte[][] to String. */
+    public static String twoDimByteArrayToString(byte[][] byteArray) throws IOException {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        ObjectOutputStream oos = new ObjectOutputStream(bos);
+        oos.writeObject(byteArray);
+        oos.flush();
+        byte[] serializedArray = bos.toByteArray();
+
+        return Base64.getEncoder().encodeToString(serializedArray);
+    }
+
+    /** Convert String to byte[][]. */
+    public static byte[][] stringToTwoDimByteArray(String str)
+            throws IOException, ClassNotFoundException {
+        byte[] bytes = Base64.getDecoder().decode(str);
+
+        ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
+        ObjectInputStream ois = new ObjectInputStream(bis);
+        return (byte[][]) ois.readObject();
+    }
+}
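
A quick round trip through the new helper: the Arrow batches (a byte[][]) are Java-serialized, Base64-encoded into a single string that can travel as a connector option, and decoded back on the other side. The sample payload below is made up:

import java.nio.charset.StandardCharsets;

import org.apache.flink.table.runtime.arrow.ByteArrayUtils;

public class ByteArrayUtilsRoundTrip {
    public static void main(String[] args) throws Exception {
        // Two fake "record batches" standing in for serialized Arrow data.
        byte[][] batches = {
            "batch-0".getBytes(StandardCharsets.UTF_8),
            "batch-1".getBytes(StandardCharsets.UTF_8)
        };

        // Encode to a single Base64 string (what ends up in the 'data' option)...
        String encoded = ByteArrayUtils.twoDimByteArrayToString(batches);

        // ...and restore the original byte[][] when the source is created.
        byte[][] decoded = ByteArrayUtils.stringToTwoDimByteArray(encoded);
        System.out.println(new String(decoded[1], StandardCharsets.UTF_8)); // prints: batch-1
    }
}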

flink-python/src/main/java/org/apache/flink/table/runtime/arrow/sources/ArrowTableSource.java

Lines changed: 30 additions & 19 deletions
@@ -19,43 +19,54 @@
 package org.apache.flink.table.runtime.arrow.sources;
 
 import org.apache.flink.annotation.Internal;
-import org.apache.flink.legacy.table.sources.StreamTableSource;
-import org.apache.flink.streaming.api.datastream.DataStream;
-import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.apache.flink.table.data.RowData;
-import org.apache.flink.table.legacy.api.TableSchema;
+import org.apache.flink.legacy.table.connector.source.SourceFunctionProvider;
+import org.apache.flink.table.api.TableException;
+import org.apache.flink.table.connector.ChangelogMode;
+import org.apache.flink.table.connector.source.DynamicTableSource;
+import org.apache.flink.table.connector.source.ScanTableSource;
+import org.apache.flink.table.runtime.arrow.ByteArrayUtils;
 import org.apache.flink.table.types.DataType;
-import org.apache.flink.table.types.utils.DataTypeUtils;
 
-/** A {@link StreamTableSource} for serialized arrow record batch data. */
+/** A {@link ScanTableSource} for serialized arrow record batch data. */
 @Internal
-public class ArrowTableSource implements StreamTableSource<RowData> {
+public class ArrowTableSource implements ScanTableSource {
 
-    final DataType dataType;
-    final byte[][] arrowData;
+    private final DataType dataType;
 
-    public ArrowTableSource(DataType dataType, byte[][] arrowData) {
+    private final byte[][] arrowData;
+
+    public ArrowTableSource(DataType dataType, String data) {
+        this.dataType = dataType;
+        try {
+            this.arrowData = ByteArrayUtils.stringToTwoDimByteArray(data);
+        } catch (Throwable e) {
+            throw new TableException(
+                    "Failed to convert the data from String to byte[][].\nThe data is: " + data, e);
+        }
+    }
+
+    private ArrowTableSource(DataType dataType, byte[][] arrowData) {
         this.dataType = dataType;
         this.arrowData = arrowData;
     }
 
     @Override
-    public boolean isBounded() {
-        return true;
+    public DynamicTableSource copy() {
+        return new ArrowTableSource(dataType, arrowData);
     }
 
     @Override
-    public DataStream<RowData> getDataStream(StreamExecutionEnvironment execEnv) {
-        return execEnv.addSource(new ArrowSourceFunction(dataType, arrowData));
+    public String asSummaryString() {
+        return "ArrowTableSource";
    }
 
     @Override
-    public TableSchema getTableSchema() {
-        return TableSchema.fromResolvedSchema(DataTypeUtils.expandCompositeTypeToSchema(dataType));
+    public ChangelogMode getChangelogMode() {
+        return ChangelogMode.insertOnly();
     }
 
     @Override
-    public DataType getProducedDataType() {
-        return dataType;
+    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
+        return SourceFunctionProvider.of(new ArrowSourceFunction(dataType, arrowData), true);
     }
 }
flink-python/src/main/java/org/apache/flink/table/runtime/arrow/sources/ArrowTableSourceFactory.java

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.runtime.arrow.sources;
+
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.table.connector.source.DynamicTableSource;
+import org.apache.flink.table.factories.DynamicTableSourceFactory;
+import org.apache.flink.table.factories.FactoryUtil;
+import org.apache.flink.table.types.DataType;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/** Factory for creating configured instances of {@link ArrowTableSource}.. */
+public class ArrowTableSourceFactory implements DynamicTableSourceFactory {
+
+    public static final String IDENTIFIER = "python-arrow-source";
+
+    @Override
+    public String factoryIdentifier() {
+        return IDENTIFIER;
+    }
+
+    @Override
+    public Set<ConfigOption<?>> requiredOptions() {
+        Set<ConfigOption<?>> options = new HashSet<>();
+        options.add(ArrowTableSourceOptions.DATA);
+        return options;
+    }
+
+    @Override
+    public Set<ConfigOption<?>> optionalOptions() {
+        return new HashSet<>();
+    }
+
+    @Override
+    public DynamicTableSource createDynamicTableSource(Context context) {
+        FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
+        ReadableConfig tableOptions = helper.getOptions();
+
+        String data = tableOptions.get(ArrowTableSourceOptions.DATA);
+        DataType dataType = context.getPhysicalRowDataType();
+        return new ArrowTableSource(dataType, data);
+    }
+}
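
Tying the factory to its option: a hedged sketch of the TableDescriptor that resolves to ArrowTableSourceFactory through its 'python-arrow-source' identifier. The schema and the Base64 payload below are placeholders; in the real flow they come from createArrowTableSourceDesc:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.api.TableDescriptor;

public class ArrowSourceDescriptorSketch {
    public static void main(String[] args) {
        // Placeholder; normally ByteArrayUtils.twoDimByteArrayToString(readArrowBatches(file)).
        String encodedBatches = "...base64...";

        TableDescriptor descriptor =
                TableDescriptor.forConnector("python-arrow-source") // ArrowTableSourceFactory.IDENTIFIER
                        .option("data", encodedBatches) // ArrowTableSourceOptions.DATA
                        .schema(
                                Schema.newBuilder()
                                        .column("id", DataTypes.BIGINT())
                                        .column("name", DataTypes.STRING())
                                        .build())
                        .build();

        System.out.println(descriptor);
    }
}
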
flink-python/src/main/java/org/apache/flink/table/runtime/arrow/sources/ArrowTableSourceOptions.java

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.runtime.arrow.sources;
+
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.ConfigOptions;
+
+/** Table options for the {@link ArrowTableSource}. */
+public class ArrowTableSourceOptions {
+
+    public static final ConfigOption<String> DATA =
+            ConfigOptions.key("data")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "This is the data serialized by Arrow with a byte two-dimensional array. "
+                                    + "Note: The byte two-dimensional array is converted into a string using base64.");
+}

flink-python/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory

Lines changed: 1 addition & 0 deletions
@@ -14,3 +14,4 @@
 # limitations under the License.
 
 org.apache.flink.table.utils.python.PythonDynamicTableFactory
+org.apache.flink.table.runtime.arrow.sources.ArrowTableSourceFactory
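
The service entry above is what makes the new factory discoverable by its identifier at runtime. A small sketch of that lookup, assuming the standard FactoryUtil.discoverFactory signature:

import org.apache.flink.table.factories.DynamicTableSourceFactory;
import org.apache.flink.table.factories.FactoryUtil;

public class DiscoverArrowFactoryExample {
    public static void main(String[] args) {
        // Resolves the factory registered in META-INF/services by its identifier;
        // the same mechanism the planner uses when it sees connector = 'python-arrow-source'.
        DynamicTableSourceFactory factory =
                FactoryUtil.discoverFactory(
                        Thread.currentThread().getContextClassLoader(),
                        DynamicTableSourceFactory.class,
                        "python-arrow-source");
        System.out.println(factory.factoryIdentifier());
    }
}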
