Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b3b13c5

Browse files
committedMar 17, 2025
[FLINK-37460][state] Make state processor API checkpoint ID configurable
1 parent 69d998f commit b3b13c5

23 files changed

+285
-68
lines changed
 

‎flink-libraries/flink-state-processing-api/src/main/java/org/apache/flink/state/api/KeyedOperatorTransformation.java

+14-2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ public class KeyedOperatorTransformation<K, T> {
4747
/** The data set containing the data to bootstrap the operator state with. */
4848
private final DataSet<T> dataSet;
4949

50+
/** Checkpoint ID. */
51+
private final long checkpointId;
52+
5053
/** Local max parallelism for the bootstrapped operator. */
5154
private final OptionalInt operatorMaxParallelism;
5255

@@ -60,11 +63,13 @@ public class KeyedOperatorTransformation<K, T> {
6063

6164
KeyedOperatorTransformation(
6265
DataSet<T> dataSet,
66+
long checkpointId,
6367
OptionalInt operatorMaxParallelism,
6468
@Nullable Timestamper<T> timestamper,
6569
KeySelector<T, K> keySelector,
6670
TypeInformation<K> keyType) {
6771
this.dataSet = dataSet;
72+
this.checkpointId = checkpointId;
6873
this.operatorMaxParallelism = operatorMaxParallelism;
6974
this.timestamper = timestamper;
7075
this.keySelector = keySelector;
@@ -84,7 +89,8 @@ public class KeyedOperatorTransformation<K, T> {
8489
public BootstrapTransformation<T> transform(KeyedStateBootstrapFunction<K, T> processFunction) {
8590
SavepointWriterOperatorFactory factory =
8691
(timestamp, path) ->
87-
new KeyedStateBootstrapOperator<>(timestamp, path, processFunction);
92+
new KeyedStateBootstrapOperator<>(
93+
checkpointId, timestamp, path, processFunction);
8894
return transform(factory);
8995
}
9096

@@ -116,6 +122,12 @@ public BootstrapTransformation<T> transform(SavepointWriterOperatorFactory facto
116122
public <W extends Window> WindowedOperatorTransformation<T, K, W> window(
117123
WindowAssigner<? super T, W> assigner) {
118124
return new WindowedOperatorTransformation<>(
119-
dataSet, operatorMaxParallelism, timestamper, keySelector, keyType, assigner);
125+
dataSet,
126+
checkpointId,
127+
operatorMaxParallelism,
128+
timestamper,
129+
keySelector,
130+
keyType,
131+
assigner);
120132
}
121133
}

‎flink-libraries/flink-state-processing-api/src/main/java/org/apache/flink/state/api/KeyedStateTransformation.java

+8-2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ public class KeyedStateTransformation<K, T> {
4343
/** The data set containing the data to bootstrap the operator state with. */
4444
private final DataStream<T> stream;
4545

46+
/** Checkpoint ID. */
47+
private final long checkpointId;
48+
4649
/** Local max parallelism for the bootstrapped operator. */
4750
private final OptionalInt operatorMaxParallelism;
4851

@@ -54,10 +57,12 @@ public class KeyedStateTransformation<K, T> {
5457

5558
KeyedStateTransformation(
5659
DataStream<T> stream,
60+
long checkpointId,
5761
OptionalInt operatorMaxParallelism,
5862
KeySelector<T, K> keySelector,
5963
TypeInformation<K> keyType) {
6064
this.stream = stream;
65+
this.checkpointId = checkpointId;
6166
this.operatorMaxParallelism = operatorMaxParallelism;
6267
this.keySelector = keySelector;
6368
this.keyType = keyType;
@@ -77,7 +82,8 @@ public StateBootstrapTransformation<T> transform(
7782
KeyedStateBootstrapFunction<K, T> processFunction) {
7883
SavepointWriterOperatorFactory factory =
7984
(timestamp, path) ->
80-
new KeyedStateBootstrapOperator<>(timestamp, path, processFunction);
85+
new KeyedStateBootstrapOperator<>(
86+
checkpointId, timestamp, path, processFunction);
8187
return transform(factory);
8288
}
8389

@@ -109,6 +115,6 @@ public StateBootstrapTransformation<T> transform(SavepointWriterOperatorFactory
109115
public <W extends Window> WindowedStateTransformation<T, K, W> window(
110116
WindowAssigner<? super T, W> assigner) {
111117
return new WindowedStateTransformation<>(
112-
stream, operatorMaxParallelism, keySelector, keyType, assigner);
118+
stream, checkpointId, operatorMaxParallelism, keySelector, keyType, assigner);
113119
}
114120
}

‎flink-libraries/flink-state-processing-api/src/main/java/org/apache/flink/state/api/OneInputOperatorTransformation.java

+16-5
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,21 @@ public class OneInputOperatorTransformation<T> {
5555
/** The data set containing the data to bootstrap the operator state with. */
5656
private final DataSet<T> dataSet;
5757

58+
/** Checkpoint ID. */
59+
private final long checkpointId;
60+
5861
/** Local max parallelism for the bootstrapped operator. */
5962
private OptionalInt operatorMaxParallelism = OptionalInt.empty();
6063

6164
@Nullable private Timestamper<T> timestamper;
6265

6366
OneInputOperatorTransformation(DataSet<T> dataSet) {
67+
this(dataSet, 0L);
68+
}
69+
70+
OneInputOperatorTransformation(DataSet<T> dataSet, long checkpointId) {
6471
this.dataSet = dataSet;
72+
this.checkpointId = checkpointId;
6573
}
6674

6775
/**
@@ -108,7 +116,9 @@ public OneInputOperatorTransformation<T> assignTimestamps(TimestampAssigner<T> a
108116
*/
109117
public BootstrapTransformation<T> transform(StateBootstrapFunction<T> processFunction) {
110118
SavepointWriterOperatorFactory factory =
111-
(timestamp, path) -> new StateBootstrapOperator<>(timestamp, path, processFunction);
119+
(timestamp, path) ->
120+
new StateBootstrapOperator<>(
121+
checkpointId, timestamp, path, processFunction);
112122

113123
return transform(factory);
114124
}
@@ -127,7 +137,8 @@ public BootstrapTransformation<T> transform(
127137
BroadcastStateBootstrapFunction<T> processFunction) {
128138
SavepointWriterOperatorFactory factory =
129139
(timestamp, path) ->
130-
new BroadcastStateBootstrapOperator<>(timestamp, path, processFunction);
140+
new BroadcastStateBootstrapOperator<>(
141+
checkpointId, timestamp, path, processFunction);
131142

132143
return transform(factory);
133144
}
@@ -156,7 +167,7 @@ public <K> KeyedOperatorTransformation<K, T> keyBy(KeySelector<T, K> keySelector
156167
TypeInformation<K> keyType =
157168
TypeExtractor.getKeySelectorTypes(keySelector, dataSet.getType());
158169
return new KeyedOperatorTransformation<>(
159-
dataSet, operatorMaxParallelism, timestamper, keySelector, keyType);
170+
dataSet, checkpointId, operatorMaxParallelism, timestamper, keySelector, keyType);
160171
}
161172

162173
/**
@@ -170,7 +181,7 @@ public <K> KeyedOperatorTransformation<K, T> keyBy(KeySelector<T, K> keySelector
170181
public <K> KeyedOperatorTransformation<K, T> keyBy(
171182
KeySelector<T, K> keySelector, TypeInformation<K> keyType) {
172183
return new KeyedOperatorTransformation<>(
173-
dataSet, operatorMaxParallelism, timestamper, keySelector, keyType);
184+
dataSet, checkpointId, operatorMaxParallelism, timestamper, keySelector, keyType);
174185
}
175186

176187
/**
@@ -211,6 +222,6 @@ private KeyedOperatorTransformation<Tuple, T> keyBy(Keys<T> keys) {
211222
TypeInformation<Tuple> keyType =
212223
TypeExtractor.getKeySelectorTypes(keySelector, dataSet.getType());
213224
return new KeyedOperatorTransformation<>(
214-
dataSet, operatorMaxParallelism, timestamper, keySelector, keyType);
225+
dataSet, checkpointId, operatorMaxParallelism, timestamper, keySelector, keyType);
215226
}
216227
}

‎flink-libraries/flink-state-processing-api/src/main/java/org/apache/flink/state/api/OneInputStateTransformation.java

+19-5
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,19 @@ public class OneInputStateTransformation<T> {
4848
/** The data stream containing the data to bootstrap the operator state with. */
4949
private final DataStream<T> stream;
5050

51+
/** Checkpoint ID. */
52+
private final long checkpointId;
53+
5154
/** Local max parallelism for the bootstrapped operator. */
5255
private OptionalInt operatorMaxParallelism = OptionalInt.empty();
5356

5457
OneInputStateTransformation(DataStream<T> stream) {
58+
this(stream, 0L);
59+
}
60+
61+
OneInputStateTransformation(DataStream<T> stream, long checkpointId) {
5562
this.stream = stream;
63+
this.checkpointId = checkpointId;
5664
}
5765

5866
/**
@@ -81,7 +89,9 @@ public OneInputStateTransformation<T> setMaxParallelism(int maxParallelism) {
8189
*/
8290
public StateBootstrapTransformation<T> transform(StateBootstrapFunction<T> processFunction) {
8391
SavepointWriterOperatorFactory factory =
84-
(timestamp, path) -> new StateBootstrapOperator<>(timestamp, path, processFunction);
92+
(timestamp, path) ->
93+
new StateBootstrapOperator<>(
94+
checkpointId, timestamp, path, processFunction);
8595

8696
return transform(factory);
8797
}
@@ -100,7 +110,8 @@ public StateBootstrapTransformation<T> transform(
100110
BroadcastStateBootstrapFunction<T> processFunction) {
101111
SavepointWriterOperatorFactory factory =
102112
(timestamp, path) ->
103-
new BroadcastStateBootstrapOperator<>(timestamp, path, processFunction);
113+
new BroadcastStateBootstrapOperator<>(
114+
checkpointId, timestamp, path, processFunction);
104115

105116
return transform(factory);
106117
}
@@ -128,7 +139,8 @@ public StateBootstrapTransformation<T> transform(SavepointWriterOperatorFactory
128139
public <K> KeyedStateTransformation<K, T> keyBy(KeySelector<T, K> keySelector) {
129140
TypeInformation<K> keyType =
130141
TypeExtractor.getKeySelectorTypes(keySelector, stream.getType());
131-
return new KeyedStateTransformation<>(stream, operatorMaxParallelism, keySelector, keyType);
142+
return new KeyedStateTransformation<>(
143+
stream, checkpointId, operatorMaxParallelism, keySelector, keyType);
132144
}
133145

134146
/**
@@ -141,7 +153,8 @@ public <K> KeyedStateTransformation<K, T> keyBy(KeySelector<T, K> keySelector) {
141153
*/
142154
public <K> KeyedStateTransformation<K, T> keyBy(
143155
KeySelector<T, K> keySelector, TypeInformation<K> keyType) {
144-
return new KeyedStateTransformation<>(stream, operatorMaxParallelism, keySelector, keyType);
156+
return new KeyedStateTransformation<>(
157+
stream, checkpointId, operatorMaxParallelism, keySelector, keyType);
145158
}
146159

147160
/**
@@ -181,6 +194,7 @@ private KeyedStateTransformation<Tuple, T> keyBy(Keys<T> keys) {
181194

182195
TypeInformation<Tuple> keyType =
183196
TypeExtractor.getKeySelectorTypes(keySelector, stream.getType());
184-
return new KeyedStateTransformation<>(stream, operatorMaxParallelism, keySelector, keyType);
197+
return new KeyedStateTransformation<>(
198+
stream, checkpointId, operatorMaxParallelism, keySelector, keyType);
185199
}
186200
}

‎flink-libraries/flink-state-processing-api/src/main/java/org/apache/flink/state/api/OperatorTransformation.java

+29
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,22 @@ public static <T> OneInputOperatorTransformation<T> bootstrapWith(DataSet<T> dat
7171
return new OneInputOperatorTransformation<>(dataSet);
7272
}
7373

74+
/**
75+
* Create a new {@link OperatorTransformation} from a {@link DataSet}.
76+
*
77+
* @param dataSet A dataset of elements.
78+
* @param checkpointId checkpoint ID.
79+
* @param <T> The type of the input.
80+
* @return A {@link OneInputOperatorTransformation}.
81+
* @deprecated use {@link #bootstrapWith(DataStream)} to bootstrap a savepoint using the data
82+
* stream api under batch execution.
83+
*/
84+
@Deprecated
85+
public static <T> OneInputOperatorTransformation<T> bootstrapWith(
86+
DataSet<T> dataSet, long checkpointId) {
87+
return new OneInputOperatorTransformation<>(dataSet, checkpointId);
88+
}
89+
7490
/**
7591
* Create a new {@link OneInputStateTransformation} from a {@link DataStream}.
7692
*
@@ -81,4 +97,17 @@ public static <T> OneInputOperatorTransformation<T> bootstrapWith(DataSet<T> dat
8197
public static <T> OneInputStateTransformation<T> bootstrapWith(DataStream<T> stream) {
8298
return new OneInputStateTransformation<>(stream);
8399
}
100+
101+
/**
102+
* Create a new {@link OneInputStateTransformation} from a {@link DataStream}.
103+
*
104+
* @param stream A data stream of elements.
105+
* @param checkpointId checkpoint ID.
106+
* @param <T> The type of the input.
107+
* @return A {@link OneInputStateTransformation}.
108+
*/
109+
public static <T> OneInputStateTransformation<T> bootstrapWith(
110+
DataStream<T> stream, long checkpointId) {
111+
return new OneInputStateTransformation<>(stream, checkpointId);
112+
}
84113
}

‎flink-libraries/flink-state-processing-api/src/main/java/org/apache/flink/state/api/Savepoint.java

+10-4
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,10 @@ public static ExistingSavepoint load(ExecutionEnvironment env, String path) thro
7373

7474
SavepointMetadata savepointMetadata =
7575
new SavepointMetadata(
76-
maxParallelism, metadata.getMasterStates(), metadata.getOperatorStates());
76+
metadata.getCheckpointId(),
77+
maxParallelism,
78+
metadata.getMasterStates(),
79+
metadata.getOperatorStates());
7780
return new ExistingSavepoint(env, savepointMetadata, null);
7881
}
7982

@@ -102,7 +105,10 @@ public static ExistingSavepoint load(
102105

103106
SavepointMetadata savepointMetadata =
104107
new SavepointMetadata(
105-
maxParallelism, metadata.getMasterStates(), metadata.getOperatorStates());
108+
metadata.getCheckpointId(),
109+
maxParallelism,
110+
metadata.getMasterStates(),
111+
metadata.getOperatorStates());
106112
return new ExistingSavepoint(env, savepointMetadata, stateBackend);
107113
}
108114

@@ -124,7 +130,7 @@ public static NewSavepoint create(int maxParallelism) {
124130

125131
SavepointMetadata metadata =
126132
new SavepointMetadata(
127-
maxParallelism, Collections.emptyList(), Collections.emptyList());
133+
0L, maxParallelism, Collections.emptyList(), Collections.emptyList());
128134
return new NewSavepoint(metadata, null);
129135
}
130136

@@ -147,7 +153,7 @@ public static NewSavepoint create(StateBackend stateBackend, int maxParallelism)
147153

148154
SavepointMetadata metadata =
149155
new SavepointMetadata(
150-
maxParallelism, Collections.emptyList(), Collections.emptyList());
156+
0L, maxParallelism, Collections.emptyList(), Collections.emptyList());
151157
return new NewSavepoint(metadata, stateBackend);
152158
}
153159
}

‎flink-libraries/flink-state-processing-api/src/main/java/org/apache/flink/state/api/SavepointReader.java

+8-2
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,10 @@ public static SavepointReader read(StreamExecutionEnvironment env, String path)
8282

8383
SavepointMetadataV2 savepointMetadata =
8484
new SavepointMetadataV2(
85-
maxParallelism, metadata.getMasterStates(), metadata.getOperatorStates());
85+
metadata.getCheckpointId(),
86+
maxParallelism,
87+
metadata.getMasterStates(),
88+
metadata.getOperatorStates());
8689
return new SavepointReader(env, savepointMetadata, null);
8790
}
8891

@@ -111,7 +114,10 @@ public static SavepointReader read(
111114

112115
SavepointMetadataV2 savepointMetadata =
113116
new SavepointMetadataV2(
114-
maxParallelism, metadata.getMasterStates(), metadata.getOperatorStates());
117+
metadata.getCheckpointId(),
118+
maxParallelism,
119+
metadata.getMasterStates(),
120+
metadata.getOperatorStates());
115121
return new SavepointReader(env, savepointMetadata, stateBackend);
116122
}
117123

0 commit comments

Comments
 (0)
Please sign in to comment.