Skip to content

Commit a489ea0

Browse files
authored
[To dev/1.1] Correct the retained size calculation for BinaryColumn and BinaryColumnBuilder (#514)
* Correct the retained size calculation for BinaryColumn and BinaryColumnBuilder
* Empty-Commit
* remove useless file
1 parent 10e9ea5 commit a489ea0

File tree

15 files changed

+128 -18 lines changed

java/common/src/main/java/org/apache/tsfile/block/column/Column.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,13 @@ default TsPrimitiveType getTsPrimitiveType(int position) {
133133
*/
134134
long getRetainedSizeInBytes();
135135

136+
/**
137+
* Returns the size of this Column as if it were compacted, ignoring any over-allocations and any
138+
* unloaded nested Columns. For example, in dictionary blocks, this only counts each dictionary
139+
* entry once, rather than each time a value is referenced.
140+
*/
141+
long getSizeInBytes();
142+
136143
/**
137144
* Returns a column starting at the specified position and extends for the specified length. The
138145
* specified region must be entirely contained within this column.

java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -271,6 +271,18 @@ public static long sizeOf(double[] arr) {
271271
: alignObjectSize(NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES * arr.length);
272272
}
273273

274+
public static long sizeOf(Accountable[] arr) {
275+
if (arr == null) {
276+
return 0;
277+
} else {
278+
long size = shallowSizeOf(arr);
279+
for (Accountable obj : arr) {
280+
size += obj != null ? obj.ramBytesUsed() : 0;
281+
}
282+
return size;
283+
}
284+
}
285+
274286
/** Returns the size in bytes of the String[] object. */
275287
public static long sizeOf(String[] arr) {
276288
if (arr == null) {

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,8 @@ public static TsBlock wrapBlocksWithoutCopy(
6565

6666
private volatile long retainedSizeInBytes = -1;
6767

68+
private volatile long sizeInBytes = -1;
69+
6870
public TsBlock(int positionCount) {
6971
this(false, positionCount, null, EMPTY_COLUMNS);
7072
}
@@ -117,6 +119,18 @@ public long getRetainedSizeInBytes() {
117119
return retainedSizeInBytes;
118120
}
119121

122+
/**
123+
* Returns the size of this block as if it were compacted, ignoring any over-allocations and any
124+
* unloaded nested blocks. For example, in dictionary blocks, this only counts each dictionary
125+
* entry once, rather than each time a value is referenced.
126+
*/
127+
public long getSizeInBytes() {
128+
if (sizeInBytes < 0) {
129+
return updateSize();
130+
}
131+
return sizeInBytes;
132+
}
133+
120134
/**
121135
* @param positionOffset start offset
122136
* @param length slice length
@@ -504,6 +518,16 @@ private long updateRetainedSize() {
504518
return newRetainedSizeInBytes;
505519
}
506520

521+
private long updateSize() {
522+
long newSizeInBytes = INSTANCE_SIZE;
523+
newSizeInBytes += timeColumn.getSizeInBytes();
524+
for (Column column : valueColumns) {
525+
newSizeInBytes += column.getSizeInBytes();
526+
}
527+
this.sizeInBytes = newSizeInBytes;
528+
return newSizeInBytes;
529+
}
530+
507531
public int getTotalInstanceSize() {
508532
int totalInstanceSize = INSTANCE_SIZE;
509533
totalInstanceSize += timeColumn.getInstanceSize();

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -104,8 +104,6 @@ private TsBlockBuilder(int initialExpectedEntries, int maxTsBlockBytes, List<TSD
104104
valueColumnBuilders = new ColumnBuilder[types.size()];
105105

106106
for (int i = 0; i < valueColumnBuilders.length; i++) {
107-
// TODO use Type interface to encapsulate createColumnBuilder to each concrete type class
108-
// instead of switch-case
109107
switch (types.get(i)) {
110108
case BOOLEAN:
111109
valueColumnBuilders[i] =
@@ -176,8 +174,6 @@ public void buildValueColumnBuilders(List<TSDataType> types) {
176174
valueColumnBuilders = new ColumnBuilder[types.size()];
177175
int initialExpectedEntries = timeColumnBuilder.getPositionCount();
178176
for (int i = 0; i < valueColumnBuilders.length; i++) {
179-
// TODO use Type interface to encapsulate createColumnBuilder to each concrete type class
180-
// instead of switch-case
181177
switch (types.get(i)) {
182178
case BOOLEAN:
183179
valueColumnBuilders[i] =

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java

Lines changed: 40 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,8 @@
3030
import java.util.Optional;
3131

3232
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
33+
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf;
3334
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
34-
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray;
3535

3636
public class BinaryColumn implements Column {
3737

@@ -44,6 +44,7 @@ public class BinaryColumn implements Column {
4444
private final Binary[] values;
4545

4646
private final long retainedSizeInBytes;
47+
private final long sizeInBytes;
4748

4849
public BinaryColumn(int positionCount, Optional<boolean[]> valueIsNull, Binary[] values) {
4950
this(0, positionCount, valueIsNull.orElse(null), values);
@@ -69,9 +70,37 @@ public BinaryColumn(int positionCount, Optional<boolean[]> valueIsNull, Binary[]
6970
}
7071
this.valueIsNull = valueIsNull;
7172

72-
// TODO we need to sum up all the Binary's retainedSize here
73-
retainedSizeInBytes =
74-
INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOfObjectArray(positionCount);
73+
retainedSizeInBytes = INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOf(values);
74+
sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L;
75+
}
76+
77+
// Called by getRegion, which already knows the retained size of the shared underlying arrays.
78+
private BinaryColumn(
79+
int arrayOffset,
80+
int positionCount,
81+
boolean[] valueIsNull,
82+
Binary[] values,
83+
long retainedSizeInBytes) {
84+
if (arrayOffset < 0) {
85+
throw new IllegalArgumentException("arrayOffset is negative");
86+
}
87+
this.arrayOffset = arrayOffset;
88+
if (positionCount < 0) {
89+
throw new IllegalArgumentException("positionCount is negative");
90+
}
91+
this.positionCount = positionCount;
92+
93+
if (values.length - arrayOffset < positionCount) {
94+
throw new IllegalArgumentException("values length is less than positionCount");
95+
}
96+
this.values = values;
97+
98+
if (valueIsNull != null && valueIsNull.length - arrayOffset < positionCount) {
99+
throw new IllegalArgumentException("isNull length is less than positionCount");
100+
}
101+
this.valueIsNull = valueIsNull;
102+
this.retainedSizeInBytes = retainedSizeInBytes;
103+
this.sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L;
75104
}
76105

77106
@Override
@@ -134,10 +163,16 @@ public long getRetainedSizeInBytes() {
134163
return retainedSizeInBytes;
135164
}
136165

166+
@Override
167+
public long getSizeInBytes() {
168+
return sizeInBytes;
169+
}
170+
137171
@Override
138172
public Column getRegion(int positionOffset, int length) {
139173
checkValidRegion(getPositionCount(), positionOffset, length);
140-
return new BinaryColumn(positionOffset + arrayOffset, length, valueIsNull, values);
174+
return new BinaryColumn(
175+
positionOffset + arrayOffset, length, valueIsNull, values, getRetainedSizeInBytes());
141176
}
142177

143178
@Override

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@
3232

3333
import static java.lang.Math.max;
3434
import static org.apache.tsfile.read.common.block.column.ColumnUtil.calculateBlockResetSize;
35-
import static org.apache.tsfile.utils.RamUsageEstimator.shallowSizeOf;
3635
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf;
3736

3837
public class BinaryColumnBuilder implements ColumnBuilder {
@@ -124,7 +123,6 @@ public TSDataType getDataType() {
124123

125124
@Override
126125
public long getRetainedSizeInBytes() {
127-
// TODO we need to sum up all the Binary's retainedSize here
128126
long size = INSTANCE_SIZE + arraysRetainedSizeInBytes;
129127
if (columnBuilderStatus != null) {
130128
size += ColumnBuilderStatus.INSTANCE_SIZE;
@@ -134,7 +132,6 @@ public long getRetainedSizeInBytes() {
134132

135133
@Override
136134
public ColumnBuilder newColumnBuilderLike(ColumnBuilderStatus columnBuilderStatus) {
137-
// TODO we should take retain size into account here
138135
return new BinaryColumnBuilder(columnBuilderStatus, calculateBlockResetSize(positionCount));
139136
}
140137

@@ -153,6 +150,6 @@ private void growCapacity() {
153150
}
154151

155152
private void updateArraysDataSize() {
156-
arraysRetainedSizeInBytes = sizeOf(valueIsNull) + shallowSizeOf(values);
153+
arraysRetainedSizeInBytes = sizeOf(valueIsNull) + sizeOf(values);
157154
}
158155
}

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,11 @@ public long getRetainedSizeInBytes() {
132132
return retainedSizeInBytes;
133133
}
134134

135+
@Override
136+
public long getSizeInBytes() {
137+
return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
138+
}
139+
135140
@Override
136141
public Column getRegion(int positionOffset, int length) {
137142
checkValidRegion(getPositionCount(), positionOffset, length);

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,11 @@ public long getRetainedSizeInBytes() {
133133
return retainedSizeInBytes;
134134
}
135135

136+
@Override
137+
public long getSizeInBytes() {
138+
return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
139+
}
140+
136141
@Override
137142
public Column getRegion(int positionOffset, int length) {
138143
checkValidRegion(getPositionCount(), positionOffset, length);

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,11 @@ public long getRetainedSizeInBytes() {
133133
return retainedSizeInBytes;
134134
}
135135

136+
@Override
137+
public long getSizeInBytes() {
138+
return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
139+
}
140+
136141
@Override
137142
public Column getRegion(int positionOffset, int length) {
138143
checkValidRegion(getPositionCount(), positionOffset, length);

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,11 @@ public long getRetainedSizeInBytes() {
133133
return retainedSizeInBytes;
134134
}
135135

136+
@Override
137+
public long getSizeInBytes() {
138+
return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
139+
}
140+
136141
@Override
137142
public Column getRegion(int positionOffset, int length) {
138143
checkValidRegion(getPositionCount(), positionOffset, length);

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,11 @@ public long getRetainedSizeInBytes() {
133133
return retainedSizeInBytes;
134134
}
135135

136+
@Override
137+
public long getSizeInBytes() {
138+
return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
139+
}
140+
136141
@Override
137142
public Column getRegion(int positionOffset, int length) {
138143
checkValidRegion(getPositionCount(), positionOffset, length);

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,11 @@ public long getRetainedSizeInBytes() {
8383
return retainedSizeInBytes;
8484
}
8585

86+
@Override
87+
public long getSizeInBytes() {
88+
return retainedSizeInBytes;
89+
}
90+
8691
@Override
8792
public Column getRegion(int positionOffset, int length) {
8893
checkValidRegion(getPositionCount(), positionOffset, length);

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,11 @@ public long getRetainedSizeInBytes() {
191191
return INSTANCE_SIZE + value.getRetainedSizeInBytes();
192192
}
193193

194+
@Override
195+
public long getSizeInBytes() {
196+
return value.getSizeInBytes();
197+
}
198+
194199
@Override
195200
public Column getRegion(int positionOffset, int length) {
196201
checkValidRegion(positionCount, positionOffset, length);

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,8 +98,7 @@ public boolean isNull(int position) {
9898

9999
@Override
100100
public boolean[] isNull() {
101-
// todo
102-
return null;
101+
throw new UnsupportedOperationException("isNull is not supported for TimeColumn");
103102
}
104103

105104
@Override
@@ -112,6 +111,11 @@ public long getRetainedSizeInBytes() {
112111
return retainedSizeInBytes;
113112
}
114113

114+
@Override
115+
public long getSizeInBytes() {
116+
return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
117+
}
118+
115119
@Override
116120
public Column getRegion(int positionOffset, int length) {
117121
ColumnUtil.checkValidRegion(getPositionCount(), positionOffset, length);

java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -89,12 +89,12 @@ public TsBlock deserialize(ByteBuffer byteBuffer) {
8989
* @return Serialized tsblock.
9090
*/
9191
public ByteBuffer serialize(TsBlock tsBlock) throws IOException {
92-
if (tsBlock.getRetainedSizeInBytes() > Integer.MAX_VALUE) {
92+
if (tsBlock.getSizeInBytes() > Integer.MAX_VALUE) {
9393
throw new IllegalStateException(
94-
"TsBlock should not be that large: " + tsBlock.getRetainedSizeInBytes());
94+
"TsBlock should not be that large: " + tsBlock.getSizeInBytes());
9595
}
9696
ByteArrayOutputStream byteArrayOutputStream =
97-
new ByteArrayOutputStream((int) tsBlock.getRetainedSizeInBytes());
97+
new ByteArrayOutputStream((int) tsBlock.getSizeInBytes());
9898
DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream);
9999

100100
// Value column count.

0 commit comments

Comments
 (0)