Skip to content

Commit 3002d75

Browse files
committed
[ntuple] Add GetNEntries() to RPageSink
This is needed by the RNTupleMerger to properly know the initial number of entries in the destination sink in case of incremental merging. Since the Descriptor's NEntries is not updated until the first cluster group is committed - and since we don't commit a cluster group in InitFromDescriptor() - it cannot be used for that purpose.
1 parent 7a16d86 commit 3002d75

9 files changed

+35
-9
lines changed

Diff for: tree/ntuple/v7/inc/ROOT/RNTupleDescriptor.hxx

+2
Original file line numberDiff line numberDiff line change
@@ -1405,6 +1405,8 @@ public:
14051405
RResult<void> AddClusterGroup(RClusterGroupDescriptor &&clusterGroup);
14061406
RResult<void> AddCluster(RClusterDescriptor &&clusterDesc);
14071407

1408+
RResult<void> AddNEntries(std::uint64_t nEntries);
1409+
14081410
RResult<void> AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
14091411

14101412
/// Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor

Diff for: tree/ntuple/v7/inc/ROOT/RPageNullSink.hxx

+2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ public:
4848
return descriptor;
4949
}
5050

51+
NTupleSize_t GetNEntries() const final { return 0; }
52+
5153
void ConnectFields(const std::vector<RFieldBase *> &fields, NTupleSize_t firstEntry)
5254
{
5355
auto connectField = [&](RFieldBase &f) { CallConnectPageSinkOnField(f, *this, firstEntry); };

Diff for: tree/ntuple/v7/inc/ROOT/RPageSinkBuf.hxx

+2
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ public:
133133

134134
const RNTupleDescriptor &GetDescriptor() const final;
135135

136+
NTupleSize_t GetNEntries() const final { return fInnerSink->GetNEntries(); }
137+
136138
void InitImpl(RNTupleModel &model) final;
137139
void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final;
138140
void UpdateExtraTypeInfo(const RExtraTypeInfoDescriptor &extraTypeInfo) final;

Diff for: tree/ntuple/v7/inc/ROOT/RPageStorage.hxx

+4
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,8 @@ public:
317317
/// Return the RNTupleDescriptor being constructed.
318318
virtual const RNTupleDescriptor &GetDescriptor() const = 0;
319319

320+
virtual NTupleSize_t GetNEntries() const = 0;
321+
320322
/// Physically creates the storage container to hold the ntuple (e.g., a keys a TFile or an S3 bucket)
321323
/// Init() associates column handles to the columns referenced by the model
322324
void Init(RNTupleModel &model)
@@ -519,6 +521,8 @@ public:
519521

520522
const RNTupleDescriptor &GetDescriptor() const final { return fDescriptorBuilder.GetDescriptor(); }
521523

524+
NTupleSize_t GetNEntries() const final { return fPrevClusterNEntries; }
525+
522526
/// Updates the descriptor and calls InitImpl() that handles the backend-specific details (file, DAOS, etc.)
523527
void InitImpl(RNTupleModel &model) final;
524528
void UpdateSchema(const RNTupleModelChangeset &changeset, NTupleSize_t firstEntry) final;

Diff for: tree/ntuple/v7/src/RNTupleDescriptor.cxx

+12-9
Original file line numberDiff line numberDiff line change
@@ -595,11 +595,9 @@ ROOT::Experimental::RNTupleDescriptor::AddClusterGroupDetails(DescriptorId_t clu
595595
return R__FAIL("invalid attempt to re-populate existing cluster");
596596
}
597597
}
598-
std::sort(clusterIds.begin(), clusterIds.end(),
599-
[this](DescriptorId_t a, DescriptorId_t b)
600-
{
601-
return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
602-
});
598+
std::sort(clusterIds.begin(), clusterIds.end(), [this](DescriptorId_t a, DescriptorId_t b) {
599+
return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
600+
});
603601
auto cgBuilder = Internal::RClusterGroupDescriptorBuilder::FromSummary(iter->second);
604602
cgBuilder.AddSortedClusters(clusterIds);
605603
iter->second = cgBuilder.MoveDescriptor().Unwrap();
@@ -949,10 +947,9 @@ ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Internal::RNTupleDescr
949947
for (const auto &[id, _] : fDescriptor.fClusterGroupDescriptors)
950948
fDescriptor.fSortedClusterGroupIds.emplace_back(id);
951949
std::sort(fDescriptor.fSortedClusterGroupIds.begin(), fDescriptor.fSortedClusterGroupIds.end(),
952-
[this](DescriptorId_t a, DescriptorId_t b)
953-
{
954-
return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
955-
fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
950+
[this](DescriptorId_t a, DescriptorId_t b) {
951+
return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
952+
fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
956953
});
957954
RNTupleDescriptor result;
958955
std::swap(result, fDescriptor);
@@ -1173,6 +1170,12 @@ ROOT::Experimental::Internal::RNTupleDescriptorBuilder::AddClusterGroup(RCluster
11731170
return RResult<void>::Success();
11741171
}
11751172

1173+
ROOT::RResult<void> ROOT::Experimental::Internal::RNTupleDescriptorBuilder::AddNEntries(std::uint64_t nEntries)
1174+
{
1175+
fDescriptor.fNEntries += nEntries;
1176+
return RResult<void>::Success();
1177+
}
1178+
11761179
void ROOT::Experimental::Internal::RNTupleDescriptorBuilder::Reset()
11771180
{
11781181
fDescriptor.fName = "";

Diff for: tree/ntuple/v7/src/RNTupleParallelWriter.cxx

+2
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ class RPageSynchronizingSink : public RPageSink {
7272

7373
const RNTupleDescriptor &GetDescriptor() const final { return fInnerSink->GetDescriptor(); }
7474

75+
NTupleSize_t GetNEntries() const final { return fInnerSink->GetNEntries(); }
76+
7577
ColumnHandle_t AddColumn(DescriptorId_t, RColumn &) final { return {}; }
7678
void InitImpl(RNTupleModel &) final {}
7779
void UpdateSchema(const RNTupleModelChangeset &, NTupleSize_t) final

Diff for: tree/ntuple/v7/src/RPageStorage.cxx

+7
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ void ROOT::Experimental::Internal::RPageSource::UnzipClusterImpl(RCluster *clust
257257
continue;
258258
const auto &columnInfos = fActivePhysicalColumns.GetColumnInfos(columnId);
259259

260+
allElements.reserve(allElements.size() + columnInfos.size());
260261
for (const auto &info : columnInfos) {
261262
allElements.emplace_back(GenerateColumnElement(info.fElementId));
262263

@@ -857,6 +858,8 @@ void ROOT::Experimental::Internal::RPagePersistentSink::UpdateSchema(const RNTup
857858
}
858859

859860
const auto nColumns = descriptor.GetNPhysicalColumns();
861+
fOpenColumnRanges.reserve(fOpenColumnRanges.size() + (nColumns - nColumnsBeforeUpdate));
862+
fOpenPageRanges.reserve(fOpenPageRanges.size() + (nColumns - nColumnsBeforeUpdate));
860863
for (DescriptorId_t i = nColumnsBeforeUpdate; i < nColumns; ++i) {
861864
RClusterDescriptor::RColumnRange columnRange;
862865
columnRange.fPhysicalColumnId = i;
@@ -899,8 +902,10 @@ void ROOT::Experimental::Internal::RPagePersistentSink::InitImpl(RNTupleModel &m
899902
projectedFields.GetFieldZero().SetOnDiskId(0);
900903

901904
RNTupleModelChangeset initialChangeset{model};
905+
initialChangeset.fAddedFields.reserve(fieldZero.GetSubFields().size());
902906
for (auto f : fieldZero.GetSubFields())
903907
initialChangeset.fAddedFields.emplace_back(f);
908+
initialChangeset.fAddedProjectedFields.reserve(projectedFields.GetFieldZero().GetSubFields().size());
904909
for (auto f : projectedFields.GetFieldZero().GetSubFields())
905910
initialChangeset.fAddedProjectedFields.emplace_back(f);
906911
UpdateSchema(initialChangeset, 0U);
@@ -1133,6 +1138,7 @@ void ROOT::Experimental::Internal::RPagePersistentSink::CommitClusterGroup()
11331138

11341139
const auto nClusters = descriptor.GetNActiveClusters();
11351140
std::vector<DescriptorId_t> physClusterIDs;
1141+
physClusterIDs.reserve(nClusters);
11361142
for (auto i = fNextClusterInGroup; i < nClusters; ++i) {
11371143
physClusterIDs.emplace_back(fSerializationContext.MapClusterId(i));
11381144
}
@@ -1156,6 +1162,7 @@ void ROOT::Experimental::Internal::RPagePersistentSink::CommitClusterGroup()
11561162
.NClusters(nClusters - fNextClusterInGroup);
11571163
}
11581164
std::vector<DescriptorId_t> clusterIds;
1165+
clusterIds.reserve(nClusters);
11591166
for (auto i = fNextClusterInGroup; i < nClusters; ++i) {
11601167
clusterIds.emplace_back(i);
11611168
}

Diff for: tree/ntuple/v7/test/ntuple_endian.cxx

+2
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ class RPageSinkMock : public RPageSink {
4949
return descriptor;
5050
}
5151

52+
NTupleSize_t GetNEntries() const final { return 0; }
53+
5254
void InitImpl(RNTupleModel &) final {}
5355
void UpdateSchema(const ROOT::Experimental::Internal::RNTupleModelChangeset &, NTupleSize_t) final {}
5456
void UpdateExtraTypeInfo(const ROOT::Experimental::RExtraTypeInfoDescriptor &) final {}

Diff for: tree/ntuple/v7/test/ntuple_storage.cxx

+2
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ class RPageSinkMock : public RPageSink {
4141
return descriptor;
4242
}
4343

44+
NTupleSize_t GetNEntries() const final { return 0; }
45+
4446
void InitImpl(RNTupleModel &) final {}
4547
void UpdateSchema(const ROOT::Experimental::Internal::RNTupleModelChangeset &, NTupleSize_t) final {}
4648
void UpdateExtraTypeInfo(const ROOT::Experimental::RExtraTypeInfoDescriptor &) final {}

0 commit comments

Comments
 (0)