Skip to content

Commit 7dce919

Browse files
committed
[tmva][sofie] Change UnidirectionalBroadcast to take output as raw array
This commit refactors the `UnidirectionalBroadcast` implementation to take the output buffer as a raw C-style array. This makes it easier to use in code generation, as the shape doesn't need to be recomputed to be hardcoded in the construction of the output span, nor are we forced to use a `std::vector` that already encodes the size information. The `std::vector` argument is the real motivation for this change, as it's better to avoid taking vectors as output buffers for intermediate tensors. Forcing the use of `std::vector` prevents some memory optimizations where we take offsetted pointers into a larger memory buffer, and also makes it more difficult to emit code that is differentiable by Clad.
1 parent 4b82f3b commit 7dce919

File tree

7 files changed

+35
-40
lines changed

7 files changed

+35
-40
lines changed

tmva/sofie/inc/TMVA/ROperator_BasicBinary.hxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ public:
151151
fNBroadcastedA = "Broadcasted" + fNA + "to" + fNY;
152152
auto data = model.GetInitializedTensorData(fNA);
153153
std::shared_ptr<void> broadcastedData(
154-
UTILITY::UnidirectionalBroadcast<T>(static_cast<T *>(data.get()), fShapeA, fShapeY),
154+
UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeA, fShapeY),
155155
std::default_delete<T[]>());
156156
if (model.Verbose())
157157
std::cout << "broadcasted data A " << ConvertShapeToString(fShapeY) << " : "
@@ -172,7 +172,7 @@ public:
172172
<< ConvertValuesToString(ConvertShapeToLength(fShapeB), static_cast<T *>(data.get()))
173173
<< std::endl;
174174
std::shared_ptr<void> broadcastedData(
175-
UTILITY::UnidirectionalBroadcast<T>(static_cast<T *>(data.get()), fShapeB, fShapeY),
175+
UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeB, fShapeY),
176176
std::default_delete<T[]>());
177177
// do not update tensor B but add broadcasted one (since it can be input to some other operators)
178178
if (model.Verbose())

tmva/sofie/inc/TMVA/ROperator_Comparision.hxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ public:
143143
data1 = static_cast<T *>(model.GetInitializedTensorData(fNX1).get());
144144
if (broadcastX1) {
145145
broadcastedData1 = std::unique_ptr<T>(
146-
UTILITY::UnidirectionalBroadcast<T>(data1, fShapeX1, fShapeY));
146+
UTILITY::UnidirectionalBroadcast(data1, fShapeX1, fShapeY));
147147
data1 = broadcastedData1.get();
148148
}
149149

@@ -154,7 +154,7 @@ public:
154154
data2 = static_cast<T *>(model.GetInitializedTensorData(fNX2).get());
155155
if (broadcastX2) {
156156
broadcastedData2 = std::unique_ptr<T>(
157-
UTILITY::UnidirectionalBroadcast<T>(data2, fShapeX2, fShapeY));
157+
UTILITY::UnidirectionalBroadcast(data2, fShapeX2, fShapeY));
158158
data2 = broadcastedData2.get();
159159
}
160160
} else if (model.IsShapeTensor(fNX2)) {

tmva/sofie/inc/TMVA/ROperator_Conv.hxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ public:
289289
shape[0] = fShapeB[0];
290290
auto intTargetShape = ConvertShapeToInt(targetShape);
291291
std::shared_ptr<void> new_data_ptr(
292-
UTILITY::UnidirectionalBroadcast<float>(static_cast<float *>(original_data.get()), shape, intTargetShape),
292+
UTILITY::UnidirectionalBroadcast(static_cast<float *>(original_data.get()), shape, intTargetShape),
293293
std::default_delete<float[]>());
294294
model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), intTargetShape, new_data_ptr);
295295
fShapeB = model.GetTensorShape(fNB);
@@ -347,7 +347,7 @@ public:
347347
out << SP << "if (" << length << " > " << ConvertShapeToLength(shape) << ") {\n";
348348
else
349349
out << SP << "{\n";
350-
out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_"
350+
out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_"
351351
<< fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n";
352352
out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n";
353353
out << SP << SP << "std::copy(data, data + " << length << ", fTensor_" << fNB << ".begin());\n";

tmva/sofie/inc/TMVA/ROperator_Expand.hxx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ public:
9494
auto data = model.GetInitializedTensorData(fNX);
9595
if (fInitBroadcast) {
9696
std::shared_ptr<void> broadcastedData(
97-
UTILITY::UnidirectionalBroadcast<T>(static_cast<T *>(data.get()), shapeX, shapeY),
97+
UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), shapeX, shapeY),
9898
std::default_delete<T[]>());
9999
// Update the data and the shape of X
100100
model.UpdateInitializedTensor(fNX, model.GetTensorType(fNX), shapeY, broadcastedData);
@@ -153,8 +153,8 @@ public:
153153
// No need to broadcast A if it's an initialized tensor or shapes are the same
154154
if (!fInitialized && fShapeX != fShapeY) {
155155
out << SP << "// Broadcasting uninitialized tensor " << fNX << "\n";
156-
out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << fType << ">(tensor_" << fNX << ", " << ConvertShapeToString(fShapeX) << ", " << ConvertShapeToString(fShapeY)
157-
<< ", std::span<"<<fType<<">(tensor_"<<fNY<<", "<<ConvertDimShapeToLength(fShapeY)<<"));\n";
156+
out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNX << ", " << ConvertShapeToString(fShapeX) << ", " << ConvertShapeToString(fShapeY)
157+
<< ", tensor_"<<fNY<<");\n";
158158
}
159159
return out.str();
160160
}

tmva/sofie/inc/TMVA/ROperator_LayerNormalization.hxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ public:
177177
if (!fNBroadcastedB.empty()) {
178178
out << SP << "// Broadcasting the bias of LayerNormalization op\n";
179179
out << SP << "{\n";
180-
out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<float>(tensor_";
180+
out << SP << SP << "float* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_";
181181
out << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeX) << ");\n";
182182
out << SP << "std::copy(data, data + " << fLength << ", tensor_" << fNBroadcastedB << ");\n";
183183
out << SP << "delete[] data;\n";

tmva/sofie/inc/TMVA/ROperator_Where.hxx

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ public:
105105
if (model.IsInitializedTensor(fNA)) {
106106
auto data = model.GetInitializedTensorData(fNA);
107107
std::shared_ptr<void> broadcastedData(
108-
UTILITY::UnidirectionalBroadcast<T>(static_cast<T *>(data.get()), fShapeA, fShapeY),
108+
UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeA, fShapeY),
109109
std::default_delete<T[]>());
110110
// Update the data and the shape of A
111111
model.AddConstantTensor(fNBroadcastedA, model.GetTensorType(fNA), fShapeY, broadcastedData);
@@ -121,7 +121,7 @@ public:
121121
if (model.IsInitializedTensor(fNB)) {
122122
auto data = model.GetInitializedTensorData(fNB);
123123
std::shared_ptr<void> broadcastedData(
124-
UTILITY::UnidirectionalBroadcast<T>(static_cast<T *>(data.get()), fShapeB, fShapeY),
124+
UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeB, fShapeY),
125125
std::default_delete<T[]>());
126126
// do not update tensor B but add broadcasted one (since it can be input to some other operators)
127127
model.AddConstantTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY, broadcastedData);
@@ -137,7 +137,7 @@ public:
137137
if (model.IsInitializedTensor(fNC)) {
138138
auto data = model.GetInitializedTensorData(fNC);
139139
std::shared_ptr<void> broadcastedData(
140-
UTILITY::UnidirectionalBroadcast<T>(static_cast<T *>(data.get()), fShapeC, fShapeY),
140+
UTILITY::UnidirectionalBroadcast(static_cast<T *>(data.get()), fShapeC, fShapeY),
141141
std::default_delete<T[]>());
142142
// do not update tensor C but add broadcasted one (since it can be input to some other operators)
143143
model.AddConstantTensor(fNBroadcastedC, model.GetTensorType(fNC), fShapeY, broadcastedData);
@@ -256,34 +256,34 @@ public:
256256
if (fShapeA != fShapeY) {
257257
out << SP << "// Broadcasting uninitialized tensor " << fNA << "\n";
258258
//out << SP << "{\n";
259-
out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << typeName << ">(tensor_" << fNA << ", " << ConvertShapeToString(fShapeA) << ", " << ConvertShapeToString(fShapeY)
260-
<< ", fTensor_" << fNBroadcastedA << ");\n";
259+
out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNA << ", " << ConvertShapeToString(fShapeA) << ", " << ConvertShapeToString(fShapeY)
260+
<< ", tensor_" << fNBroadcastedA << ");\n";
261261
}
262262
// Broadcast B if it's uninitialized
263263
if (fShapeB != fShapeY) {
264264
out << SP << "// Broadcasting uninitialized tensor " << fNB << "\n";
265265
//out << SP << "{\n";
266-
out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << typeName << ">(tensor_" << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeY)
267-
<< ", fTensor_" << fNBroadcastedB << ");\n";
266+
out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNB << ", " << ConvertShapeToString(fShapeB) << ", " << ConvertShapeToString(fShapeY)
267+
<< ", tensor_" << fNBroadcastedB << ");\n";
268268
}
269269
// Broadcast C if it's uninitialized
270270
if (fShapeC != fShapeY) {
271271
// special case if C is an input tensor
272272
if (fIsInputBoolTensor) {
273273
size_t inputLength = ConvertShapeToLength(fShapeC);
274-
out << SP << "std::vector<std::uint8_t> fTensor_" << fNC << "(tensor_" << fNC << ", tensor_" << fNC << " + " << inputLength << ");\n";
274+
out << SP << "std::vector<std::uint8_t> tmp_tensor_" << fNC << "(tensor_" << fNC << ", tensor_" << fNC << " + " << inputLength << ");\n";
275275
}
276276
out << SP << "// Broadcasting uninitialized tensor " << fNC << "\n";
277277
//out << SP << "{\n";
278-
out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<std::uint8_t>(fTensor_" << fNC << ".data(), " << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY)
279-
<< ", fTensor_" << fNBroadcastedC << ");\n";
278+
out << SP << "TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tmp_tensor_" << fNC << ".data(), " << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY)
279+
<< ", tensor_" << fNBroadcastedC << ");\n";
280280
}
281281
std::string nameA = fNBroadcastedA.empty()? fNA : fNBroadcastedA;
282282
std::string nameB = fNBroadcastedB.empty()? fNB : fNBroadcastedB;
283283
std::string nameC = fNBroadcastedC.empty()? fNC : fNBroadcastedC;
284284
out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n";
285285
// get output tensor applying condition
286-
out << SP << SP << "tensor_" << fNY << "[id] = " << "(fTensor_" << nameC << "[id]) ? tensor_"
286+
out << SP << SP << "tensor_" << fNY << "[id] = " << "tensor_" << nameC << "[id] ? tensor_"
287287
<< nameA << "[id] : tensor_" + nameB + "[id];\n";
288288
out << SP << "}\n";
289289
return out.str();

tmva/sofie/inc/TMVA/SOFIE_common.hxx

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -416,14 +416,12 @@ T* BroadcastConvBias(const T* data, const size_t channel, const std::vector<size
416416
// Broadcast a tensor from shape to targetShape according to numpy broadcasting rules
417417
// See more at https://numpy.org/doc/stable/user/basics.broadcasting.html
418418
// and https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md .
419-
template<typename T, class ConstContT = std::span<const T>, class ContT = std::span<T> >
420-
void BroadcastTensor(ConstContT data, const std::vector<size_t>& shape, const std::vector<size_t>& targetShape, ContT broadcastedData) {
419+
template<typename T, class ConstContT = std::span<const T>>
420+
void BroadcastTensor(ConstContT data, const std::vector<size_t>& shape, const std::vector<size_t>& targetShape, T *broadcastedData) {
421421
// Size of the shapes (tensor input here have shapes with same sizes, we have already added the needed ones )
422422
size_t size = shape.size();
423423
// Current length of the broadcasted tensor
424424
size_t curLength = data.size();
425-
size_t targetLength = broadcastedData.size();
426-
assert(ConvertShapeToLength(targetShape) == targetLength);
427425
// special case when broadcasting last dimensions (initial shapes must be the same)
428426
if (size > 1 && shape.front() == targetShape.front() && shape.back() == 1) {
429427
size_t bsize = targetShape.back();
@@ -433,16 +431,16 @@ void BroadcastTensor(ConstContT data, const std::vector<size_t>& shape, const st
433431
bsize *= targetShape[k];
434432
}
435433
for (size_t i = 0; i < curLength; i++) {
436-
std::fill(broadcastedData.begin() + i*bsize, broadcastedData.begin() + (i+1)*bsize , data[i]);
434+
std::fill(broadcastedData + i*bsize, broadcastedData + (i+1)*bsize , data[i]);
437435
}
438436
return;
439437
}
440438

441-
std::copy(data.begin(), data.end(), broadcastedData.begin());
439+
std::copy(data.begin(), data.end(), broadcastedData);
442440
// Product of the previous dimensions of targetShape
443441
size_t arrayNum = 1;
444442
// New broadcasted data: is this needed?
445-
std::vector<T> newData(targetLength);
443+
std::vector<T> newData(ConvertShapeToLength(targetShape));
446444

447445
for (size_t idx = 0; idx < size; idx++) {
448446
size_t dim = shape[idx];
@@ -458,8 +456,8 @@ void BroadcastTensor(ConstContT data, const std::vector<size_t>& shape, const st
458456
for (size_t arrayIdx = 0; arrayIdx < arrayNum; arrayIdx++) {
459457
for (size_t targetIdx = 0; targetIdx < targetDim; targetIdx++) {
460458
size_t offset = arrayIdx * arrayLength * targetDim + targetIdx * arrayLength;
461-
std::copy(broadcastedData.begin() + arrayIdx * arrayLength,
462-
broadcastedData.begin() + (arrayIdx + 1) * arrayLength,
459+
std::copy(broadcastedData + arrayIdx * arrayLength,
460+
broadcastedData + (arrayIdx + 1) * arrayLength,
463461
newData.begin() + offset);
464462
}
465463
}
@@ -473,23 +471,20 @@ void BroadcastTensor(ConstContT data, const std::vector<size_t>& shape, const st
473471
// Update current length
474472
curLength = newLength;
475473
// Update broadcasted data
476-
std::copy(newData.begin(), newData.begin() + newLength, broadcastedData.begin());
474+
std::copy(newData.begin(), newData.begin() + newLength, broadcastedData);
477475
}
478476
// Update the number of arrays
479477
arrayNum *= targetDim;
480478
}
481-
//return broadcastedData;
482479
}
483480

484481
// interface where we allocate a new array for broadcasted data
485482
template<typename T>
486483
T* CreateBroadcastTensor(const T* data, const std::vector<size_t>& shape, const std::vector<size_t>& targetShape, size_t targetLength) {
487484
// newShape is an array of size equal to dimension along which we are broadcasting the tensor
488485
T* broadcastedData = new T[targetLength];
489-
std::span<T> bData(broadcastedData, broadcastedData+targetLength);
490486
size_t curLength = ConvertShapeToLength(shape);
491-
std::span<const T> inData(data, curLength);
492-
BroadcastTensor<T, std::span<const T>, std::span<T>>(inData, shape, targetShape, bData);
487+
BroadcastTensor<T>({data, curLength}, shape, targetShape, broadcastedData);
493488
return broadcastedData;
494489
}
495490
// Unidirectional broadcasting of shape to targetShape.
// In unidirectional broadcast only tensor B can have its shape changed, not tensor A
@@ -502,14 +497,14 @@ T* UnidirectionalBroadcast(const T* data, const std::vector<size_t>& shape, cons
502497
std::vector<size_t> newShape(targetSize, 1);
503498
size_t offset = targetSize - shape.size();
504499
std::copy(shape.begin(), shape.end(), newShape.begin() + offset);
505-
return CreateBroadcastTensor<T>(data, newShape, targetShape, ConvertShapeToLength(targetShape));
500+
return CreateBroadcastTensor(data, newShape, targetShape, ConvertShapeToLength(targetShape));
506501
}
507-
return CreateBroadcastTensor<T>(data, shape, targetShape, ConvertShapeToLength(targetShape));
502+
return CreateBroadcastTensor(data, shape, targetShape, ConvertShapeToLength(targetShape));
508503
}
509504

510505
// Unidirectional broadcasting shape to targetShape using a passed vector to avoid allocations
511506
template<typename T>
512-
void UnidirectionalBroadcast(const T* data, const std::vector<size_t>& shape, const std::vector<size_t>& targetShape, std::span<T> broadcastedData) {
507+
void UnidirectionalBroadcast(const T* data, const std::vector<size_t>& shape, const std::vector<size_t>& targetShape, T *broadcastedData) {
513508
size_t curLength = ConvertShapeToLength(shape);
514509
std::span<T> inData(const_cast<T*>(data), curLength);
515510
// Prepend shape with ones
@@ -518,9 +513,9 @@ void UnidirectionalBroadcast(const T* data, const std::vector<size_t>& shape, co
518513
std::vector<size_t> newShape(targetSize, 1);
519514
size_t offset = targetSize - shape.size();
520515
std::copy(shape.begin(), shape.end(), newShape.begin() + offset);
521-
BroadcastTensor<T>(inData, newShape, targetShape, broadcastedData);
516+
BroadcastTensor(inData, newShape, targetShape, broadcastedData);
522517
}
523-
BroadcastTensor<T, std::span<T>>(inData, shape, targetShape, broadcastedData);
518+
BroadcastTensor(inData, shape, targetShape, broadcastedData);
524519
}
525520

526521
/// compute stride of a tensor given its shape (assume layout is row-major)

0 commit comments

Comments
 (0)