diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i old mode 100755 new mode 100644 index 82cb20faaed..5e2ef3e30aa --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_predict_dense_default_batch_impl.i @@ -145,11 +145,11 @@ Status KNNClassificationPredictKernel::compu if (par3 == NULL) return Status(ErrorNullParameterNotSupported); - const Model * const model = static_cast(m); - const auto & kdTreeTable = *(model->impl()->getKDTreeTable()); - const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); - const NumericTable & data = *(model->impl()->getData()); - const NumericTable * labels = nullptr; + const Model * const model = static_cast(m); + const KDTreeTable & kdTreeTable = *(model->impl()->getKDTreeTable()); + const auto rootTreeNodeIndex = model->impl()->getRootNodeIndex(); + const NumericTable & data = *(model->impl()->getData()); + const NumericTable * labels = nullptr; if (resultsToEvaluate != 0) { labels = model->impl()->getLabels().get(); diff --git a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i index 5cc08923966..53b3c8ff9b9 100644 --- a/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i +++ b/cpp/daal/src/algorithms/k_nearest_neighbors/kdtree_knn_classification_train_dense_default_impl.i @@ -61,39 +61,66 @@ using namespace kdtree_knn_classification::internal; template class Queue { + // Default size of the queue. This value is suitable for small allocations + // during construction, but the primary use case involves calling the init(size) + // function. In most cases, init(size) will be called. + static const size_t defaultSize = 4; + public: - Queue() : _data(nullptr) {} + Queue() : _data(nullptr), _first(0), _last(0), _count(0), _capacity(0) {} - ~Queue() - { - services::daal_free(_data); - _data = nullptr; - } + ~Queue() { clear(); } + + Queue(const Queue &) = delete; + Queue & operator=(const Queue &) = delete; bool init(size_t size) { clear(); - _first = _count = 0; - _last = _sizeMinus1 = (_size = size) - 1; - return ((_data = static_cast(service_malloc(size * sizeof(T)))) != nullptr); + if (size == 0) + { + return false; + } + + _data = static_cast(service_malloc(size)); + if (!_data) + { + return false; + } + + _capacity = size; + _first = _last = _count = 0; + return true; } void clear() { - daal_free(_data); - _data = nullptr; + if (_data) + { + daal::services::internal::service_free(_data); + _data = nullptr; + } + _first = _last = _count = _capacity = 0; } - DAAL_FORCEINLINE void push(const T & value) + void reset() { _first = _last = _count = 0; } + + void push(const T & value) { - _data[_last = (_last + 1) & _sizeMinus1] = value; + if (_count >= _capacity) + { + grow(); + } + + _data[_last] = value; + ++_last; ++_count; } - DAAL_FORCEINLINE T pop() + T pop() { - const T value = _data[_first++]; - _first *= (_first != _size); + T value = _data[_first]; + _first = (_first + 1) % _capacity; --_count; return value; } @@ -103,12 +130,30 @@ public: size_t size() const { return _count; } private: + Status grow() + { + int result = 0; + _capacity = (_capacity == 0 ? defaultSize : _capacity * 2); + T * newData = static_cast(service_malloc(_capacity)); + DAAL_CHECK_MALLOC(newData); + + if (_data != nullptr) + { + result = services::internal::daal_memcpy_s(newData, _last * sizeof(T), _data, _last * sizeof(T)); + daal::services::internal::service_free(_data); + _data = nullptr; + } + + _data = newData; + + return (!result) ? Status() : Status(services::ErrorMemoryCopyFailedInternal); + } + T * _data; - size_t _first; - size_t _last; - size_t _count; - size_t _size; - size_t _sizeMinus1; + size_t _first; // Index of the first element + size_t _last; // Index of the next position to insert + size_t _count; // Current number of elements + size_t _capacity; // Maximum capacity of the queue }; struct BuildNode @@ -159,20 +204,15 @@ Status KNNClassificationTrainBatchKernel *>(r->impl()->getIndices().get())->getArray(); Queue q; - BBox * bboxQ = nullptr; - auto oldThreads = services::Environment::getInstance()->getNumberOfThreads(); + BBox * bboxQ = nullptr; DAAL_CHECK_STATUS(status, buildFirstPartOfKDTree(q, bboxQ, *x, *r, indexes, engine)); - // Temporary workaround for threading issues in `buildSecondPartOfKDTree()` - // Fix to be provided in https://github.com/uxlfoundation/oneDAL/pull/2925 - services::Environment::getInstance()->setNumberOfThreads(1); DAAL_CHECK_STATUS(status, buildSecondPartOfKDTree(q, bboxQ, *x, *r, indexes, engine)); - services::Environment::getInstance()->setNumberOfThreads(oldThreads); + DAAL_CHECK_STATUS(status, rearrangePoints(*x, indexes)); if (y) { DAAL_CHECK_STATUS(status, rearrangePoints(*y, indexes)); } - daal_free(bboxQ); bboxQ = nullptr; return status; @@ -189,9 +229,7 @@ Status KNNClassificationTrainBatchKernel BBox; const algorithmFpType base = 2.0; - // The queue size is not impacted by number of threads. - // All operations with the queue are done not in the threader_for primitives. - const size_t queueSize = 2 * Math::sPowx(base, Math::sCeil(Math::sLog(__KDTREE_FIRST_PART_LEAF_NODES_PER_THREAD) / Math::sLog(base))); + const size_t queueSize = 2 * Math::sPowx(base, Math::sCeil(Math::sLog(__KDTREE_FIRST_PART_LEAF_NODES_PER_THREAD) / Math::sLog(base))); const size_t firstPartLeafNodeCount = queueSize / 2; q.init(queueSize); const size_t xColumnCount = x.getNumberOfColumns(); @@ -201,7 +239,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(bboxSize * sizeof(BBox), sizeof(BBox))); + bboxQ = static_cast(service_malloc(bboxSize)); DAAL_CHECK_MALLOC(bboxQ) r.impl()->setLastNodeIndex(0); @@ -226,7 +264,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(subSampleCount * sizeof(algorithmFpType))); + algorithmFpType * subSamples = static_cast(service_malloc(subSampleCount)); DAAL_CHECK_MALLOC(subSamples) while (maxNodeCountForCurrentDepth < firstPartLeafNodeCount) @@ -320,8 +358,8 @@ Status KNNClassificationTrainBatchKernel(x).getBlockOfColumnValues(j, 0, xRowCount, readOnly, columnBD); const algorithmFpType * const dx = columnBD.getBlockPtr(); - - daal::tls bboxTLS([=, &status]() -> BBox * { + SafeStatus safeStat; + daal::tls bboxTLS([&safeStat]() -> BBox * { BBox * const ptr = service_scalable_calloc(1); if (ptr) { @@ -330,51 +368,52 @@ Status KNNClassificationTrainBatchKernel(static_cast(first + rowsPerBlock), xRowCount); + DAAL_CHECK_MALLOC_THR(bboxLocal); + const size_t first = iBlock * rowsPerBlock; + const size_t last = min(static_cast(first + rowsPerBlock), xRowCount); - if (first < last) + if (first < last) + { + BBox b; + size_t i = first; + b.upper = dx[indexes[i]]; + b.lower = dx[indexes[i]]; + PRAGMA_IVDEP + for (++i; i < last; ++i) { - BBox b; - size_t i = first; - b.upper = dx[indexes[i]]; - b.lower = dx[indexes[i]]; - PRAGMA_IVDEP - for (++i; i < last; ++i) - { - if (b.lower > dx[indexes[i]]) - { - b.lower = dx[indexes[i]]; - } - if (b.upper < dx[indexes[i]]) - { - b.upper = dx[indexes[i]]; - } - } - - if (bboxLocal->upper < b.upper) + if (b.lower > dx[indexes[i]]) { - bboxLocal->upper = b.upper; + b.lower = dx[indexes[i]]; } - if (bboxLocal->lower > b.lower) + if (b.upper < dx[indexes[i]]) { - bboxLocal->lower = b.lower; + b.upper = dx[indexes[i]]; } } + + if (bboxLocal->upper < b.upper) + { + bboxLocal->upper = b.upper; + } + if (bboxLocal->lower > b.lower) + { + bboxLocal->lower = b.lower; + } } }); + status = safeStat.detach(); + if (!status) return status; + bboxTLS.reduce([=](BBox * v) -> void { if (v) { @@ -718,8 +757,8 @@ size_t KNNClassificationTrainBatchKernel(service_malloc(idxMultiplier * (blockCount + 1) * sizeof(size_t))); - size_t * rightSegmentStartPerBlock = static_cast(service_malloc(idxMultiplier * blockCount * sizeof(size_t))); + size_t * leftSegmentStartPerBlock = static_cast(service_malloc(idxMultiplier * (blockCount + 1))); + size_t * rightSegmentStartPerBlock = static_cast(service_malloc(idxMultiplier * blockCount)); if (!leftSegmentStartPerBlock || !rightSegmentStartPerBlock) { @@ -848,10 +887,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(xRowCount * sizeof(algorithmFpType))))); + (rx != wx) ? wx : (buffer ? buffer : (buffer = static_cast(service_malloc(xRowCount)))); if (!awx) { status.add(services::ErrorMemoryAllocationFailed); @@ -936,7 +972,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc(q.size() * sizeof(BuildNode))); + BuildNode * bnQ = static_cast(service_malloc(q.size())); DAAL_CHECK_MALLOC(bnQ) size_t posQ = 0; while (q.size() > 0) @@ -974,7 +1010,7 @@ Status KNNClassificationTrainBatchKernel(service_malloc((maxThreads + 1) * sizeof(*firstNodeIndex))); + size_t * firstNodeIndex = static_cast(service_malloc((maxThreads + 1))); DAAL_CHECK_MALLOC(firstNodeIndex) size_t nodeIndex = lastNodeIndex; for (size_t i = 0; i < maxThreads; ++i) @@ -993,7 +1029,7 @@ Status KNNClassificationTrainBatchKernelbboxes = service_scalable_calloc(ptr->bboxesCapacity * xColumnCount)) != nullptr) && ((ptr->inSortValues = service_scalable_calloc(__KDTREE_INDEX_VALUE_PAIRS_PER_THREAD)) != nullptr) && ((ptr->outSortValues = service_scalable_calloc(__KDTREE_INDEX_VALUE_PAIRS_PER_THREAD)) != nullptr) - && ((ptr->fixupQueue = static_cast(service_malloc(ptr->fixupQueueCapacity * sizeof(size_t)))) != nullptr) + && ((ptr->fixupQueue = static_cast(service_malloc(ptr->fixupQueueCapacity))) != nullptr) && ptr->buildStack.init(stackSize))) { status.add(services::ErrorMemoryAllocationFailed); @@ -1021,7 +1057,8 @@ Status KNNClassificationTrainBatchKernelfixupQueueIndex >= local->fixupQueueCapacity) { const size_t newCapacity = local->fixupQueueCapacity * 2; - size_t * const newQueue = static_cast(service_malloc(newCapacity * sizeof(size_t))); + size_t * const newQueue = static_cast(service_malloc(newCapacity)); DAAL_CHECK_THR(newQueue, services::ErrorMemoryAllocationFailed); result |= daal::services::internal::daal_memcpy_s(newQueue, newCapacity * sizeof(size_t), local->fixupQueue, local->fixupQueueIndex * sizeof(size_t)); @@ -1130,14 +1167,12 @@ Status KNNClassificationTrainBatchKernel( local->extraKDTreeNodesCapacity > 0 ? local->extraKDTreeNodesCapacity * 2 : static_cast(1024), extraIndex + 1); - KDTreeNode * const newNodes = - static_cast(service_malloc(newCapacity * sizeof(KDTreeNode))); + KDTreeNode * const newNodes = static_cast(service_malloc(newCapacity)); DAAL_CHECK_THR(newNodes, services::ErrorMemoryAllocationFailed); - result |= daal::services::internal::daal_memcpy_s(newNodes, newCapacity * sizeof(KDTreeNode), - local->extraKDTreeNodes, - local->extraKDTreeNodesCapacity * sizeof(KDTreeNode)); + result |= daal::services::internal::daal_memcpy_s(newNodes, newCapacity, local->extraKDTreeNodes, + local->extraKDTreeNodesCapacity); KDTreeNode * oldNodes = local->extraKDTreeNodes; local->extraKDTreeNodes = newNodes; local->extraKDTreeNodesCapacity = newCapacity; @@ -1148,8 +1183,8 @@ Status KNNClassificationTrainBatchKernelextraKDTreeNodesCapacity = max(extraIndex + 1, static_cast(1024)); - local->extraKDTreeNodes = static_cast( - service_malloc(local->extraKDTreeNodesCapacity * sizeof(KDTreeNode))); + local->extraKDTreeNodes = + static_cast(service_malloc(local->extraKDTreeNodesCapacity)); DAAL_CHECK_THR(local->extraKDTreeNodes, services::ErrorMemoryAllocationFailed); } @@ -1202,79 +1237,69 @@ Status KNNClassificationTrainBatchKernel Status { - int result = 0; - bool isNeedToReindex = false; - localTLS.reduce([=, &isNeedToReindex](Local * ptr) -> void { - if (ptr && ptr->extraKDTreeNodes) + int result = 0; + + size_t actualNodeCount = lastNodeIndex; + localTLS.reduce([=, &actualNodeCount](Local * ptr) -> void { + if (ptr) { - isNeedToReindex = true; + actualNodeCount += ptr->nodeIndex - firstNodeIndex[ptr->threadIndex]; } }); - if (isNeedToReindex) - { - size_t actualNodeCount = lastNodeIndex; - localTLS.reduce([=, &actualNodeCount](Local * ptr) -> void { - if (ptr) - { - actualNodeCount += ptr->nodeIndex - firstNodeIndex[ptr->threadIndex]; - } - }); + Status s; + KDTreeTablePtr newKDTreeTable(new KDTreeTable(actualNodeCount, s)); + DAAL_CHECK_STATUS_VAR(s); + KDTreeNode * const oldRoot = static_cast(kdTreeTable.getArray()); + KDTreeNode * const newRoot = static_cast(newKDTreeTable->getArray()); - Status s; - KDTreeTablePtr newKDTreeTable(new KDTreeTable(actualNodeCount, s)); - DAAL_CHECK_STATUS_VAR(s); - KDTreeNode * const oldRoot = static_cast(kdTreeTable.getArray()); - KDTreeNode * const newRoot = static_cast(newKDTreeTable->getArray()); + result |= + daal::services::internal::daal_memcpy_s(newRoot, actualNodeCount * sizeof(KDTreeNode), oldRoot, lastNodeIndex * sizeof(KDTreeNode)); - result |= daal::services::internal::daal_memcpy_s(newRoot, actualNodeCount * sizeof(KDTreeNode), oldRoot, - lastNodeIndex * sizeof(KDTreeNode)); - - size_t newNodeIndex = lastNodeIndex; - localTLS.reduce([=, &result, &newNodeIndex](Local * ptr) -> void { - if (ptr) + size_t newNodeIndex = lastNodeIndex; + localTLS.reduce([=, &result, &newNodeIndex](Local * ptr) -> void { + if (ptr) + { + const size_t oldNodeIndex = firstNodeIndex[ptr->threadIndex]; + if (ptr->nodeIndex != oldNodeIndex) { - const size_t oldNodeIndex = firstNodeIndex[ptr->threadIndex]; - if (ptr->nodeIndex != oldNodeIndex) + const size_t extraNodeIndex = firstNodeIndex[ptr->threadIndex + 1]; + if (ptr->nodeIndex > extraNodeIndex) { - const size_t extraNodeIndex = firstNodeIndex[ptr->threadIndex + 1]; - if (ptr->nodeIndex > extraNodeIndex) - { - result |= daal::services::internal::daal_memcpy_s( - &newRoot[newNodeIndex], (actualNodeCount - newNodeIndex) * sizeof(KDTreeNode), &oldRoot[oldNodeIndex], - (extraNodeIndex - oldNodeIndex) * sizeof(KDTreeNode)); - const size_t idx = newNodeIndex + (extraNodeIndex - oldNodeIndex); - result |= daal::services::internal::daal_memcpy_s(&newRoot[idx], (actualNodeCount - idx) * sizeof(KDTreeNode), - ptr->extraKDTreeNodes, - (ptr->nodeIndex - extraNodeIndex) * sizeof(KDTreeNode)); - } - else - { - result |= daal::services::internal::daal_memcpy_s( - &newRoot[newNodeIndex], (actualNodeCount - newNodeIndex) * sizeof(KDTreeNode), &oldRoot[oldNodeIndex], - (ptr->nodeIndex - oldNodeIndex) * sizeof(KDTreeNode)); - } - const long delta = newNodeIndex - oldNodeIndex; - for (size_t i = 0; i < ptr->fixupQueueIndex; ++i) - { - newRoot[ptr->fixupQueue[i]].leftIndex += delta; - newRoot[ptr->fixupQueue[i]].rightIndex += delta; - } - for (size_t i = newNodeIndex, end = newNodeIndex + ptr->nodeIndex - oldNodeIndex; i < end; ++i) + result |= + daal::services::internal::daal_memcpy_s(&newRoot[newNodeIndex], (actualNodeCount - newNodeIndex) * sizeof(KDTreeNode), + &oldRoot[oldNodeIndex], (extraNodeIndex - oldNodeIndex) * sizeof(KDTreeNode)); + const size_t idx = newNodeIndex + (extraNodeIndex - oldNodeIndex); + result |= daal::services::internal::daal_memcpy_s(&newRoot[idx], (actualNodeCount - idx) * sizeof(KDTreeNode), + ptr->extraKDTreeNodes, + (ptr->nodeIndex - extraNodeIndex) * sizeof(KDTreeNode)); + } + else + { + result |= + daal::services::internal::daal_memcpy_s(&newRoot[newNodeIndex], (actualNodeCount - newNodeIndex) * sizeof(KDTreeNode), + &oldRoot[oldNodeIndex], (ptr->nodeIndex - oldNodeIndex) * sizeof(KDTreeNode)); + } + const long delta = newNodeIndex - oldNodeIndex; + for (size_t i = 0; i < ptr->fixupQueueIndex; ++i) + { + newRoot[ptr->fixupQueue[i]].leftIndex += delta; + newRoot[ptr->fixupQueue[i]].rightIndex += delta; + } + for (size_t i = newNodeIndex, end = newNodeIndex + ptr->nodeIndex - oldNodeIndex; i < end; ++i) + { + if (newRoot[i].dimension != __KDTREE_NULLDIMENSION) { - if (newRoot[i].dimension != __KDTREE_NULLDIMENSION) - { - newRoot[i].leftIndex += delta; - newRoot[i].rightIndex += delta; - } + newRoot[i].leftIndex += delta; + newRoot[i].rightIndex += delta; } - newNodeIndex += ptr->nodeIndex - oldNodeIndex; } + newNodeIndex += ptr->nodeIndex - oldNodeIndex; } - }); - r.impl()->setKDTreeTable(newKDTreeTable); - r.impl()->setLastNodeIndex(newNodeIndex); - } + } + }); + r.impl()->setKDTreeTable(newKDTreeTable); + r.impl()->setLastNodeIndex(newNodeIndex); return (!result) ? Status() : Status(ErrorMemoryCopyFailedInternal); }(); @@ -1360,7 +1385,7 @@ algorithmFpType KNNClassificationTrainBatchKernel(service_malloc(sampleCount * sizeof(*samples))); + algorithmFpType * samples = static_cast(service_malloc(sampleCount)); if (!samples) { status = services::ErrorMemoryAllocationFailed; @@ -1385,7 +1410,7 @@ algorithmFpType KNNClassificationTrainBatchKernel(sampleCount, samples); - size_t * hist = static_cast(service_malloc(sampleCount * sizeof(*hist))); + size_t * hist = static_cast(service_malloc(sampleCount)); if (!hist) { status = services::ErrorMemoryAllocationFailed; @@ -1398,7 +1423,7 @@ algorithmFpType KNNClassificationTrainBatchKernel(service_malloc(subSampleCount * sizeof(*subSamples))); + algorithmFpType * subSamples = static_cast(service_malloc(subSampleCount)); if (!subSamples) { status = services::ErrorMemoryAllocationFailed;