Description
In my opinion, it would be better to have a test which covers the topic of advanced data uploading in detail. The current sample code from the docs still has the following issues:
- It's commented-out code, so it is never actually compiled or run (the compiler doesn't even check whether the syntax is valid, for example)
- The example from the docs only covers the use case of a uniform buffer
I think programmers could learn a lot from a test that covers the most common cases (vertex, index, and uniform buffers, maybe?), especially when it comes to correct barrier placement. The benefit of such a test would be that we can run it with the synchronization validation layers enabled to ensure the barriers are correct. This would give new programmers good, safe code to use as a reference.
I propose adding the following test to Tests.cpp:
static void TestAdvancedDataUploading() {
wprintf(L"Testing advanced data uploading\n");
auto create_buffer = [](VkDeviceSize bufferSize, VkBufferUsageFlags bufferUsage, VmaAllocationCreateFlags allocationFlags,
VkBuffer& buffer /*out*/, VmaAllocation& alloc /*out*/, VmaAllocationInfo& allocInfo /*out*/, void* myData,
std::size_t myDataSize) {
TEST(myData != nullptr);
TEST(myDataSize != 0);
VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufCreateInfo.size = bufferSize;
bufCreateInfo.usage = bufferUsage;
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.flags = allocationFlags;
VkResult result = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buffer, &alloc, &allocInfo);
TEST(result == VK_SUCCESS);
VkMemoryPropertyFlags memPropFlags;
vmaGetAllocationMemoryProperties(g_hAllocator, alloc, &memPropFlags);
BeginSingleTimeCommands();
if (memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
// Allocation ended up in a mappable memory and is already mapped - write to it directly.
memcpy(allocInfo.pMappedData, myData, myDataSize);
result = vmaFlushAllocation(g_hAllocator, alloc, 0, VK_WHOLE_SIZE);
TEST(result == VK_SUCCESS);
VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT;
bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufMemBarrier.buffer = buffer;
bufMemBarrier.offset = 0;
bufMemBarrier.size = VK_WHOLE_SIZE;
vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
EndSingleTimeCommands();
}
else {
// Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required.
VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
stagingBufCreateInfo.size = bufferSize;
stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
VmaAllocationCreateInfo stagingAllocCreateInfo = {};
stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_MAPPED_BIT;
VkBuffer stagingBuf;
VmaAllocation stagingAlloc;
VmaAllocationInfo stagingAllocInfo;
result = vmaCreateBuffer(g_hAllocator, &stagingBufCreateInfo, &stagingAllocCreateInfo,
&stagingBuf, &stagingAlloc, &stagingAllocInfo);
TEST(result == VK_SUCCESS);
memcpy(stagingAllocInfo.pMappedData, myData, myDataSize);
result = vmaFlushAllocation(g_hAllocator, stagingAlloc, 0, VK_WHOLE_SIZE);
TEST(result == VK_SUCCESS);
VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufMemBarrier.buffer = stagingBuf;
bufMemBarrier.offset = 0;
bufMemBarrier.size = VK_WHOLE_SIZE;
vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
VkBufferCopy bufCopy = {
0, // srcOffset
0, // dstOffset,
myDataSize, // size
};
vkCmdCopyBuffer(g_hTemporaryCommandBuffer, stagingBuf, buffer, 1, &bufCopy);
VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer
bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufMemBarrier2.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bufMemBarrier2.buffer = buffer;
bufMemBarrier2.offset = 0;
bufMemBarrier2.size = VK_WHOLE_SIZE;
vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr);
EndSingleTimeCommands();
vmaDestroyBuffer(g_hAllocator, stagingBuf, stagingAlloc);
}
};
VkDeviceSize buffer_size = 65536;
std::vector<std::uint8_t> myData(buffer_size);
// Fill with random data
for (std::size_t index = 0; index < buffer_size; index++) {
myData[index] = static_cast<uint32_t>(rand());
}
// Create a uniform buffer
VkBuffer uniformBuffer = VK_NULL_HANDLE;
VmaAllocation uniformBufferAlloc{};
VmaAllocationInfo uniformBufferAllocInfo{};
create_buffer(buffer_size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_MAPPED_BIT, uniformBuffer, uniformBufferAlloc, uniformBufferAllocInfo, myData.data(), buffer_size);
// Create a vertex buffer
VkBuffer vertexBuffer = VK_NULL_HANDLE;
VmaAllocation vertexBufferAlloc{};
VmaAllocationInfo vertexBufferAllocInfo{};
create_buffer(buffer_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 0, vertexBuffer,
vertexBufferAlloc, vertexBufferAllocInfo, myData.data(), buffer_size);
// Index buffer
VkBuffer indexBuffer = VK_NULL_HANDLE;
VmaAllocation indexBufferAlloc{};
VmaAllocationInfo indexBufferAllocInfo{};
create_buffer(buffer_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0, indexBuffer,
indexBufferAlloc, indexBufferAllocInfo, myData.data(), buffer_size);
vmaDestroyBuffer(g_hAllocator, uniformBuffer, uniformBufferAlloc);
vmaDestroyBuffer(g_hAllocator, vertexBuffer, vertexBufferAlloc);
vmaDestroyBuffer(g_hAllocator, indexBuffer, indexBufferAlloc);
}
This code works on NVIDIA RTX 3090, AMD Ryzen™ 9 7950X, and Intel Arc A770 without validation warnings or errors for TestAdvancedDataUploading.