Hey guys,
In my application I use a compute shader to process data quickly. I dispatch one compute shader pass for each instance of my model — so if I have 30 instances, I dispatch the compute shader 30 times.
for(int i = 0; i < engineModLoader.instanceNumber; i++)
{
engineRenderer.DispatchCompute(phoenixMesh.totalMeshlets.size(), selectedMeshlet,
engineModLoader.instancesData[i].instancePos);
}
I use the result of the compute shader to fill a global index buffer used for drawing the instances. So all dispatched compute shaders have to be terminated before the DrawFrame() call, which renders the instances. Unfortunately, the result returned by the compute shader is wrong. I don't know if it is a synchronization problem or if I am missing something else. The DispatchCompute() is the following:
void Renderer::DispatchCompute(int numberOfElements, std::vector<Phoenix::DataToCompute>& selectedMeshlet,
const glm::vec3& instancePos)
{
    // Runs one compute pass for a single instance, blocks until it finishes,
    // then reads the SSBO results back into `selectedMeshlet`.
    //
    // numberOfElements : number of DataToCompute entries the shader processes.
    // selectedMeshlet  : readback destination; resized here to fit the result.
    // instancePos      : per-instance translation written into the UBO.
    //
    // NOTE(review): because we vkWaitForFences immediately after submit, the
    // MAX_FRAMES_IN_FLIGHT rotation buys nothing — every dispatch is fully
    // serial with the host. Correct, but consider batching all instances into
    // one submission (or one dispatch with an instance index) for speed.

    engineTransform.ubo.instancePos = instancePos;
    UpdateUniformBuffer(currentComputeFrame);

    // The previous use of this fence/command buffer has already been waited on
    // (see the wait below), so resetting here is safe.
    vkResetFences(engineDevice.logicalDevice, 1, &computeInFlightFences[currentComputeFrame]);
    vkResetCommandBuffer(computeCommandBuffers[currentComputeFrame], 0);
    RecordComputeBuffer(numberOfElements, computeCommandBuffers[currentComputeFrame]);

    VkSubmitInfo computeSubmitInfo{};
    computeSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    computeSubmitInfo.commandBufferCount = 1;
    computeSubmitInfo.pCommandBuffers = &computeCommandBuffers[currentComputeFrame];

    if (vkQueueSubmit(engineDevice.computeQueue, 1, &computeSubmitInfo, computeInFlightFences[currentComputeFrame])
        != VK_SUCCESS)
    {
        throw std::runtime_error("failed to submit compute command buffer!");
    }

    // Device -> host synchronization: all shader work for this submission is
    // complete once the fence signals.
    vkWaitForFences(engineDevice.logicalDevice, 1, &computeInFlightFences[currentComputeFrame], VK_TRUE, UINT64_MAX);

    const VkDeviceSize bufferSize =
        sizeof(Phoenix::DataToCompute) * static_cast<VkDeviceSize>(numberOfElements);

    // BUGFIX: the original memcpy'd bufferSize bytes into selectedMeshlet
    // without checking its size — a heap overflow whenever the vector holds
    // fewer than numberOfElements entries. Make the destination fit first.
    if (selectedMeshlet.size() < static_cast<size_t>(numberOfElements))
    {
        selectedMeshlet.resize(static_cast<size_t>(numberOfElements));
    }

    VkBuffer stagingBuffer = VK_NULL_HANDLE;
    VkDeviceMemory stagingBufferMemory = VK_NULL_HANDLE;
    CreateBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                 stagingBuffer, stagingBufferMemory);
    CopyBuffer(SSBOBuffers[currentComputeFrame], stagingBuffer, bufferSize);

    void* bufferData = nullptr;
    // BUGFIX: the map result was silently ignored; a failed map would have
    // fed memcpy a null pointer.
    if (vkMapMemory(engineDevice.logicalDevice, stagingBufferMemory, 0, bufferSize, 0, &bufferData)
        != VK_SUCCESS)
    {
        vkDestroyBuffer(engineDevice.logicalDevice, stagingBuffer, nullptr);
        vkFreeMemory(engineDevice.logicalDevice, stagingBufferMemory, nullptr);
        throw std::runtime_error("failed to map staging buffer memory!");
    }
    memcpy(selectedMeshlet.data(), bufferData, static_cast<size_t>(bufferSize));
    vkUnmapMemory(engineDevice.logicalDevice, stagingBufferMemory);

    vkDestroyBuffer(engineDevice.logicalDevice, stagingBuffer, nullptr);
    vkFreeMemory(engineDevice.logicalDevice, stagingBufferMemory, nullptr);

    currentComputeFrame = (currentComputeFrame + 1) % MAX_FRAMES_IN_FLIGHT;
}
void Renderer::RecordComputeBuffer(int numberOfElements, VkCommandBuffer commandBuffer)
{
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
if (vkBeginCommandBuffer(commandBuffer, &beginInfo) != VK_SUCCESS)
{
throw std::runtime_error("failed to begin recording command buffer!");
}
VkDeviceSize ssboSize = sizeof(Phoenix::DataToCompute) * numberOfElements;
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, enginePipeline.computePipeline);
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, enginePipeline.computePipelineLayout, 0, 1,
&descriptorSets[currentComputeFrame], 0, 0);
vkCmdDispatch(commandBuffer, numberOfElements / 32, 1, 1);
if (vkEndCommandBuffer(commandBuffer) != VK_SUCCESS)
{
throw std::runtime_error("failed to record command buffer!");
}
}
As you can see, I use vkWaitForFences to wait for the command to finish. The compute shader is the following:
#version 450

// Per-meshlet LOD-selection record.
//
// NOTE(review): under std140 this struct does NOT match a naively packed C++
// struct. vec3 members are 16-byte aligned (boundCenter lands at offset 16,
// parentBoundCenter at 32), a GLSL bool occupies 4 bytes, and the array
// stride is rounded up to 16. If Phoenix::DataToCompute on the CPU side uses
// plain glm::vec3/bool without matching alignas/padding, every field after
// the first vec3 is read and written at the wrong offset — which alone
// produces garbage readback. Verify the layouts match (or pad explicitly /
// switch both sides to a std430-compatible layout).
struct DataToCompute
{
    int meshletID;
    float error;
    float parentError;
    vec3 boundCenter;
    vec3 parentBoundCenter;
    float errorThreshold;
    bool selected;
    int width;
    float hFov;
    int lod;
};

layout (binding = 0) uniform ParameterUBO {
    mat4 model;
    mat4 view;
    mat4 proj;
    vec3 instancePos;
} ubo;

layout(std140, binding = 3) buffer MeshletDataSSBO {
    DataToCompute dataIn[ ];
};

layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;

const int MAX_LOD_NUMBER = 5;

// Projects a bounding sphere (center + error-as-radius) into screen space and
// returns its projected size in pixels.
// Based on: https://stackoverflow.com/questions/21648630/radius-of-projected-sphere-in-screen-space
float ComputeScreenSpaceError(vec3 centerBound, float groupError, int width,
                              float hFov, vec3 instancePos, mat4 modelView)
{
    // Translate to this instance's position, then into view space.
    centerBound += instancePos;
    vec4 viewCenter = modelView * vec4(centerBound, 1.0f);
    centerBound = viewCenter.xyz;

    // Scale the error by the model/view transform so the radius is in
    // view-space units. NOTE(review): this only samples scale along the X
    // axis — confirm that is intended for non-uniform scaling.
    float radius = length(vec3(modelView * vec4(groupError, 0, 0, 0)));

    const float cotHalfFov = 1.0f / tan(hFov / 2.0f);
    const float d2 = dot(centerBound, centerBound);
    const float r = radius;
    // NOTE(review): if the camera is inside the sphere (d2 < r*r), sqrt of a
    // negative yields NaN and the comparisons below all fail — confirm that
    // case cannot happen, or clamp.
    const float div = sqrt(d2 - r*r);
    float screenSpaceError = (width / 2.0f * cotHalfFov * r) / div;
    return screenSpaceError;
}

void main()
{
    uint index = gl_GlobalInvocationID.x;
    // BUGFIX: the element count was hard-coded as 325, silently breaking for
    // any other mesh. Use the runtime length of the unsized SSBO array, which
    // Vulkan derives from the bound buffer range.
    if (index >= dataIn.length())
        return;

    mat4 modelView = ubo.view * ubo.model;

    float currentError = ComputeScreenSpaceError(dataIn[index].boundCenter, dataIn[index].error, dataIn[index].width,
                                                 dataIn[index].hFov, ubo.instancePos, modelView);

    // NOTE(review): `selected` is only ever set to true, never cleared. If the
    // SSBO is reused across instances/frames without a host-side reset, stale
    // true values persist — confirm the buffer is re-uploaded each dispatch.
    if (dataIn[index].lod >= MAX_LOD_NUMBER - 1)
    {
        // Finest LOD available: select when its own error is acceptable.
        if (currentError <= dataIn[index].errorThreshold)
        {
            dataIn[index].selected = true;
        }
    }
    else
    {
        // Interior LOD: select when this node is good enough but its parent
        // is not (standard LOD cut condition).
        float parentError = ComputeScreenSpaceError(dataIn[index].parentBoundCenter, dataIn[index].parentError,
                                                    dataIn[index].width, dataIn[index].hFov, ubo.instancePos, modelView);
        if (currentError <= dataIn[index].errorThreshold && parentError > dataIn[index].errorThreshold)
        {
            dataIn[index].selected = true;
        }
    }
}
Where am I going wrong?