diff --git a/source/renderer/backend/Barrier.h b/source/renderer/backend/Barrier.h new file mode 100644 index 0000000000..a946ba1ecc --- /dev/null +++ b/source/renderer/backend/Barrier.h @@ -0,0 +1,110 @@ +/* Copyright (C) 2024 Wildfire Games. + * This file is part of 0 A.D. + * + * 0 A.D. is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * 0 A.D. is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with 0 A.D. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef INCLUDED_RENDERER_BACKEND_BARRIER +#define INCLUDED_RENDERER_BACKEND_BARRIER + +#include <cstdint> + +namespace Renderer +{ + +namespace Backend +{ + +// PipelineStageFlags and AccessFlags are mostly taken from the Vulkan +// specification. 
+ +namespace PipelineStage +{ +static constexpr uint32_t DRAW_INDIRECT{ + 1u << 0u}; +static constexpr uint32_t VERTEX_INPUT{ + 1u << 1u}; +static constexpr uint32_t VERTEX_SHADER{ + 1u << 2u}; +static constexpr uint32_t FRAGMENT_SHADER{ + 1u << 3u}; +static constexpr uint32_t EARLY_FRAGMENT_TESTS{ + 1u << 4u}; +static constexpr uint32_t LATE_FRAGMENT_TESTS{ + 1u << 5u}; +static constexpr uint32_t COLOR_ATTACHMENT_OUTPUT{ + 1u << 6u}; +static constexpr uint32_t COMPUTE_SHADER{ + 1u << 7u}; +static constexpr uint32_t TRANSFER{ + 1u << 8u}; +static constexpr uint32_t HOST{ + 1u << 9u}; +static constexpr uint32_t ACCELERATION_STRUCTURE_BUILD{ + 1u << 10u}; +static constexpr uint32_t RAY_TRACING_SHADER{ + 1u << 11u}; +static constexpr uint32_t TASK_SHADER{ + 1u << 12u}; +static constexpr uint32_t MESH_SHADER{ + 1u << 13u}; +} // namespace PipelineStage + +namespace Access +{ +static constexpr uint32_t INDIRECT_COMMAND_READ{ + 1u << 0u}; +static constexpr uint32_t INDEX_READ{ + 1u << 1u}; +static constexpr uint32_t VERTEX_ATTRIBUTE_READ{ + 1u << 2u}; +static constexpr uint32_t UNIFORM_READ{ + 1u << 3u}; +static constexpr uint32_t INPUT_ATTACHMENT_READ{ + 1u << 4u}; +static constexpr uint32_t SHADER_READ{ + 1u << 5u}; +static constexpr uint32_t SHADER_WRITE{ + 1u << 6u}; +static constexpr uint32_t COLOR_ATTACHMENT_READ{ + 1u << 7u}; +static constexpr uint32_t COLOR_ATTACHMENT_WRITE{ + 1u << 8u}; +static constexpr uint32_t DEPTH_STENCIL_ATTACHMENT_READ{ + 1u << 9u}; +static constexpr uint32_t DEPTH_STENCIL_ATTACHMENT_WRITE{ + 1u << 10u}; +static constexpr uint32_t TRANSFER_READ{ + 1u << 11u}; +static constexpr uint32_t TRANSFER_WRITE{ + 1u << 12u}; +static constexpr uint32_t HOST_READ{ + 1u << 13u}; +static constexpr uint32_t HOST_WRITE{ + 1u << 14u}; +static constexpr uint32_t MEMORY_READ{ + 1u << 15u}; +static constexpr uint32_t MEMORY_WRITE{ + 1u << 16u}; +static constexpr uint32_t ACCELERATION_STRUCTURE_READ{ + 1u << 17u}; +static constexpr uint32_t 
ACCELERATION_STRUCTURE_WRITE{ + 1u << 18u}; +} // namespace Access + +} // namespace Backend + +} // namespace Renderer + +#endif // INCLUDED_RENDERER_BACKEND_BARRIER diff --git a/source/renderer/backend/Format.h b/source/renderer/backend/Format.h index 570b2eab1a..8a797e700c 100644 --- a/source/renderer/backend/Format.h +++ b/source/renderer/backend/Format.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 Wildfire Games. +/* Copyright (C) 2024 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -43,9 +43,13 @@ enum class Format R16_UNORM, R16_UINT, R16_SINT, + R16_SFLOAT, R16G16_UNORM, R16G16_UINT, R16G16_SINT, + R16G16_SFLOAT, + R16G16B16_SFLOAT, + R16G16B16A16_SFLOAT, R32_SFLOAT, R32G32_SFLOAT, diff --git a/source/renderer/backend/IBuffer.h b/source/renderer/backend/IBuffer.h index 8e7d29bd3b..e873cd75b6 100644 --- a/source/renderer/backend/IBuffer.h +++ b/source/renderer/backend/IBuffer.h @@ -46,6 +46,7 @@ public: static constexpr uint32_t DYNAMIC = 1u << 0u; static constexpr uint32_t TRANSFER_SRC = 1u << 1u; static constexpr uint32_t TRANSFER_DST = 1u << 2u; + static constexpr uint32_t STORAGE = 1u << 3u; }; virtual Type GetType() const = 0; diff --git a/source/renderer/backend/IDevice.h b/source/renderer/backend/IDevice.h index e50f523eaa..e6a4a796c0 100644 --- a/source/renderer/backend/IDevice.h +++ b/source/renderer/backend/IDevice.h @@ -61,6 +61,7 @@ public: float maxAnisotropy; uint32_t maxTextureSize; bool instancing; + bool storage; }; virtual ~IDevice() {} diff --git a/source/renderer/backend/IDeviceCommandContext.h b/source/renderer/backend/IDeviceCommandContext.h index 2dbdf29f0b..57f606b8af 100644 --- a/source/renderer/backend/IDeviceCommandContext.h +++ b/source/renderer/backend/IDeviceCommandContext.h @@ -19,6 +19,7 @@ #define INCLUDED_RENDERER_BACKEND_IDEVICECOMMANDCONTEXT #include "ps/containers/Span.h" +#include "renderer/backend/Barrier.h" #include "renderer/backend/Format.h" #include 
"renderer/backend/IDeviceObject.h" #include "renderer/backend/PipelineState.h" @@ -184,6 +185,15 @@ public: const uint32_t groupCountY, const uint32_t groupCountZ) = 0; + /** + * Inserts a memory barrier which guarantees that all memory accesses + * matched by `srcAccessMask` in src are completed before all memory accesses + * described by `dstAccessMask` in dst. + */ + virtual void InsertMemoryBarrier( + const uint32_t srcStageMask, const uint32_t dstStageMask, + const uint32_t srcAccessMask, const uint32_t dstAccessMask) = 0; + /** * Sets a read-only texture to the binding slot. */ @@ -193,6 +203,7 @@ public: * Sets a read & write resource to the binding slot. */ virtual void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) = 0; + virtual void SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer) = 0; virtual void SetUniform( const int32_t bindingSlot, diff --git a/source/renderer/backend/dummy/DeviceCommandContext.cpp b/source/renderer/backend/dummy/DeviceCommandContext.cpp index 0ffa1869fa..8caccf7b15 100644 --- a/source/renderer/backend/dummy/DeviceCommandContext.cpp +++ b/source/renderer/backend/dummy/DeviceCommandContext.cpp @@ -202,6 +202,11 @@ void CDeviceCommandContext::Dispatch(const uint32_t, const uint32_t, const uint3 { } +void CDeviceCommandContext::InsertMemoryBarrier( + const uint32_t, const uint32_t, const uint32_t, const uint32_t) +{ +} + void CDeviceCommandContext::SetTexture(const int32_t, ITexture*) { } @@ -210,6 +215,10 @@ void CDeviceCommandContext::SetStorageTexture(const int32_t, ITexture*) { } +void CDeviceCommandContext::SetStorageBuffer(const int32_t, IBuffer*) +{ +} + void CDeviceCommandContext::SetUniform(const int32_t, const float) { } diff --git a/source/renderer/backend/dummy/DeviceCommandContext.h b/source/renderer/backend/dummy/DeviceCommandContext.h index 8e5fff595c..82ba9c6a42 100644 --- a/source/renderer/backend/dummy/DeviceCommandContext.h +++ b/source/renderer/backend/dummy/DeviceCommandContext.h @@ 
-120,9 +120,14 @@ public: const uint32_t groupCountY, const uint32_t groupCountZ) override; + void InsertMemoryBarrier( + const uint32_t srcStageMask, const uint32_t dstStageMask, + const uint32_t srcAccessMask, const uint32_t dstAccessMask) override; + void SetTexture(const int32_t bindingSlot, ITexture* texture) override; void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) override; + void SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer) override; void SetUniform( const int32_t bindingSlot, diff --git a/source/renderer/backend/gl/Buffer.cpp b/source/renderer/backend/gl/Buffer.cpp index b4fab48897..99089ac64e 100644 --- a/source/renderer/backend/gl/Buffer.cpp +++ b/source/renderer/backend/gl/Buffer.cpp @@ -35,19 +35,42 @@ namespace Backend namespace GL { +namespace +{ + +GLenum GetTargetFromBufferType(const IBuffer::Type type) +{ + GLenum target{GL_ARRAY_BUFFER}; + switch (type) + { + case IBuffer::Type::INDEX: + target = GL_ELEMENT_ARRAY_BUFFER; + break; + case IBuffer::Type::UNIFORM: + target = GL_UNIFORM_BUFFER; + break; + default: + target = GL_ARRAY_BUFFER; + break; + } + return target; +} + +} // anonymous namespace + // static std::unique_ptr CBuffer::Create( CDevice* device, const char* name, const Type type, const uint32_t size, const uint32_t usage) { - ENSURE(type == Type::VERTEX || type == Type::INDEX); + ENSURE(type == Type::VERTEX || type == Type::INDEX || type == Type::UNIFORM); std::unique_ptr buffer(new CBuffer()); buffer->m_Device = device; buffer->m_Type = type; buffer->m_Size = size; buffer->m_Usage = usage; glGenBuffersARB(1, &buffer->m_Handle); - const GLenum target = type == Type::INDEX ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER; + const GLenum target{GetTargetFromBufferType(type)}; glBindBufferARB(target, buffer->m_Handle); glBufferDataARB(target, size, nullptr, (usage & IBuffer::Usage::DYNAMIC) ? 
GL_DYNAMIC_DRAW : GL_STATIC_DRAW); #if !CONFIG2_GLES diff --git a/source/renderer/backend/gl/Device.cpp b/source/renderer/backend/gl/Device.cpp index 45d8e0e363..47bd6ba312 100644 --- a/source/renderer/backend/gl/Device.cpp +++ b/source/renderer/backend/gl/Device.cpp @@ -441,12 +441,23 @@ std::unique_ptr CDevice::Create(SDL_Window* window, const bool arb) #if CONFIG2_GLES capabilities.instancing = false; + capabilities.storage = false; #else capabilities.instancing = !device->m_ARB && (ogl_HaveVersion(3, 3) || (ogl_HaveExtension("GL_ARB_draw_instanced") && ogl_HaveExtension("GL_ARB_instanced_arrays"))); + GLint maxStorageBufferSize{0}; + if (ogl_HaveExtension("GL_ARB_shader_storage_buffer_object")) + glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &maxStorageBufferSize); + capabilities.storage = + capabilities.computeShaders && maxStorageBufferSize > 0 + && static_cast(maxStorageBufferSize) >= 128 * MiB + && ogl_HaveExtension("GL_ARB_uniform_buffer_object") + && ogl_HaveExtension("GL_ARB_shader_storage_buffer_object") + && ogl_HaveExtension("GL_ARB_half_float_vertex") + && ogl_HaveExtension("GL_ARB_program_interface_query"); #endif return device; @@ -762,6 +773,18 @@ void CDevice::Report(const ScriptRequest& rq, JS::HandleValue settings) INTEGER(MAX_VERTEX_VARYING_COMPONENTS_ARB); } + if (ogl_HaveExtension("GL_ARB_uniform_buffer_object")) + { + INTEGER(MAX_UNIFORM_BLOCK_SIZE); + INTEGER(MAX_UNIFORM_BUFFER_BINDINGS); + } + + if (ogl_HaveExtension("GL_ARB_shader_storage_buffer_object")) + { + INTEGER(MAX_SHADER_STORAGE_BLOCK_SIZE); + INTEGER(MAX_SHADER_STORAGE_BUFFER_BINDINGS); + } + #else // CONFIG2_GLES // Core OpenGL ES 2.0: diff --git a/source/renderer/backend/gl/DeviceCommandContext.cpp b/source/renderer/backend/gl/DeviceCommandContext.cpp index cb17233641..8b1567d6e4 100644 --- a/source/renderer/backend/gl/DeviceCommandContext.cpp +++ b/source/renderer/backend/gl/DeviceCommandContext.cpp @@ -103,8 +103,10 @@ GLenum BufferTypeToGLTarget(const CBuffer::Type type) 
case CBuffer::Type::INDEX: target = GL_ELEMENT_ARRAY_BUFFER; break; - case CBuffer::Type::UPLOAD: case CBuffer::Type::UNIFORM: + target = GL_UNIFORM_BUFFER; + break; + case CBuffer::Type::UPLOAD: debug_warn("Unsupported buffer type."); break; }; @@ -451,7 +453,9 @@ void CDeviceCommandContext::UploadBufferRegion( ENSURE(dataOffset + dataSize <= buffer->GetSize()); const GLenum target = BufferTypeToGLTarget(buffer->GetType()); ScopedBufferBind scopedBufferBind(this, buffer->As()); - if (buffer->IsDynamic()) + // Uniform buffers are a relatively new feature so we don't need to use a + // dynamic upload. + if (buffer->IsDynamic() && buffer->GetType() != IBuffer::Type::UNIFORM) { UploadDynamicBufferRegionImpl(target, buffer->GetSize(), dataOffset, dataSize, [data, dataSize](u8* mappedData) { @@ -1257,12 +1261,16 @@ void CDeviceCommandContext::Dispatch( const uint32_t groupCountY, const uint32_t groupCountZ) { -#if !CONFIG2_GLES +#if !CONFIG2_GLES ENSURE(m_InsideComputePass); glDispatchCompute(groupCountX, groupCountY, groupCountZ); - // TODO: we might want to do binding tracking to avoid redundant barriers. - glMemoryBarrier( - GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT); + // Storage buffers should be managed explicitly by InsertMemoryBarrier. + if (m_ShaderProgram->HasImageUniforms()) + { + // TODO: we might want to do binding tracking to avoid redundant barriers. 
+ glMemoryBarrier( + GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT ); + } #else UNUSED2(groupCountX); UNUSED2(groupCountY); @@ -1270,6 +1278,41 @@ void CDeviceCommandContext::Dispatch( #endif } +void CDeviceCommandContext::InsertMemoryBarrier( + const uint32_t UNUSED(srcStageMask), const uint32_t dstStageMask, + const uint32_t srcAccessMask, const uint32_t dstAccessMask) +{ +#if !CONFIG2_GLES + ENSURE(!m_InsideFramebufferPass); + GLbitfield barriers{0}; + // Only shader writes are translated to GL barrier bits. + if (srcAccessMask & Access::SHADER_WRITE) + { + if (dstStageMask & PipelineStage::VERTEX_INPUT) + { + if (dstAccessMask & Access::VERTEX_ATTRIBUTE_READ) + barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT; + if (dstAccessMask & Access::INDEX_READ) + barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT; + } + if (dstStageMask & (PipelineStage::VERTEX_SHADER | PipelineStage::FRAGMENT_SHADER | PipelineStage::COMPUTE_SHADER)) + { + if (dstAccessMask & (Access::SHADER_READ | Access::SHADER_WRITE)) + barriers |= GL_SHADER_STORAGE_BARRIER_BIT; + if (dstAccessMask & Access::UNIFORM_READ) + barriers |= GL_UNIFORM_BARRIER_BIT; + } + } + if (barriers) + glMemoryBarrier(barriers); +#else + // GLES builds issue no barrier; only silence unused-parameter warnings. + UNUSED2(dstStageMask); + UNUSED2(srcAccessMask); + UNUSED2(dstAccessMask); +#endif +} + void CDeviceCommandContext::SetTexture(const int32_t bindingSlot, ITexture* texture) { ENSURE(m_ShaderProgram); @@ -1332,6 +1375,21 @@ void CDeviceCommandContext::SetStorageTexture(const int32_t bindingSlot, ITextur #endif } +void CDeviceCommandContext::SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer) +{ +#if !CONFIG2_GLES + if (bindingSlot < 0) + return; + ENSURE(m_ShaderProgram); + ENSURE(buffer); + ENSURE(buffer->GetUsage() & Renderer::Backend::IBuffer::Usage::STORAGE); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, m_ShaderProgram->GetStorageBuffer(bindingSlot), buffer->As()->GetHandle()); +#else + UNUSED2(bindingSlot); + UNUSED2(buffer); +#endif +} + void CDeviceCommandContext::SetUniform( const int32_t bindingSlot, const float value) diff --git 
a/source/renderer/backend/gl/DeviceCommandContext.h b/source/renderer/backend/gl/DeviceCommandContext.h index 02a2954d01..5f1a251007 100644 --- a/source/renderer/backend/gl/DeviceCommandContext.h +++ b/source/renderer/backend/gl/DeviceCommandContext.h @@ -129,9 +129,14 @@ public: const uint32_t groupCountY, const uint32_t groupCountZ) override; + void InsertMemoryBarrier( + const uint32_t srcStageMask, const uint32_t dstStageMask, + const uint32_t srcAccessMask, const uint32_t dstAccessMask) override; + void SetTexture(const int32_t bindingSlot, ITexture* texture) override; void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) override; + void SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer) override; void SetUniform( const int32_t bindingSlot, diff --git a/source/renderer/backend/gl/ShaderProgram.cpp b/source/renderer/backend/gl/ShaderProgram.cpp index 6c5471672f..ea78921630 100644 --- a/source/renderer/backend/gl/ShaderProgram.cpp +++ b/source/renderer/backend/gl/ShaderProgram.cpp @@ -28,6 +28,7 @@ #include "ps/Filesystem.h" #include "ps/Profile.h" #include "ps/XML/Xeromyces.h" +#include "renderer/backend/gl/Buffer.h" #include "renderer/backend/gl/Device.h" #include "renderer/backend/gl/DeviceCommandContext.h" @@ -80,17 +81,22 @@ GLint GLSizeFromFormat(const Format format) { GLint size = 1; if (format == Renderer::Backend::Format::R32_SFLOAT || - format == Renderer::Backend::Format::R16_SINT) + format == Renderer::Backend::Format::R16_SINT || + format == Renderer::Backend::Format::R16_SFLOAT) size = 1; else if ( format == Renderer::Backend::Format::R8G8_UNORM || format == Renderer::Backend::Format::R8G8_UINT || format == Renderer::Backend::Format::R16G16_SINT || + format == Renderer::Backend::Format::R16G16_SFLOAT || format == Renderer::Backend::Format::R32G32_SFLOAT) size = 2; - else if (format == Renderer::Backend::Format::R32G32B32_SFLOAT) + else if ( + format == Renderer::Backend::Format::R16G16B16_SFLOAT || + format == 
Renderer::Backend::Format::R32G32B32_SFLOAT) size = 3; else if ( + format == Renderer::Backend::Format::R16G16B16A16_SFLOAT || format == Renderer::Backend::Format::R32G32B32A32_SFLOAT || format == Renderer::Backend::Format::R8G8B8A8_UNORM || format == Renderer::Backend::Format::R8G8B8A8_UINT) @@ -108,6 +114,13 @@ GLenum GLTypeFromFormat(const Format format) format == Renderer::Backend::Format::R32G32B32_SFLOAT || format == Renderer::Backend::Format::R32G32B32A32_SFLOAT) type = GL_FLOAT; +#if !CONFIG2_GLES + else if (format == Renderer::Backend::Format::R16_SFLOAT || + format == Renderer::Backend::Format::R16G16_SFLOAT || + format == Renderer::Backend::Format::R16G16B16_SFLOAT || + format == Renderer::Backend::Format::R16G16B16A16_SFLOAT) + type = GL_HALF_FLOAT; +#endif else if ( format == Renderer::Backend::Format::R16_SINT || format == Renderer::Backend::Format::R16G16_SINT) @@ -444,6 +457,12 @@ public: return textureUnit; } + GLuint GetStorageBuffer(const int32_t UNUSED(bindingSlot)) override + { + debug_warn("ARB shaders don't support storage buffers."); + return 0; + } + void SetUniform( const int32_t bindingSlot, const float value) override @@ -789,6 +808,59 @@ public: std::vector occupiedUnits; +#if !CONFIG2_GLES + const bool isStorageSupported{m_Device->GetCapabilities().storage}; + if (isStorageSupported) + { + constexpr GLint maxBlockNameLength{128}; + char name[maxBlockNameLength]; + + GLint maxUniformBlockNameLength{0}; + glGetProgramInterfaceiv(m_Program, GL_UNIFORM_BLOCK, GL_MAX_NAME_LENGTH, &maxUniformBlockNameLength); + ogl_WarnIfError(); + + GLint numberOfActiveUniformBlocks{0}; + glGetProgramInterfaceiv(m_Program, GL_UNIFORM_BLOCK, GL_ACTIVE_RESOURCES, &numberOfActiveUniformBlocks); + ogl_WarnIfError(); + // Currently we support the only one uniform buffer per shader. 
+ if (numberOfActiveUniformBlocks == 1) + { + GLsizei length{0}; + glGetProgramResourceName(m_Program, GL_UNIFORM_BLOCK, 0, maxBlockNameLength, &length, name); + + const GLuint location{glGetProgramResourceIndex(m_Program, GL_UNIFORM_BLOCK, name)}; + glUniformBlockBinding(m_Program, location, location); + + m_UniformBufferLocation = location; + } + + GLint maxStorageNameLength{0}; + glGetProgramInterfaceiv(m_Program, GL_SHADER_STORAGE_BLOCK, GL_MAX_NAME_LENGTH, &maxStorageNameLength); + ogl_WarnIfError(); + ENSURE(maxStorageNameLength <= maxBlockNameLength); + GLint numberOfActiveStorages{0}; + glGetProgramInterfaceiv(m_Program, GL_SHADER_STORAGE_BLOCK, GL_ACTIVE_RESOURCES, &numberOfActiveStorages); + ogl_WarnIfError(); + for (GLint index{0}; index < numberOfActiveStorages; ++index) + { + GLsizei length{0}; + glGetProgramResourceName(m_Program, GL_SHADER_STORAGE_BLOCK, index, maxBlockNameLength, &length, name); + + const GLuint location{glGetProgramResourceIndex(m_Program, GL_SHADER_STORAGE_BLOCK, name)}; + glShaderStorageBlockBinding(m_Program, location, location); + + const CStrIntern nameIntern(name); + + m_BindingSlotsMapping[nameIntern] = m_BindingSlots.size(); + BindingSlot bindingSlot{}; + bindingSlot.name = nameIntern; + bindingSlot.location = location; + bindingSlot.isStorageBuffer = true; + m_BindingSlots.emplace_back(std::move(bindingSlot)); + } + } +#endif + GLint numUniforms = 0; glGetProgramiv(m_Program, GL_ACTIVE_UNIFORMS, &numUniforms); ogl_WarnIfError(); @@ -824,6 +896,7 @@ public: bindingSlot.size = size; bindingSlot.type = type; bindingSlot.isTexture = false; + bindingSlot.isStorageBuffer = false; #define CASE(TYPE, ELEMENT_TYPE, ELEMENT_COUNT) \ case GL_ ## TYPE: \ @@ -871,6 +944,7 @@ public: case GL_IMAGE_2D: bindingSlot.elementType = GL_IMAGE_2D; bindingSlot.isTexture = true; + m_HasImageUniforms = true; break; #endif default: @@ -895,6 +969,31 @@ public: LOGERROR("CShaderProgramGLSL::Link: unsupported uniform type: 0x%04x", 
static_cast(type)); } +#if !CONFIG2_GLES + if (isStorageSupported) + { + GLuint uniformIndex{0}; + const GLchar* nameToQuery{name}; + glGetUniformIndices(m_Program, 1, &nameToQuery, &uniformIndex); + ogl_WarnIfError(); + + GLint uniformOffset{0}; + glGetActiveUniformsiv(m_Program, 1, &uniformIndex, GL_UNIFORM_OFFSET, &uniformOffset); + ogl_WarnIfError(); + + // According to the OpenGL spec: + // https://registry.khronos.org/OpenGL-Refpages/es3/html/glGetActiveUniformsiv.xhtml + // For uniforms in the default uniform block, -1 will be returned. + if (uniformOffset >= 0) + { + const uint32_t sizeInBytes{static_cast(bindingSlot.size * bindingSlot.elementCount * sizeof(float))}; + m_UniformBufferSize = std::max(m_UniformBufferSize, uniformOffset + sizeInBytes); + bindingSlot.location = -1; + bindingSlot.offset = uniformOffset; + } + } +#endif + m_BindingSlots.emplace_back(std::move(bindingSlot)); } @@ -918,6 +1017,13 @@ public: ogl_WarnIfError(); } + if (m_UniformBufferSize > 0 && m_UniformBufferLocation != -1) + { + m_UniformBuffer = m_Device->CreateBuffer( + "ShaderProgramUniformBuffer", IBuffer::Type::UNIFORM, m_UniformBufferSize, + IBuffer::Usage::DYNAMIC | IBuffer::Usage::TRANSFER_DST); + } + // TODO: verify that we're not using more samplers than is supported Unbind(); @@ -933,6 +1039,8 @@ public: ENSURE(this != previousShaderProgramGLSL); glUseProgram(m_Program); + if (m_UniformBuffer) + glBindBufferBase(GL_UNIFORM_BUFFER, m_UniformBufferLocation, m_UniformBuffer->As()->GetHandle()); if (previousShaderProgramGLSL) { @@ -1009,6 +1117,15 @@ public: return textureUnit; } + GLuint GetStorageBuffer(const int32_t bindingSlot) override + { + if (bindingSlot < 0 || bindingSlot >= static_cast(m_BindingSlots.size())) + return 0; + if (!m_BindingSlots[bindingSlot].isStorageBuffer) + LOGERROR("CShaderProgramGLSL::GetStorageBuffer(): Invalid slot (expected storage buffer): '%s'", m_BindingSlots[bindingSlot].name.c_str()); + return m_BindingSlots[bindingSlot].location; + } + 
void SetUniform( const int32_t bindingSlot, const float value) override @@ -1094,6 +1211,17 @@ public: const GLint location = m_BindingSlots[bindingSlot].location; const GLenum type = m_BindingSlots[bindingSlot].type; + if (location == -1) + { + const uint32_t sizeInBytes{ + static_cast(m_BindingSlots[bindingSlot].size * m_BindingSlots[bindingSlot].elementCount * sizeof(float))}; + const uint32_t dataSizeToUpload{std::min( + static_cast(values.size() * sizeof(float)), sizeInBytes)}; + m_Device->GetActiveCommandContext()->UploadBufferRegion( + m_UniformBuffer.get(), values.data(), m_BindingSlots[bindingSlot].offset, dataSizeToUpload); + return; + } + if (type == GL_FLOAT) glUniform1fv(location, 1, values.data()); else if (type == GL_FLOAT_VEC2) @@ -1172,14 +1300,20 @@ private: { CStrIntern name; GLint location; + GLint offset; GLint size; GLenum type; GLenum elementType; GLint elementCount; bool isTexture; + bool isStorageBuffer; }; std::vector m_BindingSlots; std::unordered_map m_BindingSlotsMapping; + + GLint m_UniformBufferLocation{-1}; + uint32_t m_UniformBufferSize{0}; + std::unique_ptr m_UniformBuffer; }; CShaderProgram::CShaderProgram(int streamflags) diff --git a/source/renderer/backend/gl/ShaderProgram.h b/source/renderer/backend/gl/ShaderProgram.h index 1f3305fd6d..90533355d0 100644 --- a/source/renderer/backend/gl/ShaderProgram.h +++ b/source/renderer/backend/gl/ShaderProgram.h @@ -116,6 +116,8 @@ public: }; virtual TextureUnit GetTextureUnit(const int32_t bindingSlot) = 0; + virtual GLuint GetStorageBuffer(const int32_t bindingSlot) = 0; + virtual void SetUniform( const int32_t bindingSlot, const float value) = 0; @@ -141,6 +143,8 @@ public: bool IsStreamActive(const VertexAttributeStream stream) const; + bool HasImageUniforms() const { return m_HasImageUniforms; } + /** * Checks that all the required vertex attributes have been set. * Call this before calling Draw/DrawIndexed etc to avoid potential crashes. 
@@ -161,6 +165,8 @@ protected: void BindClientStates(); void UnbindClientStates(); int m_ValidStreams; // which streams have been specified via VertexPointer etc since the last Bind + + bool m_HasImageUniforms{false}; }; } // namespace GL diff --git a/source/renderer/backend/vulkan/Buffer.cpp b/source/renderer/backend/vulkan/Buffer.cpp index 39b80c0d98..a1a23509db 100644 --- a/source/renderer/backend/vulkan/Buffer.cpp +++ b/source/renderer/backend/vulkan/Buffer.cpp @@ -40,6 +40,8 @@ VkBufferUsageFlags ToVkBufferUsageFlags(const uint32_t usage) usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; if (usage & IBuffer::Usage::TRANSFER_DST) usageFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + if (usage & IBuffer::Usage::STORAGE) + usageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; return usageFlags; } @@ -51,19 +53,20 @@ std::tuple MakeCreati switch (type) { case IBuffer::Type::VERTEX: - ENSURE(usage & IBuffer::Usage::TRANSFER_DST); + ENSURE(usage & (IBuffer::Usage::TRANSFER_DST | IBuffer::Usage::STORAGE)); return { commonFlags | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE}; case IBuffer::Type::INDEX: - ENSURE(usage & IBuffer::Usage::TRANSFER_DST); + ENSURE(usage & (IBuffer::Usage::TRANSFER_DST | IBuffer::Usage::STORAGE)); return { commonFlags | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE}; case IBuffer::Type::UPLOAD: ENSURE(usage & IBuffer::Usage::TRANSFER_SRC); + ENSURE(!(usage & IBuffer::Usage::STORAGE)); return { commonFlags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, @@ -131,6 +134,8 @@ CBuffer::~CBuffer() if (m_Allocation != VK_NULL_HANDLE) m_Device->ScheduleObjectToDestroy( VK_OBJECT_TYPE_BUFFER, m_Buffer, m_Allocation); + + m_Device->ScheduleBufferToDestroy(m_UID); } IDevice* CBuffer::GetDevice() diff --git a/source/renderer/backend/vulkan/DescriptorManager.cpp 
b/source/renderer/backend/vulkan/DescriptorManager.cpp index ade79d7629..5a10d9e4d1 100644 --- a/source/renderer/backend/vulkan/DescriptorManager.cpp +++ b/source/renderer/backend/vulkan/DescriptorManager.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 Wildfire Games. +/* Copyright (C) 2024 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -27,6 +27,7 @@ #include "renderer/backend/vulkan/Utilities.h" #include +#include #include namespace Renderer @@ -306,6 +307,56 @@ VkDescriptorSet CDescriptorManager::GetSingleTypeDescritorSet( return set; } +VkDescriptorSet CDescriptorManager::GetSingleTypeDescritorSet( + VkDescriptorType type, VkDescriptorSetLayout layout, + const std::vector& buffersUID, + const std::vector& buffers) +{ + ENSURE(buffersUID.size() == buffers.size()); + ENSURE(!buffersUID.empty()); + const auto[set, justCreated] = GetSingleTypeDescritorSetImpl(type, layout, buffersUID); + if (!justCreated) + return set; + + ENSURE( + type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC); + const VkPhysicalDeviceLimits& physicalDeviceLimits = m_Device->GetChoosenPhysicalDevice().properties.limits; + const uint32_t maxBufferRange = + type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC + ? 
physicalDeviceLimits.maxStorageBufferRange + : physicalDeviceLimits.maxUniformBufferRange; + + PS::StaticVector infos; + std::transform(buffers.begin(), buffers.end(), std::back_inserter(infos), + [maxBufferRange](CBuffer* buffer) + { + ENSURE(buffer); + ENSURE(buffer->GetUsage() & IBuffer::Usage::STORAGE); + ENSURE(buffer->GetSize() <= maxBufferRange); + + VkDescriptorBufferInfo descriptorBufferInfo{}; + descriptorBufferInfo.buffer = buffer->GetVkBuffer(); + descriptorBufferInfo.offset = 0; + descriptorBufferInfo.range = buffer->GetSize(); + return descriptorBufferInfo; + }); + + VkWriteDescriptorSet writeDescriptorSet{}; + writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writeDescriptorSet.dstSet = set; + writeDescriptorSet.dstBinding = 0; + writeDescriptorSet.dstArrayElement = 0; + writeDescriptorSet.descriptorType = type; + writeDescriptorSet.descriptorCount = static_cast(infos.size()); + writeDescriptorSet.pBufferInfo = infos.data(); + + vkUpdateDescriptorSets( + m_Device->GetVkDevice(), 1, &writeDescriptorSet, 0, nullptr); + + return set; +} + uint32_t CDescriptorManager::GetUniformSet() const { return m_UseDescriptorIndexing ? 1 : 0; @@ -377,26 +428,36 @@ void CDescriptorManager::OnTextureDestroy(const DeviceObjectUID uid) } else { - auto it = m_UIDToSingleTypePoolMap.find(uid); - if (it == m_UIDToSingleTypePoolMap.end()) - return; - for (const auto& entry : it->second) - { - SingleTypePool& pool = GetSingleTypePool(entry.type, entry.size); - SingleTypePool::Element& element = pool.elements[entry.elementIndex]; - // Multiple textures might be used by the same descriptor set and - // we don't need to reset it if it was already. 
- if (element.version == entry.version && element.nextFreeIndex == SingleTypePool::INVALID_INDEX) - { - ENSURE(pool.firstFreeIndex != entry.elementIndex); - element.nextFreeIndex = pool.firstFreeIndex; - pool.firstFreeIndex = entry.elementIndex; - } - } - m_UIDToSingleTypePoolMap.erase(it); + OnDeviceObjectDestroy(uid); } } +void CDescriptorManager::OnBufferDestroy(const DeviceObjectUID uid) +{ + OnDeviceObjectDestroy(uid); +} + +void CDescriptorManager::OnDeviceObjectDestroy(const DeviceObjectUID uid) +{ + auto it = m_UIDToSingleTypePoolMap.find(uid); + if (it == m_UIDToSingleTypePoolMap.end()) + return; + for (const auto& entry : it->second) + { + SingleTypePool& pool = GetSingleTypePool(entry.type, entry.size); + SingleTypePool::Element& element = pool.elements[entry.elementIndex]; + // Multiple device objects (textures or buffers) might share the same + // descriptor set, so skip it if it was already returned to the free list. + if (element.version == entry.version && element.nextFreeIndex == SingleTypePool::INVALID_INDEX) + { + ENSURE(pool.firstFreeIndex != entry.elementIndex); + element.nextFreeIndex = pool.firstFreeIndex; + pool.firstFreeIndex = entry.elementIndex; + } + } + m_UIDToSingleTypePoolMap.erase(it); +} + } // namespace Vulkan } // namespace Backend diff --git a/source/renderer/backend/vulkan/DescriptorManager.h b/source/renderer/backend/vulkan/DescriptorManager.h index f5c9904f0d..ec0e8c3485 100644 --- a/source/renderer/backend/vulkan/DescriptorManager.h +++ b/source/renderer/backend/vulkan/DescriptorManager.h @@ -20,6 +20,7 @@ #include "ps/CStrIntern.h" #include "renderer/backend/Sampler.h" +#include "renderer/backend/vulkan/Buffer.h" #include "renderer/backend/vulkan/Device.h" #include "renderer/backend/vulkan/Texture.h" @@ -61,12 +62,19 @@ public: const std::vector& texturesUID, const std::vector& textures); + VkDescriptorSet GetSingleTypeDescritorSet( + VkDescriptorType type, VkDescriptorSetLayout layout, + const std::vector& buffersUID, + const std::vector& buffers); + 
uint32_t GetUniformSet() const; uint32_t GetTextureDescriptor(CTexture* texture); void OnTextureDestroy(const DeviceObjectUID uid); + void OnBufferDestroy(const DeviceObjectUID uid); + const VkDescriptorSetLayout& GetDescriptorIndexingSetLayout() const { return m_DescriptorIndexingSetLayout; } const VkDescriptorSetLayout& GetUniformDescriptorSetLayout() const { return m_UniformDescriptorSetLayout; } const VkDescriptorSet& GetDescriptorIndexingSet() { return m_DescriptorIndexingSet; } @@ -94,6 +102,8 @@ private: VkDescriptorType type, VkDescriptorSetLayout layout, const std::vector& uids); + void OnDeviceObjectDestroy(const DeviceObjectUID uid); + CDevice* m_Device = nullptr; bool m_UseDescriptorIndexing = false; diff --git a/source/renderer/backend/vulkan/Device.cpp b/source/renderer/backend/vulkan/Device.cpp index ad8780d55e..1b09503894 100644 --- a/source/renderer/backend/vulkan/Device.cpp +++ b/source/renderer/backend/vulkan/Device.cpp @@ -580,6 +580,7 @@ std::unique_ptr CDevice::Create(SDL_Window* window) capabilities.ARBShaders = false; capabilities.ARBShadersShadow = false; capabilities.computeShaders = true; + capabilities.storage = choosenDevice.properties.limits.maxStorageBufferRange >= GiB; capabilities.instancing = true; capabilities.maxSampleCount = 1; const VkSampleCountFlags sampleCountFlags = @@ -657,7 +658,7 @@ CDevice::~CDevice() m_SubmitScheduler.reset(); - ProcessTextureToDestroyQueue(true); + ProcessDeviceObjectToDestroyQueue(true); m_RenderPassManager.reset(); m_SamplerManager.reset(); @@ -813,7 +814,7 @@ void CDevice::Present() m_SubmitScheduler->Present(*m_SwapChain); ProcessObjectToDestroyQueue(); - ProcessTextureToDestroyQueue(); + ProcessDeviceObjectToDestroyQueue(); ++m_FrameID; } @@ -928,6 +929,11 @@ void CDevice::ScheduleTextureToDestroy(const DeviceObjectUID uid) m_TextureToDestroyQueue.push({m_FrameID, uid}); } +void CDevice::ScheduleBufferToDestroy(const DeviceObjectUID uid) +{ + m_BufferToDestroyQueue.push({m_FrameID, uid}); +} + 
void CDevice::SetObjectName(VkObjectType type, const uint64_t handle, const char* name) { if (!m_Capabilities.debugLabels) @@ -1013,7 +1019,7 @@ void CDevice::ProcessObjectToDestroyQueue(const bool ignoreFrameID) } } -void CDevice::ProcessTextureToDestroyQueue(const bool ignoreFrameID) +void CDevice::ProcessDeviceObjectToDestroyQueue(const bool ignoreFrameID) { while (!m_TextureToDestroyQueue.empty() && (ignoreFrameID || m_TextureToDestroyQueue.front().first + NUMBER_OF_FRAMES_IN_FLIGHT < m_FrameID)) @@ -1021,6 +1027,13 @@ void CDevice::ProcessTextureToDestroyQueue(const bool ignoreFrameID) GetDescriptorManager().OnTextureDestroy(m_TextureToDestroyQueue.front().second); m_TextureToDestroyQueue.pop(); } + + while (!m_BufferToDestroyQueue.empty() && + (ignoreFrameID || m_BufferToDestroyQueue.front().first + NUMBER_OF_FRAMES_IN_FLIGHT < m_FrameID)) + { + GetDescriptorManager().OnBufferDestroy(m_BufferToDestroyQueue.front().second); + m_BufferToDestroyQueue.pop(); + } } CTexture* CDevice::GetCurrentBackbufferTexture() diff --git a/source/renderer/backend/vulkan/Device.h b/source/renderer/backend/vulkan/Device.h index ede13bcd6b..5cd2cd58b8 100644 --- a/source/renderer/backend/vulkan/Device.h +++ b/source/renderer/backend/vulkan/Device.h @@ -145,6 +145,8 @@ public: void ScheduleTextureToDestroy(const DeviceObjectUID uid); + void ScheduleBufferToDestroy(const DeviceObjectUID uid); + void SetObjectName(VkObjectType type, const void* handle, const char* name) { SetObjectName(type, reinterpret_cast(handle), name); @@ -174,7 +176,7 @@ private: void RecreateSwapChain(); bool IsSwapChainValid(); void ProcessObjectToDestroyQueue(const bool ignoreFrameID = false); - void ProcessTextureToDestroyQueue(const bool ignoreFrameID = false); + void ProcessDeviceObjectToDestroyQueue(const bool ignoreFrameID = false); bool IsFormatSupportedForUsage(const Format format, const uint32_t usage) const; @@ -216,6 +218,7 @@ private: }; std::queue m_ObjectToDestroyQueue; std::queue> 
m_TextureToDestroyQueue; + std::queue> m_BufferToDestroyQueue; std::unique_ptr m_RenderPassManager; std::unique_ptr m_SamplerManager; diff --git a/source/renderer/backend/vulkan/DeviceCommandContext.cpp b/source/renderer/backend/vulkan/DeviceCommandContext.cpp index 054520749c..fd6d026217 100644 --- a/source/renderer/backend/vulkan/DeviceCommandContext.cpp +++ b/source/renderer/backend/vulkan/DeviceCommandContext.cpp @@ -29,6 +29,7 @@ #include "renderer/backend/vulkan/DescriptorManager.h" #include "renderer/backend/vulkan/Device.h" #include "renderer/backend/vulkan/Framebuffer.h" +#include "renderer/backend/vulkan/Mapping.h" #include "renderer/backend/vulkan/PipelineState.h" #include "renderer/backend/vulkan/RingCommandContext.h" #include "renderer/backend/vulkan/ShaderProgram.h" @@ -51,7 +52,7 @@ namespace Vulkan namespace { -constexpr uint32_t UNIFORM_BUFFER_INITIAL_SIZE = 1024 * 1024; +constexpr uint32_t UNIFORM_BUFFER_INITIAL_SIZE = 1024 * 1024 * 32; constexpr uint32_t FRAME_INPLACE_BUFFER_INITIAL_SIZE = 128 * 1024; struct SBaseImageState @@ -915,6 +916,17 @@ void CDeviceCommandContext::Dispatch( m_ShaderProgram->PostDispatch(*m_CommandContext); } +void CDeviceCommandContext::InsertMemoryBarrier( + const uint32_t srcStageMask, const uint32_t dstStageMask, + const uint32_t srcAccessMask, const uint32_t dstAccessMask) +{ + ENSURE(!m_InsideFramebufferPass); + Utilities::SubmitMemoryBarrier( + m_CommandContext->GetCommandBuffer(), + Mapping::FromAccessMask(srcAccessMask), Mapping::FromAccessMask(dstAccessMask), + Mapping::FromPipelineStageMask(srcStageMask), Mapping::FromPipelineStageMask(dstStageMask)); +} + void CDeviceCommandContext::SetTexture(const int32_t bindingSlot, ITexture* texture) { if (bindingSlot < 0) @@ -948,6 +960,16 @@ void CDeviceCommandContext::SetStorageTexture(const int32_t bindingSlot, ITextur m_ShaderProgram->SetStorageTexture(bindingSlot, textureToBind); } +void CDeviceCommandContext::SetStorageBuffer(const int32_t bindingSlot, IBuffer* 
buffer) +{ + ENSURE(m_InsidePass || m_InsideComputePass); + ENSURE(buffer); + CBuffer* bufferToBind = buffer->As(); + ENSURE(bufferToBind->GetType() == IBuffer::Type::VERTEX || bufferToBind->GetType() == IBuffer::Type::INDEX); + ENSURE(bufferToBind->GetUsage() & IBuffer::Usage::STORAGE); + m_ShaderProgram->SetStorageBuffer(bindingSlot, bufferToBind); +} + void CDeviceCommandContext::SetUniform( const int32_t bindingSlot, const float value) diff --git a/source/renderer/backend/vulkan/DeviceCommandContext.h b/source/renderer/backend/vulkan/DeviceCommandContext.h index 03397b2d30..0bf489c73d 100644 --- a/source/renderer/backend/vulkan/DeviceCommandContext.h +++ b/source/renderer/backend/vulkan/DeviceCommandContext.h @@ -125,9 +125,14 @@ public: const uint32_t groupCountY, const uint32_t groupCountZ) override; + void InsertMemoryBarrier( + const uint32_t srcStageMask, const uint32_t dstStageMask, + const uint32_t srcAccessMask, const uint32_t dstAccessMask) override; + void SetTexture(const int32_t bindingSlot, ITexture* texture) override; void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) override; + void SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer) override; void SetUniform( const int32_t bindingSlot, diff --git a/source/renderer/backend/vulkan/Mapping.cpp b/source/renderer/backend/vulkan/Mapping.cpp index ada46127fb..abf72eb329 100644 --- a/source/renderer/backend/vulkan/Mapping.cpp +++ b/source/renderer/backend/vulkan/Mapping.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 Wildfire Games. +/* Copyright (C) 2024 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. 
is free software: you can redistribute it and/or modify @@ -175,9 +175,15 @@ VkFormat FromFormat(const Format format) CASE(R16_UNORM) CASE(R16_UINT) CASE(R16_SINT) + CASE(R16_SFLOAT) CASE(R16G16_UNORM) CASE(R16G16_UINT) CASE(R16G16_SINT) + CASE(R16G16_SFLOAT) + + CASE(R16G16B16_SFLOAT) + + CASE(R16G16B16A16_SFLOAT) CASE(R32_SFLOAT) CASE(R32G32_SFLOAT) @@ -273,6 +279,69 @@ VkAttachmentStoreOp FromAttachmentStoreOp(const AttachmentStoreOp storeOp) return resultStoreOp; } +VkPipelineStageFlags FromPipelineStageMask(const uint32_t mask) +{ + VkPipelineStageFlags flags{0}; + uint32_t checkedMask{0}; +#define CASE(NAME) \ + if (mask & PipelineStage::NAME) { flags |= VK_PIPELINE_STAGE_##NAME##_BIT; checkedMask |= PipelineStage::NAME; } +#define CASE2(NAME, VK_NAME) \ + if (mask & PipelineStage::NAME) { flags |= VK_NAME; checkedMask |= PipelineStage::NAME; } + + CASE(DRAW_INDIRECT) + CASE(VERTEX_INPUT) + CASE(VERTEX_SHADER) + CASE(FRAGMENT_SHADER) + CASE(EARLY_FRAGMENT_TESTS) + CASE(LATE_FRAGMENT_TESTS) + CASE(COLOR_ATTACHMENT_OUTPUT) + CASE(COMPUTE_SHADER) + CASE(TRANSFER) + CASE(HOST) + CASE2(ACCELERATION_STRUCTURE_BUILD, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR) + CASE2(RAY_TRACING_SHADER, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR) + CASE2(TASK_SHADER, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT) + CASE2(MESH_SHADER, VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT) +#undef CASE +#undef CASE2 + ENSURE(mask == checkedMask); + return flags; +} + +VkAccessFlags FromAccessMask(const uint32_t mask) +{ + VkAccessFlags flags{0}; + uint32_t checkedMask{0}; +#define CASE(NAME) \ + if (mask & Access::NAME) { flags |= VK_ACCESS_##NAME##_BIT; checkedMask |= Access::NAME; } +#define CASE2(NAME, VK_NAME) \ + if (mask & Access::NAME) { flags |= VK_NAME; checkedMask |= Access::NAME; } + + CASE(INDIRECT_COMMAND_READ) + CASE(INDEX_READ) + CASE(VERTEX_ATTRIBUTE_READ) + CASE(UNIFORM_READ) + CASE(INPUT_ATTACHMENT_READ) + CASE(SHADER_READ) + CASE(SHADER_WRITE) + CASE(COLOR_ATTACHMENT_READ) + 
CASE(COLOR_ATTACHMENT_WRITE) + CASE(DEPTH_STENCIL_ATTACHMENT_READ) + CASE(DEPTH_STENCIL_ATTACHMENT_WRITE) + CASE(TRANSFER_READ) + CASE(TRANSFER_WRITE) + CASE(HOST_READ) + CASE(HOST_WRITE) + CASE(MEMORY_READ) + CASE(MEMORY_WRITE) + CASE2(ACCELERATION_STRUCTURE_READ, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR) + CASE2(ACCELERATION_STRUCTURE_WRITE, VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR) +#undef CASE +#undef CASE2 + ENSURE(mask == checkedMask); + return flags; +} + } // namespace Mapping } // namespace Vulkan diff --git a/source/renderer/backend/vulkan/Mapping.h b/source/renderer/backend/vulkan/Mapping.h index 9a2ee4fa7d..19af3cbea9 100644 --- a/source/renderer/backend/vulkan/Mapping.h +++ b/source/renderer/backend/vulkan/Mapping.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 Wildfire Games. +/* Copyright (C) 2024 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -18,6 +18,7 @@ #ifndef INCLUDED_RENDERER_BACKEND_VULKAN_MAPPING #define INCLUDED_RENDERER_BACKEND_VULKAN_MAPPING +#include "renderer/backend/Barrier.h" #include "renderer/backend/Format.h" #include "renderer/backend/IFramebuffer.h" #include "renderer/backend/PipelineState.h" @@ -61,6 +62,10 @@ VkAttachmentLoadOp FromAttachmentLoadOp(const AttachmentLoadOp loadOp); VkAttachmentStoreOp FromAttachmentStoreOp(const AttachmentStoreOp storeOp); +VkPipelineStageFlags FromPipelineStageMask(const uint32_t mask); + +VkAccessFlags FromAccessMask(const uint32_t mask); + } // namespace Mapping } // namespace Vulkan diff --git a/source/renderer/backend/vulkan/ShaderProgram.cpp b/source/renderer/backend/vulkan/ShaderProgram.cpp index 7999eae7cc..54173d13d7 100644 --- a/source/renderer/backend/vulkan/ShaderProgram.cpp +++ b/source/renderer/backend/vulkan/ShaderProgram.cpp @@ -53,7 +53,8 @@ enum class BindingSlotType PUSH_CONSTANT, UNIFORM, TEXTURE, - STORAGE_IMAGE + STORAGE_IMAGE, + STORAGE_BUFFER }; constexpr uint32_t BINDING_SLOT_TYPE_SHIFT{16u}; @@ -247,6 +248,10 @@ 
std::unique_ptr CShaderProgram::Create( uint32_t storageImageDescriptorSetSize = 0; std::unordered_map storageImageMapping; + VkDescriptorType storageBufferDescriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM; + uint32_t storageBufferDescriptorSetSize = 0; + std::unordered_map storageBufferMapping; + auto addDescriptorSets = [&](const XMBElement& element) -> bool { const bool useDescriptorIndexing = @@ -325,15 +330,13 @@ std::unique_ptr CShaderProgram::Create( texturesDescriptorSetSize = std::max(texturesDescriptorSetSize, binding + 1); } - else if (type == "storageImage" || type == "storageBuffer") + else if (type == "storageImage") { const CStrIntern name{attributes.GetNamedItem(at_name)}; storageImageMapping[name] = binding; storageImageDescriptorSetSize = std::max(storageImageDescriptorSetSize, binding + 1); - const VkDescriptorType descriptorType = type == "storageBuffer" - ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER - : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + const VkDescriptorType descriptorType{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE}; if (storageImageDescriptorType == VK_DESCRIPTOR_TYPE_MAX_ENUM) storageImageDescriptorType = descriptorType; else if (storageImageDescriptorType != descriptorType) @@ -342,6 +345,21 @@ std::unique_ptr CShaderProgram::Create( return false; } } + else if (type == "storageBuffer") + { + const CStrIntern name{attributes.GetNamedItem(at_name)}; + storageBufferMapping[name] = binding; + storageBufferDescriptorSetSize = + std::max(storageBufferDescriptorSetSize, binding + 1); + const VkDescriptorType descriptorType{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER}; + if (storageBufferDescriptorType == VK_DESCRIPTOR_TYPE_MAX_ENUM) + storageBufferDescriptorType = descriptorType; + else if (storageBufferDescriptorType != descriptorType) + { + LOGERROR("Shader should have storages of the same type."); + return false; + } + } else { LOGERROR("Unsupported binding: '%s'", type.c_str()); @@ -573,6 +591,12 @@ std::unique_ptr CShaderProgram::Create( device, storageImageDescriptorType, 
storageImageDescriptorSetSize, std::move(storageImageMapping)); layouts.emplace_back(shaderProgram->m_StorageImageBinding->GetDescriptorSetLayout()); } + if (storageBufferDescriptorSetSize > 0) + { + shaderProgram->m_StorageBufferBinding.emplace( + device, storageBufferDescriptorType, storageBufferDescriptorSetSize, std::move(storageBufferMapping)); + layouts.emplace_back(shaderProgram->m_StorageBufferBinding->GetDescriptorSetLayout()); + } VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{}; pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; @@ -620,6 +644,8 @@ int32_t CShaderProgram::GetBindingSlot(const CStrIntern name) const return (static_cast(BindingSlotType::TEXTURE) << BINDING_SLOT_TYPE_SHIFT) | bindingSlot; if (const int32_t bindingSlot = m_StorageImageBinding.has_value() ? m_StorageImageBinding->GetBindingSlot(name) : -1; bindingSlot != -1) return (static_cast(BindingSlotType::STORAGE_IMAGE) << BINDING_SLOT_TYPE_SHIFT) | bindingSlot; + if (const int32_t bindingSlot = m_StorageBufferBinding.has_value() ? m_StorageBufferBinding->GetBindingSlot(name) : -1; bindingSlot != -1) + return (static_cast(BindingSlotType::STORAGE_BUFFER) << BINDING_SLOT_TYPE_SHIFT) | bindingSlot; return -1; } @@ -646,6 +672,8 @@ void CShaderProgram::Unbind() m_TextureBinding->Unbind(); if (m_StorageImageBinding.has_value()) m_StorageImageBinding->Unbind(); + if (m_StorageBufferBinding.has_value()) + m_StorageBufferBinding->Unbind(); } void CShaderProgram::PreDraw(CRingCommandContext& commandContext) @@ -730,6 +758,15 @@ void CShaderProgram::BindOutdatedDescriptorSets( constexpr uint32_t STORAGE_IMAGE_BINDING_SET = 2u; descriptortSets.emplace_back(STORAGE_IMAGE_BINDING_SET, m_StorageImageBinding->UpdateAndReturnDescriptorSet()); } + if (m_StorageBufferBinding.has_value() && m_StorageBufferBinding->IsOutdated()) + { + // Currently we assume that in compute shaders we use either textures + // or buffers but not together.
+ const uint32_t STORAGE_BUFFER_BINDING_SET{ + m_Device->GetDescriptorManager().UseDescriptorIndexing() ? 2u : 1u}; + descriptortSets.emplace_back( + STORAGE_BUFFER_BINDING_SET, m_StorageBufferBinding->UpdateAndReturnDescriptorSet()); + } for (const auto& [firstSet, descriptorSet] : descriptortSets) { @@ -801,8 +838,7 @@ std::pair CShaderProgram::GetUniformData( m_MaterialConstantsDataOutdated = true; const uint32_t size = uniform.size; const uint32_t offset = uniform.offset; - ENSURE(size <= dataSize); - return {m_MaterialConstantsData.get() + offset, size}; + return {m_MaterialConstantsData.get() + offset, std::min(dataSize, size)}; } } @@ -841,6 +877,16 @@ void CShaderProgram::SetStorageTexture(const int32_t bindingSlot, CTexture* text m_StorageImageBinding->SetObject(index, texture); } +void CShaderProgram::SetStorageBuffer(const int32_t bindingSlot, CBuffer* buffer) +{ + if (bindingSlot < 0) + return; + ENSURE(static_cast(bindingSlot >> BINDING_SLOT_TYPE_SHIFT) == BindingSlotType::STORAGE_BUFFER); + const uint32_t index{bindingSlot & BINDING_SLOT_VALUE_MASK}; + ENSURE(m_StorageBufferBinding.has_value()); + m_StorageBufferBinding->SetObject(index, buffer); +} + } // namespace Vulkan } // namespace Backend diff --git a/source/renderer/backend/vulkan/ShaderProgram.h b/source/renderer/backend/vulkan/ShaderProgram.h index ee79c3d8af..ec1b81e429 100644 --- a/source/renderer/backend/vulkan/ShaderProgram.h +++ b/source/renderer/backend/vulkan/ShaderProgram.h @@ -122,6 +122,7 @@ public: void SetTexture(const int32_t bindingSlot, CTexture* texture); void SetStorageTexture(const int32_t bindingSlot, CTexture* texture); + void SetStorageBuffer(const int32_t bindingSlot, CBuffer* buffer); // TODO: rename to something related to buffer. 
bool IsMaterialConstantsDataOutdated() const { return m_MaterialConstantsDataOutdated; } @@ -178,6 +179,7 @@ private: std::optional> m_TextureBinding; std::optional> m_StorageImageBinding; + std::optional> m_StorageBufferBinding; std::unordered_map m_StreamLocations; };