diff --git a/source/renderer/backend/Barrier.h b/source/renderer/backend/Barrier.h
new file mode 100644
index 0000000000..a946ba1ecc
--- /dev/null
+++ b/source/renderer/backend/Barrier.h
@@ -0,0 +1,108 @@
+/* Copyright (C) 2024 Wildfire Games.
+ * This file is part of 0 A.D.
+ *
+ * 0 A.D. is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * 0 A.D. is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with 0 A.D. If not, see .
+ */
+
+#ifndef INCLUDED_RENDERER_BACKEND_BARRIER
+#define INCLUDED_RENDERER_BACKEND_BARRIER
+
+namespace Renderer
+{
+
+namespace Backend
+{
+
+// PipelineStageFlags and AccessFlags are mostly taken from the Vulkan
+// specification.
+
+namespace PipelineStage
+{
+static constexpr uint32_t DRAW_INDIRECT{
+ 1u << 0u};
+static constexpr uint32_t VERTEX_INPUT{
+ 1u << 1u};
+static constexpr uint32_t VERTEX_SHADER{
+ 1u << 2u};
+static constexpr uint32_t FRAGMENT_SHADER{
+ 1u << 3u};
+static constexpr uint32_t EARLY_FRAGMENT_TESTS{
+ 1u << 4u};
+static constexpr uint32_t LATE_FRAGMENT_TESTS{
+ 1u << 5u};
+static constexpr uint32_t COLOR_ATTACHMENT_OUTPUT{
+ 1u << 6u};
+static constexpr uint32_t COMPUTE_SHADER{
+ 1u << 7u};
+static constexpr uint32_t TRANSFER{
+ 1u << 8u};
+static constexpr uint32_t HOST{
+ 1u << 9u};
+static constexpr uint32_t ACCELERATION_STRUCTURE_BUILD{
+ 1u << 10u};
+static constexpr uint32_t RAY_TRACING_SHADER{
+ 1u << 11u};
+static constexpr uint32_t TASK_SHADER{
+ 1u << 12u};
+static constexpr uint32_t MESH_SHADER{
+ 1u << 13u};
+} // namespace PipelineStage
+
+namespace Access
+{
+static constexpr uint32_t INDIRECT_COMMAND_READ{
+ 1u << 0u};
+static constexpr uint32_t INDEX_READ{
+ 1u << 1u};
+static constexpr uint32_t VERTEX_ATTRIBUTE_READ{
+ 1u << 2u};
+static constexpr uint32_t UNIFORM_READ{
+ 1u << 3u};
+static constexpr uint32_t INPUT_ATTACHMENT_READ{
+ 1u << 4u};
+static constexpr uint32_t SHADER_READ{
+ 1u << 5u};
+static constexpr uint32_t SHADER_WRITE{
+ 1u << 6u};
+static constexpr uint32_t COLOR_ATTACHMENT_READ{
+ 1u << 7u};
+static constexpr uint32_t COLOR_ATTACHMENT_WRITE{
+ 1u << 8u};
+static constexpr uint32_t DEPTH_STENCIL_ATTACHMENT_READ{
+ 1u << 9u};
+static constexpr uint32_t DEPTH_STENCIL_ATTACHMENT_WRITE{
+ 1u << 10u};
+static constexpr uint32_t TRANSFER_READ{
+ 1u << 11u};
+static constexpr uint32_t TRANSFER_WRITE{
+ 1u << 12u};
+static constexpr uint32_t HOST_READ{
+ 1u << 13u};
+static constexpr uint32_t HOST_WRITE{
+ 1u << 14u};
+static constexpr uint32_t MEMORY_READ{
+ 1u << 15u};
+static constexpr uint32_t MEMORY_WRITE{
+ 1u << 16u};
+static constexpr uint32_t ACCELERATION_STRUCTURE_READ{
+ 1u << 17u};
+static constexpr uint32_t ACCELERATION_STRUCTURE_WRITE{
+ 1u << 18u};
+} // namespace Access
+
+} // namespace Backend
+
+} // namespace Renderer
+
+#endif // INCLUDED_RENDERER_BACKEND_BARRIER
diff --git a/source/renderer/backend/Format.h b/source/renderer/backend/Format.h
index 570b2eab1a..8a797e700c 100644
--- a/source/renderer/backend/Format.h
+++ b/source/renderer/backend/Format.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@@ -43,9 +43,13 @@ enum class Format
R16_UNORM,
R16_UINT,
R16_SINT,
+ R16_SFLOAT,
R16G16_UNORM,
R16G16_UINT,
R16G16_SINT,
+ R16G16_SFLOAT,
+ R16G16B16_SFLOAT,
+ R16G16B16A16_SFLOAT,
R32_SFLOAT,
R32G32_SFLOAT,
diff --git a/source/renderer/backend/IBuffer.h b/source/renderer/backend/IBuffer.h
index 8e7d29bd3b..e873cd75b6 100644
--- a/source/renderer/backend/IBuffer.h
+++ b/source/renderer/backend/IBuffer.h
@@ -46,6 +46,7 @@ public:
static constexpr uint32_t DYNAMIC = 1u << 0u;
static constexpr uint32_t TRANSFER_SRC = 1u << 1u;
static constexpr uint32_t TRANSFER_DST = 1u << 2u;
+ static constexpr uint32_t STORAGE = 1u << 3u;
};
virtual Type GetType() const = 0;
diff --git a/source/renderer/backend/IDevice.h b/source/renderer/backend/IDevice.h
index e50f523eaa..e6a4a796c0 100644
--- a/source/renderer/backend/IDevice.h
+++ b/source/renderer/backend/IDevice.h
@@ -61,6 +61,7 @@ public:
float maxAnisotropy;
uint32_t maxTextureSize;
bool instancing;
+ bool storage;
};
virtual ~IDevice() {}
diff --git a/source/renderer/backend/IDeviceCommandContext.h b/source/renderer/backend/IDeviceCommandContext.h
index 2dbdf29f0b..57f606b8af 100644
--- a/source/renderer/backend/IDeviceCommandContext.h
+++ b/source/renderer/backend/IDeviceCommandContext.h
@@ -19,6 +19,7 @@
#define INCLUDED_RENDERER_BACKEND_IDEVICECOMMANDCONTEXT
#include "ps/containers/Span.h"
+#include "renderer/backend/Barrier.h"
#include "renderer/backend/Format.h"
#include "renderer/backend/IDeviceObject.h"
#include "renderer/backend/PipelineState.h"
@@ -184,6 +185,15 @@ public:
const uint32_t groupCountY,
const uint32_t groupCountZ) = 0;
+ /**
+ * Inserts a memory barrier which guarantees that all memory accesses
+ * matched by `srcAccessMask` in src are completed before all memory accesses
+ * described by `dstAccessMask` in dst.
+ */
+ virtual void InsertMemoryBarrier(
+ const uint32_t srcStageMask, const uint32_t dstStageMask,
+ const uint32_t srcAccessMask, const uint32_t dstAccessMask) = 0;
+
/**
* Sets a read-only texture to the binding slot.
*/
@@ -193,6 +203,7 @@ public:
* Sets a read & write resource to the binding slot.
*/
virtual void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) = 0;
+ virtual void SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer) = 0;
virtual void SetUniform(
const int32_t bindingSlot,
diff --git a/source/renderer/backend/dummy/DeviceCommandContext.cpp b/source/renderer/backend/dummy/DeviceCommandContext.cpp
index 0ffa1869fa..8caccf7b15 100644
--- a/source/renderer/backend/dummy/DeviceCommandContext.cpp
+++ b/source/renderer/backend/dummy/DeviceCommandContext.cpp
@@ -202,6 +202,11 @@ void CDeviceCommandContext::Dispatch(const uint32_t, const uint32_t, const uint3
{
}
+void CDeviceCommandContext::InsertMemoryBarrier(
+ const uint32_t, const uint32_t, const uint32_t, const uint32_t)
+{
+}
+
void CDeviceCommandContext::SetTexture(const int32_t, ITexture*)
{
}
@@ -210,6 +215,10 @@ void CDeviceCommandContext::SetStorageTexture(const int32_t, ITexture*)
{
}
+void CDeviceCommandContext::SetStorageBuffer(const int32_t, IBuffer*)
+{
+}
+
void CDeviceCommandContext::SetUniform(const int32_t, const float)
{
}
diff --git a/source/renderer/backend/dummy/DeviceCommandContext.h b/source/renderer/backend/dummy/DeviceCommandContext.h
index 8e5fff595c..82ba9c6a42 100644
--- a/source/renderer/backend/dummy/DeviceCommandContext.h
+++ b/source/renderer/backend/dummy/DeviceCommandContext.h
@@ -120,9 +120,14 @@ public:
const uint32_t groupCountY,
const uint32_t groupCountZ) override;
+ void InsertMemoryBarrier(
+ const uint32_t srcStageMask, const uint32_t dstStageMask,
+ const uint32_t srcAccessMask, const uint32_t dstAccessMask) override;
+
void SetTexture(const int32_t bindingSlot, ITexture* texture) override;
void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) override;
+ void SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer) override;
void SetUniform(
const int32_t bindingSlot,
diff --git a/source/renderer/backend/gl/Buffer.cpp b/source/renderer/backend/gl/Buffer.cpp
index b4fab48897..99089ac64e 100644
--- a/source/renderer/backend/gl/Buffer.cpp
+++ b/source/renderer/backend/gl/Buffer.cpp
@@ -35,19 +35,42 @@ namespace Backend
namespace GL
{
+namespace
+{
+
+GLenum GetTargetFromBufferType(const IBuffer::Type type)
+{
+ GLenum target{GL_ARRAY_BUFFER};
+ switch (type)
+ {
+ case IBuffer::Type::INDEX:
+ target = GL_ELEMENT_ARRAY_BUFFER;
+ break;
+ case IBuffer::Type::UNIFORM:
+ target = GL_UNIFORM_BUFFER;
+ break;
+ default:
+ target = GL_ARRAY_BUFFER;
+ break;
+ }
+ return target;
+}
+
+} // anonymous namespace
+
// static
std::unique_ptr CBuffer::Create(
CDevice* device, const char* name,
const Type type, const uint32_t size, const uint32_t usage)
{
- ENSURE(type == Type::VERTEX || type == Type::INDEX);
+ ENSURE(type == Type::VERTEX || type == Type::INDEX || type == Type::UNIFORM);
std::unique_ptr buffer(new CBuffer());
buffer->m_Device = device;
buffer->m_Type = type;
buffer->m_Size = size;
buffer->m_Usage = usage;
glGenBuffersARB(1, &buffer->m_Handle);
- const GLenum target = type == Type::INDEX ? GL_ELEMENT_ARRAY_BUFFER : GL_ARRAY_BUFFER;
+ const GLenum target{GetTargetFromBufferType(type)};
glBindBufferARB(target, buffer->m_Handle);
glBufferDataARB(target, size, nullptr, (usage & IBuffer::Usage::DYNAMIC) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW);
#if !CONFIG2_GLES
diff --git a/source/renderer/backend/gl/Device.cpp b/source/renderer/backend/gl/Device.cpp
index 45d8e0e363..47bd6ba312 100644
--- a/source/renderer/backend/gl/Device.cpp
+++ b/source/renderer/backend/gl/Device.cpp
@@ -441,12 +441,23 @@ std::unique_ptr CDevice::Create(SDL_Window* window, const bool arb)
#if CONFIG2_GLES
capabilities.instancing = false;
+ capabilities.storage = false;
#else
capabilities.instancing =
!device->m_ARB &&
(ogl_HaveVersion(3, 3) ||
(ogl_HaveExtension("GL_ARB_draw_instanced") &&
ogl_HaveExtension("GL_ARB_instanced_arrays")));
+ GLint maxStorageBufferSize{0};
+ if (ogl_HaveExtension("GL_ARB_shader_storage_buffer_object"))
+ glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &maxStorageBufferSize);
+ capabilities.storage =
+ capabilities.computeShaders && maxStorageBufferSize > 0
+ && static_cast(maxStorageBufferSize) >= 128 * MiB
+ && ogl_HaveExtension("GL_ARB_uniform_buffer_object")
+ && ogl_HaveExtension("GL_ARB_shader_storage_buffer_object")
+ && ogl_HaveExtension("GL_ARB_half_float_vertex")
+ && ogl_HaveExtension("GL_ARB_program_interface_query");
#endif
return device;
@@ -762,6 +773,18 @@ void CDevice::Report(const ScriptRequest& rq, JS::HandleValue settings)
INTEGER(MAX_VERTEX_VARYING_COMPONENTS_ARB);
}
+ if (ogl_HaveExtension("GL_ARB_uniform_buffer_object"))
+ {
+ INTEGER(MAX_UNIFORM_BLOCK_SIZE);
+ INTEGER(MAX_UNIFORM_BUFFER_BINDINGS);
+ }
+
+ if (ogl_HaveExtension("GL_ARB_shader_storage_buffer_object"))
+ {
+ INTEGER(MAX_SHADER_STORAGE_BLOCK_SIZE);
+ INTEGER(MAX_SHADER_STORAGE_BUFFER_BINDINGS);
+ }
+
#else // CONFIG2_GLES
// Core OpenGL ES 2.0:
diff --git a/source/renderer/backend/gl/DeviceCommandContext.cpp b/source/renderer/backend/gl/DeviceCommandContext.cpp
index cb17233641..8b1567d6e4 100644
--- a/source/renderer/backend/gl/DeviceCommandContext.cpp
+++ b/source/renderer/backend/gl/DeviceCommandContext.cpp
@@ -103,8 +103,10 @@ GLenum BufferTypeToGLTarget(const CBuffer::Type type)
case CBuffer::Type::INDEX:
target = GL_ELEMENT_ARRAY_BUFFER;
break;
- case CBuffer::Type::UPLOAD:
case CBuffer::Type::UNIFORM:
+ target = GL_UNIFORM_BUFFER;
+ break;
+ case CBuffer::Type::UPLOAD:
debug_warn("Unsupported buffer type.");
break;
};
@@ -451,7 +453,9 @@ void CDeviceCommandContext::UploadBufferRegion(
ENSURE(dataOffset + dataSize <= buffer->GetSize());
const GLenum target = BufferTypeToGLTarget(buffer->GetType());
ScopedBufferBind scopedBufferBind(this, buffer->As());
- if (buffer->IsDynamic())
+ // Uniform buffers is a relatively new feature so we don't need to use a
+ // dynamic upload.
+ if (buffer->IsDynamic() && buffer->GetType() != IBuffer::Type::UNIFORM)
{
UploadDynamicBufferRegionImpl(target, buffer->GetSize(), dataOffset, dataSize, [data, dataSize](u8* mappedData)
{
@@ -1257,12 +1261,16 @@ void CDeviceCommandContext::Dispatch(
const uint32_t groupCountY,
const uint32_t groupCountZ)
{
-#if !CONFIG2_GLES
+#if !CONFIG2_GLES
ENSURE(m_InsideComputePass);
glDispatchCompute(groupCountX, groupCountY, groupCountZ);
- // TODO: we might want to do binding tracking to avoid redundant barriers.
- glMemoryBarrier(
- GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);
+ // Storage buffers should be managed explicitly by InsertMemoryBarrier.
+ if (m_ShaderProgram->HasImageUniforms())
+ {
+ // TODO: we might want to do binding tracking to avoid redundant barriers.
+ glMemoryBarrier(
+ GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT );
+ }
#else
UNUSED2(groupCountX);
UNUSED2(groupCountY);
@@ -1270,6 +1278,35 @@ void CDeviceCommandContext::Dispatch(
#endif
}
+void CDeviceCommandContext::InsertMemoryBarrier(
+ const uint32_t UNUSED(srcStageMask), const uint32_t dstStageMask,
+ const uint32_t srcAccessMask, const uint32_t dstAccessMask)
+{
+#if !CONFIG2_GLES
+ ENSURE(!m_InsideFramebufferPass);
+ GLbitfield barriers{0};
+ if (srcAccessMask & Access::SHADER_WRITE)
+ {
+ if (dstStageMask & PipelineStage::VERTEX_INPUT)
+ {
+ if (dstAccessMask & Access::VERTEX_ATTRIBUTE_READ)
+ barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
+ if (dstAccessMask & Access::INDEX_READ)
+ barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
+ }
+ if (dstStageMask & (PipelineStage::VERTEX_SHADER | PipelineStage::FRAGMENT_SHADER | PipelineStage::COMPUTE_SHADER))
+ {
+ if (dstAccessMask & (Access::SHADER_READ | Access::SHADER_WRITE))
+ barriers |= GL_SHADER_STORAGE_BARRIER_BIT;
+ if (dstAccessMask & Access::UNIFORM_READ)
+ barriers |= GL_UNIFORM_BARRIER_BIT;
+ }
+ }
+ if (barriers)
+ glMemoryBarrier(barriers);
+#endif
+}
+
void CDeviceCommandContext::SetTexture(const int32_t bindingSlot, ITexture* texture)
{
ENSURE(m_ShaderProgram);
@@ -1332,6 +1369,21 @@ void CDeviceCommandContext::SetStorageTexture(const int32_t bindingSlot, ITextur
#endif
}
+void CDeviceCommandContext::SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer)
+{
+#if !CONFIG2_GLES
+ if (bindingSlot < 0)
+ return;
+ ENSURE(m_ShaderProgram);
+ ENSURE(buffer);
+ ENSURE(buffer->GetUsage() & Renderer::Backend::IBuffer::Usage::STORAGE);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, m_ShaderProgram->GetStorageBuffer(bindingSlot), buffer->As()->GetHandle());
+#else
+ UNUSED2(bindingSlot);
+ UNUSED2(buffer);
+#endif
+}
+
void CDeviceCommandContext::SetUniform(
const int32_t bindingSlot,
const float value)
diff --git a/source/renderer/backend/gl/DeviceCommandContext.h b/source/renderer/backend/gl/DeviceCommandContext.h
index 02a2954d01..5f1a251007 100644
--- a/source/renderer/backend/gl/DeviceCommandContext.h
+++ b/source/renderer/backend/gl/DeviceCommandContext.h
@@ -129,9 +129,14 @@ public:
const uint32_t groupCountY,
const uint32_t groupCountZ) override;
+ void InsertMemoryBarrier(
+ const uint32_t srcStageMask, const uint32_t dstStageMask,
+ const uint32_t srcAccessMask, const uint32_t dstAccessMask) override;
+
void SetTexture(const int32_t bindingSlot, ITexture* texture) override;
void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) override;
+ void SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer) override;
void SetUniform(
const int32_t bindingSlot,
diff --git a/source/renderer/backend/gl/ShaderProgram.cpp b/source/renderer/backend/gl/ShaderProgram.cpp
index 6c5471672f..ea78921630 100644
--- a/source/renderer/backend/gl/ShaderProgram.cpp
+++ b/source/renderer/backend/gl/ShaderProgram.cpp
@@ -28,6 +28,7 @@
#include "ps/Filesystem.h"
#include "ps/Profile.h"
#include "ps/XML/Xeromyces.h"
+#include "renderer/backend/gl/Buffer.h"
#include "renderer/backend/gl/Device.h"
#include "renderer/backend/gl/DeviceCommandContext.h"
@@ -80,17 +81,22 @@ GLint GLSizeFromFormat(const Format format)
{
GLint size = 1;
if (format == Renderer::Backend::Format::R32_SFLOAT ||
- format == Renderer::Backend::Format::R16_SINT)
+ format == Renderer::Backend::Format::R16_SINT ||
+ format == Renderer::Backend::Format::R16_SFLOAT)
size = 1;
else if (
format == Renderer::Backend::Format::R8G8_UNORM ||
format == Renderer::Backend::Format::R8G8_UINT ||
format == Renderer::Backend::Format::R16G16_SINT ||
+ format == Renderer::Backend::Format::R16G16_SFLOAT ||
format == Renderer::Backend::Format::R32G32_SFLOAT)
size = 2;
- else if (format == Renderer::Backend::Format::R32G32B32_SFLOAT)
+ else if (
+ format == Renderer::Backend::Format::R16G16B16_SFLOAT ||
+ format == Renderer::Backend::Format::R32G32B32_SFLOAT)
size = 3;
else if (
+ format == Renderer::Backend::Format::R16G16B16A16_SFLOAT ||
format == Renderer::Backend::Format::R32G32B32A32_SFLOAT ||
format == Renderer::Backend::Format::R8G8B8A8_UNORM ||
format == Renderer::Backend::Format::R8G8B8A8_UINT)
@@ -108,6 +114,13 @@ GLenum GLTypeFromFormat(const Format format)
format == Renderer::Backend::Format::R32G32B32_SFLOAT ||
format == Renderer::Backend::Format::R32G32B32A32_SFLOAT)
type = GL_FLOAT;
+#if !CONFIG2_GLES
+ else if (format == Renderer::Backend::Format::R16_SFLOAT ||
+ format == Renderer::Backend::Format::R16G16_SFLOAT ||
+ format == Renderer::Backend::Format::R16G16B16_SFLOAT ||
+ format == Renderer::Backend::Format::R16G16B16A16_SFLOAT)
+ type = GL_HALF_FLOAT;
+#endif
else if (
format == Renderer::Backend::Format::R16_SINT ||
format == Renderer::Backend::Format::R16G16_SINT)
@@ -444,6 +457,12 @@ public:
return textureUnit;
}
+ GLuint GetStorageBuffer(const int32_t UNUSED(bindingSlot)) override
+ {
+ debug_warn("ARB shaders don't support storage buffers.");
+ return 0;
+ }
+
void SetUniform(
const int32_t bindingSlot,
const float value) override
@@ -789,6 +808,59 @@ public:
std::vector occupiedUnits;
+#if !CONFIG2_GLES
+ const bool isStorageSupported{m_Device->GetCapabilities().storage};
+ if (isStorageSupported)
+ {
+ constexpr GLint maxBlockNameLength{128};
+ char name[maxBlockNameLength];
+
+ GLint maxUniformBlockNameLength{0};
+ glGetProgramInterfaceiv(m_Program, GL_UNIFORM_BLOCK, GL_MAX_NAME_LENGTH, &maxUniformBlockNameLength);
+ ogl_WarnIfError();
+
+ GLint numberOfActiveUniformBlocks{0};
+ glGetProgramInterfaceiv(m_Program, GL_UNIFORM_BLOCK, GL_ACTIVE_RESOURCES, &numberOfActiveUniformBlocks);
+ ogl_WarnIfError();
+ // Currently we support the only one uniform buffer per shader.
+ if (numberOfActiveUniformBlocks == 1)
+ {
+ GLsizei length{0};
+ glGetProgramResourceName(m_Program, GL_UNIFORM_BLOCK, 0, maxBlockNameLength, &length, name);
+
+ const GLuint location{glGetProgramResourceIndex(m_Program, GL_UNIFORM_BLOCK, name)};
+ glUniformBlockBinding(m_Program, location, location);
+
+ m_UniformBufferLocation = location;
+ }
+
+ GLint maxStorageNameLength{0};
+ glGetProgramInterfaceiv(m_Program, GL_SHADER_STORAGE_BLOCK, GL_MAX_NAME_LENGTH, &maxStorageNameLength);
+ ogl_WarnIfError();
+ ENSURE(maxStorageNameLength <= maxBlockNameLength);
+ GLint numberOfActiveStorages{0};
+ glGetProgramInterfaceiv(m_Program, GL_SHADER_STORAGE_BLOCK, GL_ACTIVE_RESOURCES, &numberOfActiveStorages);
+ ogl_WarnIfError();
+ for (GLint index{0}; index < numberOfActiveStorages; ++index)
+ {
+ GLsizei length{0};
+ glGetProgramResourceName(m_Program, GL_SHADER_STORAGE_BLOCK, index, maxBlockNameLength, &length, name);
+
+ const GLuint location{glGetProgramResourceIndex(m_Program, GL_SHADER_STORAGE_BLOCK, name)};
+ glShaderStorageBlockBinding(m_Program, location, location);
+
+ const CStrIntern nameIntern(name);
+
+ m_BindingSlotsMapping[nameIntern] = m_BindingSlots.size();
+ BindingSlot bindingSlot{};
+ bindingSlot.name = nameIntern;
+ bindingSlot.location = location;
+ bindingSlot.isStorageBuffer = true;
+ m_BindingSlots.emplace_back(std::move(bindingSlot));
+ }
+ }
+#endif
+
GLint numUniforms = 0;
glGetProgramiv(m_Program, GL_ACTIVE_UNIFORMS, &numUniforms);
ogl_WarnIfError();
@@ -824,6 +896,7 @@ public:
bindingSlot.size = size;
bindingSlot.type = type;
bindingSlot.isTexture = false;
+ bindingSlot.isStorageBuffer = false;
#define CASE(TYPE, ELEMENT_TYPE, ELEMENT_COUNT) \
case GL_ ## TYPE: \
@@ -871,6 +944,7 @@ public:
case GL_IMAGE_2D:
bindingSlot.elementType = GL_IMAGE_2D;
bindingSlot.isTexture = true;
+ m_HasImageUniforms = true;
break;
#endif
default:
@@ -895,6 +969,31 @@ public:
LOGERROR("CShaderProgramGLSL::Link: unsupported uniform type: 0x%04x", static_cast(type));
}
+#if !CONFIG2_GLES
+ if (isStorageSupported)
+ {
+ GLuint uniformIndex{0};
+ const GLchar* nameToQuery{name};
+ glGetUniformIndices(m_Program, 1, &nameToQuery, &uniformIndex);
+ ogl_WarnIfError();
+
+ GLint uniformOffset{0};
+ glGetActiveUniformsiv(m_Program, 1, &uniformIndex, GL_UNIFORM_OFFSET, &uniformOffset);
+ ogl_WarnIfError();
+
+ // According to the OpenGL spec:
+ // https://registry.khronos.org/OpenGL-Refpages/es3/html/glGetActiveUniformsiv.xhtml
+ // For uniforms in the default uniform block, -1 will be returned.
+ if (uniformOffset >= 0)
+ {
+ const uint32_t sizeInBytes{static_cast(bindingSlot.size * bindingSlot.elementCount * sizeof(float))};
+ m_UniformBufferSize = std::max(m_UniformBufferSize, uniformOffset + sizeInBytes);
+ bindingSlot.location = -1;
+ bindingSlot.offset = uniformOffset;
+ }
+ }
+#endif
+
m_BindingSlots.emplace_back(std::move(bindingSlot));
}
@@ -918,6 +1017,13 @@ public:
ogl_WarnIfError();
}
+ if (m_UniformBufferSize > 0 && m_UniformBufferLocation != -1)
+ {
+ m_UniformBuffer = m_Device->CreateBuffer(
+ "ShaderProgramUniformBuffer", IBuffer::Type::UNIFORM, m_UniformBufferSize,
+ IBuffer::Usage::DYNAMIC | IBuffer::Usage::TRANSFER_DST);
+ }
+
// TODO: verify that we're not using more samplers than is supported
Unbind();
@@ -933,6 +1039,8 @@ public:
ENSURE(this != previousShaderProgramGLSL);
glUseProgram(m_Program);
+ if (m_UniformBuffer)
+ glBindBufferBase(GL_UNIFORM_BUFFER, m_UniformBufferLocation, m_UniformBuffer->As()->GetHandle());
if (previousShaderProgramGLSL)
{
@@ -1009,6 +1117,15 @@ public:
return textureUnit;
}
+ GLuint GetStorageBuffer(const int32_t bindingSlot) override
+ {
+ if (bindingSlot < 0 || bindingSlot >= static_cast(m_BindingSlots.size()))
+ return 0;
+ if (!m_BindingSlots[bindingSlot].isStorageBuffer)
+ LOGERROR("CShaderProgramGLSL::GetStorageBuffer(): Invalid slot (expected storage buffer): '%s'", m_BindingSlots[bindingSlot].name.c_str());
+ return m_BindingSlots[bindingSlot].location;
+ }
+
void SetUniform(
const int32_t bindingSlot,
const float value) override
@@ -1094,6 +1211,17 @@ public:
const GLint location = m_BindingSlots[bindingSlot].location;
const GLenum type = m_BindingSlots[bindingSlot].type;
+ if (location == -1)
+ {
+ const uint32_t sizeInBytes{
+ static_cast(m_BindingSlots[bindingSlot].size * m_BindingSlots[bindingSlot].elementCount * sizeof(float))};
+ const uint32_t dataSizeToUpload{std::min(
+ static_cast(values.size() * sizeof(float)), sizeInBytes)};
+ m_Device->GetActiveCommandContext()->UploadBufferRegion(
+ m_UniformBuffer.get(), values.data(), m_BindingSlots[bindingSlot].offset, dataSizeToUpload);
+ return;
+ }
+
if (type == GL_FLOAT)
glUniform1fv(location, 1, values.data());
else if (type == GL_FLOAT_VEC2)
@@ -1172,14 +1300,20 @@ private:
{
CStrIntern name;
GLint location;
+ GLint offset;
GLint size;
GLenum type;
GLenum elementType;
GLint elementCount;
bool isTexture;
+ bool isStorageBuffer;
};
std::vector m_BindingSlots;
std::unordered_map m_BindingSlotsMapping;
+
+ GLint m_UniformBufferLocation{-1};
+ uint32_t m_UniformBufferSize{0};
+ std::unique_ptr m_UniformBuffer;
};
CShaderProgram::CShaderProgram(int streamflags)
diff --git a/source/renderer/backend/gl/ShaderProgram.h b/source/renderer/backend/gl/ShaderProgram.h
index 1f3305fd6d..90533355d0 100644
--- a/source/renderer/backend/gl/ShaderProgram.h
+++ b/source/renderer/backend/gl/ShaderProgram.h
@@ -116,6 +116,8 @@ public:
};
virtual TextureUnit GetTextureUnit(const int32_t bindingSlot) = 0;
+ virtual GLuint GetStorageBuffer(const int32_t bindingSlot) = 0;
+
virtual void SetUniform(
const int32_t bindingSlot,
const float value) = 0;
@@ -141,6 +143,8 @@ public:
bool IsStreamActive(const VertexAttributeStream stream) const;
+ bool HasImageUniforms() const { return m_HasImageUniforms; }
+
/**
* Checks that all the required vertex attributes have been set.
* Call this before calling Draw/DrawIndexed etc to avoid potential crashes.
@@ -161,6 +165,8 @@ protected:
void BindClientStates();
void UnbindClientStates();
int m_ValidStreams; // which streams have been specified via VertexPointer etc since the last Bind
+
+ bool m_HasImageUniforms{false};
};
} // namespace GL
diff --git a/source/renderer/backend/vulkan/Buffer.cpp b/source/renderer/backend/vulkan/Buffer.cpp
index 39b80c0d98..a1a23509db 100644
--- a/source/renderer/backend/vulkan/Buffer.cpp
+++ b/source/renderer/backend/vulkan/Buffer.cpp
@@ -40,6 +40,8 @@ VkBufferUsageFlags ToVkBufferUsageFlags(const uint32_t usage)
usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
if (usage & IBuffer::Usage::TRANSFER_DST)
usageFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+ if (usage & IBuffer::Usage::STORAGE)
+ usageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
return usageFlags;
}
@@ -51,19 +53,20 @@ std::tuple MakeCreati
switch (type)
{
case IBuffer::Type::VERTEX:
- ENSURE(usage & IBuffer::Usage::TRANSFER_DST);
+ ENSURE(usage & (IBuffer::Usage::TRANSFER_DST | IBuffer::Usage::STORAGE));
return {
commonFlags | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE};
case IBuffer::Type::INDEX:
- ENSURE(usage & IBuffer::Usage::TRANSFER_DST);
+ ENSURE(usage & (IBuffer::Usage::TRANSFER_DST | IBuffer::Usage::STORAGE));
return {
commonFlags | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE};
case IBuffer::Type::UPLOAD:
ENSURE(usage & IBuffer::Usage::TRANSFER_SRC);
+ ENSURE(!(usage & IBuffer::Usage::STORAGE));
return {
commonFlags,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
@@ -131,6 +134,8 @@ CBuffer::~CBuffer()
if (m_Allocation != VK_NULL_HANDLE)
m_Device->ScheduleObjectToDestroy(
VK_OBJECT_TYPE_BUFFER, m_Buffer, m_Allocation);
+
+ m_Device->ScheduleBufferToDestroy(m_UID);
}
IDevice* CBuffer::GetDevice()
diff --git a/source/renderer/backend/vulkan/DescriptorManager.cpp b/source/renderer/backend/vulkan/DescriptorManager.cpp
index ade79d7629..5a10d9e4d1 100644
--- a/source/renderer/backend/vulkan/DescriptorManager.cpp
+++ b/source/renderer/backend/vulkan/DescriptorManager.cpp
@@ -1,4 +1,4 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@@ -27,6 +27,7 @@
#include "renderer/backend/vulkan/Utilities.h"
#include
+#include
#include
namespace Renderer
@@ -306,6 +307,56 @@ VkDescriptorSet CDescriptorManager::GetSingleTypeDescritorSet(
return set;
}
+VkDescriptorSet CDescriptorManager::GetSingleTypeDescritorSet(
+ VkDescriptorType type, VkDescriptorSetLayout layout,
+ const std::vector& buffersUID,
+ const std::vector& buffers)
+{
+ ENSURE(buffersUID.size() == buffers.size());
+ ENSURE(!buffersUID.empty());
+ const auto[set, justCreated] = GetSingleTypeDescritorSetImpl(type, layout, buffersUID);
+ if (!justCreated)
+ return set;
+
+ ENSURE(
+ type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+ type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
+ const VkPhysicalDeviceLimits& physicalDeviceLimits = m_Device->GetChoosenPhysicalDevice().properties.limits;
+ const uint32_t maxBufferRange =
+ type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
+ ? physicalDeviceLimits.maxStorageBufferRange
+ : physicalDeviceLimits.maxUniformBufferRange;
+
+ PS::StaticVector infos;
+ std::transform(buffers.begin(), buffers.end(), std::back_inserter(infos),
+ [maxBufferRange](CBuffer* buffer)
+ {
+ ENSURE(buffer);
+ ENSURE(buffer->GetUsage() & IBuffer::Usage::STORAGE);
+ ENSURE(buffer->GetSize() <= maxBufferRange);
+
+ VkDescriptorBufferInfo descriptorBufferInfo{};
+ descriptorBufferInfo.buffer = buffer->GetVkBuffer();
+ descriptorBufferInfo.offset = 0;
+ descriptorBufferInfo.range = buffer->GetSize();
+ return descriptorBufferInfo;
+ });
+
+ VkWriteDescriptorSet writeDescriptorSet{};
+ writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+ writeDescriptorSet.dstSet = set;
+ writeDescriptorSet.dstBinding = 0;
+ writeDescriptorSet.dstArrayElement = 0;
+ writeDescriptorSet.descriptorType = type;
+ writeDescriptorSet.descriptorCount = static_cast(infos.size());
+ writeDescriptorSet.pBufferInfo = infos.data();
+
+ vkUpdateDescriptorSets(
+ m_Device->GetVkDevice(), 1, &writeDescriptorSet, 0, nullptr);
+
+ return set;
+}
+
uint32_t CDescriptorManager::GetUniformSet() const
{
return m_UseDescriptorIndexing ? 1 : 0;
@@ -377,26 +428,36 @@ void CDescriptorManager::OnTextureDestroy(const DeviceObjectUID uid)
}
else
{
- auto it = m_UIDToSingleTypePoolMap.find(uid);
- if (it == m_UIDToSingleTypePoolMap.end())
- return;
- for (const auto& entry : it->second)
- {
- SingleTypePool& pool = GetSingleTypePool(entry.type, entry.size);
- SingleTypePool::Element& element = pool.elements[entry.elementIndex];
- // Multiple textures might be used by the same descriptor set and
- // we don't need to reset it if it was already.
- if (element.version == entry.version && element.nextFreeIndex == SingleTypePool::INVALID_INDEX)
- {
- ENSURE(pool.firstFreeIndex != entry.elementIndex);
- element.nextFreeIndex = pool.firstFreeIndex;
- pool.firstFreeIndex = entry.elementIndex;
- }
- }
- m_UIDToSingleTypePoolMap.erase(it);
+ OnDeviceObjectDestroy(uid);
}
}
+void CDescriptorManager::OnBufferDestroy(const DeviceObjectUID uid)
+{
+ OnDeviceObjectDestroy(uid);
+}
+
+void CDescriptorManager::OnDeviceObjectDestroy(const DeviceObjectUID uid)
+{
+ auto it = m_UIDToSingleTypePoolMap.find(uid);
+ if (it == m_UIDToSingleTypePoolMap.end())
+ return;
+ for (const auto& entry : it->second)
+ {
+ SingleTypePool& pool = GetSingleTypePool(entry.type, entry.size);
+ SingleTypePool::Element& element = pool.elements[entry.elementIndex];
+ // Multiple textures might be used by the same descriptor set and
+ // we don't need to reset it if it was already.
+ if (element.version == entry.version && element.nextFreeIndex == SingleTypePool::INVALID_INDEX)
+ {
+ ENSURE(pool.firstFreeIndex != entry.elementIndex);
+ element.nextFreeIndex = pool.firstFreeIndex;
+ pool.firstFreeIndex = entry.elementIndex;
+ }
+ }
+ m_UIDToSingleTypePoolMap.erase(it);
+}
+
} // namespace Vulkan
} // namespace Backend
diff --git a/source/renderer/backend/vulkan/DescriptorManager.h b/source/renderer/backend/vulkan/DescriptorManager.h
index f5c9904f0d..ec0e8c3485 100644
--- a/source/renderer/backend/vulkan/DescriptorManager.h
+++ b/source/renderer/backend/vulkan/DescriptorManager.h
@@ -20,6 +20,7 @@
#include "ps/CStrIntern.h"
#include "renderer/backend/Sampler.h"
+#include "renderer/backend/vulkan/Buffer.h"
#include "renderer/backend/vulkan/Device.h"
#include "renderer/backend/vulkan/Texture.h"
@@ -61,12 +62,19 @@ public:
const std::vector& texturesUID,
const std::vector& textures);
+ VkDescriptorSet GetSingleTypeDescritorSet(
+ VkDescriptorType type, VkDescriptorSetLayout layout,
+ const std::vector& buffersUID,
+ const std::vector& buffers);
+
uint32_t GetUniformSet() const;
uint32_t GetTextureDescriptor(CTexture* texture);
void OnTextureDestroy(const DeviceObjectUID uid);
+ void OnBufferDestroy(const DeviceObjectUID uid);
+
const VkDescriptorSetLayout& GetDescriptorIndexingSetLayout() const { return m_DescriptorIndexingSetLayout; }
const VkDescriptorSetLayout& GetUniformDescriptorSetLayout() const { return m_UniformDescriptorSetLayout; }
const VkDescriptorSet& GetDescriptorIndexingSet() { return m_DescriptorIndexingSet; }
@@ -94,6 +102,8 @@ private:
VkDescriptorType type, VkDescriptorSetLayout layout,
const std::vector& uids);
+ void OnDeviceObjectDestroy(const DeviceObjectUID uid);
+
CDevice* m_Device = nullptr;
bool m_UseDescriptorIndexing = false;
diff --git a/source/renderer/backend/vulkan/Device.cpp b/source/renderer/backend/vulkan/Device.cpp
index ad8780d55e..1b09503894 100644
--- a/source/renderer/backend/vulkan/Device.cpp
+++ b/source/renderer/backend/vulkan/Device.cpp
@@ -580,6 +580,7 @@ std::unique_ptr CDevice::Create(SDL_Window* window)
capabilities.ARBShaders = false;
capabilities.ARBShadersShadow = false;
capabilities.computeShaders = true;
+ capabilities.storage = choosenDevice.properties.limits.maxStorageBufferRange >= GiB;
capabilities.instancing = true;
capabilities.maxSampleCount = 1;
const VkSampleCountFlags sampleCountFlags =
@@ -657,7 +658,7 @@ CDevice::~CDevice()
m_SubmitScheduler.reset();
- ProcessTextureToDestroyQueue(true);
+ ProcessDeviceObjectToDestroyQueue(true);
m_RenderPassManager.reset();
m_SamplerManager.reset();
@@ -813,7 +814,7 @@ void CDevice::Present()
m_SubmitScheduler->Present(*m_SwapChain);
ProcessObjectToDestroyQueue();
- ProcessTextureToDestroyQueue();
+ ProcessDeviceObjectToDestroyQueue();
++m_FrameID;
}
@@ -928,6 +929,11 @@ void CDevice::ScheduleTextureToDestroy(const DeviceObjectUID uid)
m_TextureToDestroyQueue.push({m_FrameID, uid});
}
+void CDevice::ScheduleBufferToDestroy(const DeviceObjectUID uid)
+{
+ m_BufferToDestroyQueue.push({m_FrameID, uid});
+}
+
void CDevice::SetObjectName(VkObjectType type, const uint64_t handle, const char* name)
{
if (!m_Capabilities.debugLabels)
@@ -1013,7 +1019,7 @@ void CDevice::ProcessObjectToDestroyQueue(const bool ignoreFrameID)
}
}
-void CDevice::ProcessTextureToDestroyQueue(const bool ignoreFrameID)
+void CDevice::ProcessDeviceObjectToDestroyQueue(const bool ignoreFrameID)
{
while (!m_TextureToDestroyQueue.empty() &&
(ignoreFrameID || m_TextureToDestroyQueue.front().first + NUMBER_OF_FRAMES_IN_FLIGHT < m_FrameID))
@@ -1021,6 +1027,13 @@ void CDevice::ProcessTextureToDestroyQueue(const bool ignoreFrameID)
GetDescriptorManager().OnTextureDestroy(m_TextureToDestroyQueue.front().second);
m_TextureToDestroyQueue.pop();
}
+
+ while (!m_BufferToDestroyQueue.empty() &&
+ (ignoreFrameID || m_BufferToDestroyQueue.front().first + NUMBER_OF_FRAMES_IN_FLIGHT < m_FrameID))
+ {
+ GetDescriptorManager().OnBufferDestroy(m_BufferToDestroyQueue.front().second);
+ m_BufferToDestroyQueue.pop();
+ }
}
CTexture* CDevice::GetCurrentBackbufferTexture()
diff --git a/source/renderer/backend/vulkan/Device.h b/source/renderer/backend/vulkan/Device.h
index ede13bcd6b..5cd2cd58b8 100644
--- a/source/renderer/backend/vulkan/Device.h
+++ b/source/renderer/backend/vulkan/Device.h
@@ -145,6 +145,8 @@ public:
void ScheduleTextureToDestroy(const DeviceObjectUID uid);
+ void ScheduleBufferToDestroy(const DeviceObjectUID uid);
+
void SetObjectName(VkObjectType type, const void* handle, const char* name)
{
SetObjectName(type, reinterpret_cast(handle), name);
@@ -174,7 +176,7 @@ private:
void RecreateSwapChain();
bool IsSwapChainValid();
void ProcessObjectToDestroyQueue(const bool ignoreFrameID = false);
- void ProcessTextureToDestroyQueue(const bool ignoreFrameID = false);
+ void ProcessDeviceObjectToDestroyQueue(const bool ignoreFrameID = false);
bool IsFormatSupportedForUsage(const Format format, const uint32_t usage) const;
@@ -216,6 +218,7 @@ private:
};
std::queue m_ObjectToDestroyQueue;
std::queue> m_TextureToDestroyQueue;
+ std::queue> m_BufferToDestroyQueue;
std::unique_ptr m_RenderPassManager;
std::unique_ptr m_SamplerManager;
diff --git a/source/renderer/backend/vulkan/DeviceCommandContext.cpp b/source/renderer/backend/vulkan/DeviceCommandContext.cpp
index 054520749c..fd6d026217 100644
--- a/source/renderer/backend/vulkan/DeviceCommandContext.cpp
+++ b/source/renderer/backend/vulkan/DeviceCommandContext.cpp
@@ -29,6 +29,7 @@
#include "renderer/backend/vulkan/DescriptorManager.h"
#include "renderer/backend/vulkan/Device.h"
#include "renderer/backend/vulkan/Framebuffer.h"
+#include "renderer/backend/vulkan/Mapping.h"
#include "renderer/backend/vulkan/PipelineState.h"
#include "renderer/backend/vulkan/RingCommandContext.h"
#include "renderer/backend/vulkan/ShaderProgram.h"
@@ -51,7 +52,7 @@ namespace Vulkan
namespace
{
-constexpr uint32_t UNIFORM_BUFFER_INITIAL_SIZE = 1024 * 1024;
+constexpr uint32_t UNIFORM_BUFFER_INITIAL_SIZE = 1024 * 1024 * 32;
constexpr uint32_t FRAME_INPLACE_BUFFER_INITIAL_SIZE = 128 * 1024;
struct SBaseImageState
@@ -915,6 +916,17 @@ void CDeviceCommandContext::Dispatch(
m_ShaderProgram->PostDispatch(*m_CommandContext);
}
+void CDeviceCommandContext::InsertMemoryBarrier(
+ const uint32_t srcStageMask, const uint32_t dstStageMask,
+ const uint32_t srcAccessMask, const uint32_t dstAccessMask)
+{
+ ENSURE(!m_InsideFramebufferPass);
+ Utilities::SubmitMemoryBarrier(
+ m_CommandContext->GetCommandBuffer(),
+ Mapping::FromAccessMask(srcAccessMask), Mapping::FromAccessMask(dstAccessMask),
+ Mapping::FromPipelineStageMask(srcStageMask), Mapping::FromPipelineStageMask(dstStageMask));
+}
+
void CDeviceCommandContext::SetTexture(const int32_t bindingSlot, ITexture* texture)
{
if (bindingSlot < 0)
@@ -948,6 +960,16 @@ void CDeviceCommandContext::SetStorageTexture(const int32_t bindingSlot, ITextur
m_ShaderProgram->SetStorageTexture(bindingSlot, textureToBind);
}
+void CDeviceCommandContext::SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer)
+{
+ ENSURE(m_InsidePass || m_InsideComputePass);
+ ENSURE(buffer);
+ CBuffer* bufferToBind = buffer->As();
+ ENSURE(bufferToBind->GetType() == IBuffer::Type::VERTEX || bufferToBind->GetType() == IBuffer::Type::INDEX);
+ ENSURE(bufferToBind->GetUsage() & IBuffer::Usage::STORAGE);
+ m_ShaderProgram->SetStorageBuffer(bindingSlot, bufferToBind);
+}
+
void CDeviceCommandContext::SetUniform(
const int32_t bindingSlot,
const float value)
diff --git a/source/renderer/backend/vulkan/DeviceCommandContext.h b/source/renderer/backend/vulkan/DeviceCommandContext.h
index 03397b2d30..0bf489c73d 100644
--- a/source/renderer/backend/vulkan/DeviceCommandContext.h
+++ b/source/renderer/backend/vulkan/DeviceCommandContext.h
@@ -125,9 +125,14 @@ public:
const uint32_t groupCountY,
const uint32_t groupCountZ) override;
+ void InsertMemoryBarrier(
+ const uint32_t srcStageMask, const uint32_t dstStageMask,
+ const uint32_t srcAccessMask, const uint32_t dstAccessMask) override;
+
void SetTexture(const int32_t bindingSlot, ITexture* texture) override;
void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) override;
+ void SetStorageBuffer(const int32_t bindingSlot, IBuffer* buffer) override;
void SetUniform(
const int32_t bindingSlot,
diff --git a/source/renderer/backend/vulkan/Mapping.cpp b/source/renderer/backend/vulkan/Mapping.cpp
index ada46127fb..abf72eb329 100644
--- a/source/renderer/backend/vulkan/Mapping.cpp
+++ b/source/renderer/backend/vulkan/Mapping.cpp
@@ -1,4 +1,4 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@@ -175,9 +175,15 @@ VkFormat FromFormat(const Format format)
CASE(R16_UNORM)
CASE(R16_UINT)
CASE(R16_SINT)
+ CASE(R16_SFLOAT)
CASE(R16G16_UNORM)
CASE(R16G16_UINT)
CASE(R16G16_SINT)
+ CASE(R16G16_SFLOAT)
+
+ CASE(R16G16B16_SFLOAT)
+
+ CASE(R16G16B16A16_SFLOAT)
CASE(R32_SFLOAT)
CASE(R32G32_SFLOAT)
@@ -273,6 +279,68 @@ VkAttachmentStoreOp FromAttachmentStoreOp(const AttachmentStoreOp storeOp)
return resultStoreOp;
}
+VkPipelineStageFlags FromPipelineStageMask(const uint32_t mask)
+{
+ VkPipelineStageFlags flags{0};
+ uint32_t checkedMask{0};
+#define CASE(NAME) \
+ if (mask & PipelineStage::NAME) { flags |= VK_PIPELINE_STAGE_##NAME##_BIT; checkedMask |= PipelineStage::NAME; }
+#define CASE2(NAME, VK_NAME) \
+ if (mask & PipelineStage::NAME) { flags |= VK_NAME; checkedMask |= PipelineStage::NAME; }
+
+ CASE(DRAW_INDIRECT)
+ CASE(VERTEX_INPUT)
+ CASE(VERTEX_SHADER)
+ CASE(FRAGMENT_SHADER)
+ CASE(EARLY_FRAGMENT_TESTS)
+ CASE(LATE_FRAGMENT_TESTS)
+ CASE(COLOR_ATTACHMENT_OUTPUT)
+ CASE(COMPUTE_SHADER)
+ CASE(TRANSFER)
+ CASE(HOST)
+ CASE2(ACCELERATION_STRUCTURE_BUILD, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR)
+ CASE2(RAY_TRACING_SHADER, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR)
+ CASE2(TASK_SHADER, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT)
+ CASE2(MESH_SHADER, VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT)
+#undef CASE
+#undef CASE2
+ ENSURE(mask == checkedMask);
+ return flags;
+}
+
+VkAccessFlags FromAccessMask(const uint32_t mask)
+{
+ VkAccessFlags flags{0};
+ uint32_t checkedMask{0};
+#define CASE(NAME) \
+ if (mask & Access::NAME) { flags |= VK_ACCESS_##NAME##_BIT; checkedMask |= Access::NAME; }
+#define CASE2(NAME, VK_NAME) \
+ if (mask & Access::NAME) { flags |= VK_NAME; checkedMask |= Access::NAME; }
+
+ CASE(INDIRECT_COMMAND_READ)
+ CASE(INDEX_READ)
+ CASE(VERTEX_ATTRIBUTE_READ)
+ CASE(UNIFORM_READ)
+ CASE(INPUT_ATTACHMENT_READ)
+ CASE(SHADER_READ)
+ CASE(SHADER_WRITE)
+ CASE(COLOR_ATTACHMENT_READ)
+ CASE(COLOR_ATTACHMENT_WRITE)
+ CASE(DEPTH_STENCIL_ATTACHMENT_READ)
+ CASE(DEPTH_STENCIL_ATTACHMENT_WRITE)
+ CASE(TRANSFER_READ)
+ CASE(TRANSFER_WRITE)
+ CASE(HOST_READ)
+ CASE(HOST_WRITE)
+ CASE(MEMORY_READ)
+ CASE(MEMORY_WRITE)
+ CASE2(ACCELERATION_STRUCTURE_READ, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR)
+ CASE2(ACCELERATION_STRUCTURE_WRITE, VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)
+#undef CASE
+ ENSURE(mask == checkedMask);
+ return flags;
+}
+
} // namespace Mapping
} // namespace Vulkan
diff --git a/source/renderer/backend/vulkan/Mapping.h b/source/renderer/backend/vulkan/Mapping.h
index 9a2ee4fa7d..19af3cbea9 100644
--- a/source/renderer/backend/vulkan/Mapping.h
+++ b/source/renderer/backend/vulkan/Mapping.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
@@ -18,6 +18,7 @@
#ifndef INCLUDED_RENDERER_BACKEND_VULKAN_MAPPING
#define INCLUDED_RENDERER_BACKEND_VULKAN_MAPPING
+#include "renderer/backend/Barrier.h"
#include "renderer/backend/Format.h"
#include "renderer/backend/IFramebuffer.h"
#include "renderer/backend/PipelineState.h"
@@ -61,6 +62,10 @@ VkAttachmentLoadOp FromAttachmentLoadOp(const AttachmentLoadOp loadOp);
VkAttachmentStoreOp FromAttachmentStoreOp(const AttachmentStoreOp storeOp);
+VkPipelineStageFlags FromPipelineStageMask(const uint32_t mask);
+
+VkAccessFlags FromAccessMask(const uint32_t mask);
+
} // namespace Mapping
} // namespace Vulkan
diff --git a/source/renderer/backend/vulkan/ShaderProgram.cpp b/source/renderer/backend/vulkan/ShaderProgram.cpp
index 7999eae7cc..54173d13d7 100644
--- a/source/renderer/backend/vulkan/ShaderProgram.cpp
+++ b/source/renderer/backend/vulkan/ShaderProgram.cpp
@@ -53,7 +53,8 @@ enum class BindingSlotType
PUSH_CONSTANT,
UNIFORM,
TEXTURE,
- STORAGE_IMAGE
+ STORAGE_IMAGE,
+ STORAGE_BUFFER
};
constexpr uint32_t BINDING_SLOT_TYPE_SHIFT{16u};
@@ -247,6 +248,10 @@ std::unique_ptr CShaderProgram::Create(
uint32_t storageImageDescriptorSetSize = 0;
std::unordered_map storageImageMapping;
+ VkDescriptorType storageBufferDescriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM;
+ uint32_t storageBufferDescriptorSetSize = 0;
+ std::unordered_map storageBufferMapping;
+
auto addDescriptorSets = [&](const XMBElement& element) -> bool
{
const bool useDescriptorIndexing =
@@ -325,15 +330,13 @@ std::unique_ptr CShaderProgram::Create(
texturesDescriptorSetSize =
std::max(texturesDescriptorSetSize, binding + 1);
}
- else if (type == "storageImage" || type == "storageBuffer")
+ else if (type == "storageImage")
{
const CStrIntern name{attributes.GetNamedItem(at_name)};
storageImageMapping[name] = binding;
storageImageDescriptorSetSize =
std::max(storageImageDescriptorSetSize, binding + 1);
- const VkDescriptorType descriptorType = type == "storageBuffer"
- ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
- : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+ const VkDescriptorType descriptorType{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE};
if (storageImageDescriptorType == VK_DESCRIPTOR_TYPE_MAX_ENUM)
storageImageDescriptorType = descriptorType;
else if (storageImageDescriptorType != descriptorType)
@@ -342,6 +345,21 @@ std::unique_ptr CShaderProgram::Create(
return false;
}
}
+ else if (type == "storageBuffer")
+ {
+ const CStrIntern name{attributes.GetNamedItem(at_name)};
+ storageBufferMapping[name] = binding;
+ storageBufferDescriptorSetSize =
+ std::max(storageBufferDescriptorSetSize, binding + 1);
+ const VkDescriptorType descriptorType{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER};
+ if (storageBufferDescriptorType == VK_DESCRIPTOR_TYPE_MAX_ENUM)
+ storageBufferDescriptorType = descriptorType;
+ else if (storageBufferDescriptorType != descriptorType)
+ {
+ LOGERROR("Shader should have storages of the same type.");
+ return false;
+ }
+ }
else
{
LOGERROR("Unsupported binding: '%s'", type.c_str());
@@ -573,6 +591,12 @@ std::unique_ptr CShaderProgram::Create(
device, storageImageDescriptorType, storageImageDescriptorSetSize, std::move(storageImageMapping));
layouts.emplace_back(shaderProgram->m_StorageImageBinding->GetDescriptorSetLayout());
}
+ if (storageBufferDescriptorSetSize > 0)
+ {
+ shaderProgram->m_StorageBufferBinding.emplace(
+ device, storageBufferDescriptorType, storageBufferDescriptorSetSize, std::move(storageBufferMapping));
+ layouts.emplace_back(shaderProgram->m_StorageBufferBinding->GetDescriptorSetLayout());
+ }
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{};
pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
@@ -620,6 +644,8 @@ int32_t CShaderProgram::GetBindingSlot(const CStrIntern name) const
return (static_cast(BindingSlotType::TEXTURE) << BINDING_SLOT_TYPE_SHIFT) | bindingSlot;
if (const int32_t bindingSlot = m_StorageImageBinding.has_value() ? m_StorageImageBinding->GetBindingSlot(name) : -1; bindingSlot != -1)
return (static_cast(BindingSlotType::STORAGE_IMAGE) << BINDING_SLOT_TYPE_SHIFT) | bindingSlot;
+ if (const int32_t bindingSlot = m_StorageBufferBinding.has_value() ? m_StorageBufferBinding->GetBindingSlot(name) : -1; bindingSlot != -1)
+ return (static_cast(BindingSlotType::STORAGE_BUFFER) << BINDING_SLOT_TYPE_SHIFT) | bindingSlot;
return -1;
}
@@ -646,6 +672,8 @@ void CShaderProgram::Unbind()
m_TextureBinding->Unbind();
if (m_StorageImageBinding.has_value())
m_StorageImageBinding->Unbind();
+ if (m_StorageBufferBinding.has_value())
+ m_StorageBufferBinding->Unbind();
}
void CShaderProgram::PreDraw(CRingCommandContext& commandContext)
@@ -730,6 +758,15 @@ void CShaderProgram::BindOutdatedDescriptorSets(
constexpr uint32_t STORAGE_IMAGE_BINDING_SET = 2u;
descriptortSets.emplace_back(STORAGE_IMAGE_BINDING_SET, m_StorageImageBinding->UpdateAndReturnDescriptorSet());
}
+ if (m_StorageBufferBinding.has_value() && m_StorageBufferBinding->IsOutdated())
+ {
+ // Currently we assume that in computer shaders we use either textures
+ // or buffers but not together.
+ const uint32_t STORAGE_BUFFER_BINDING_SET{
+ m_Device->GetDescriptorManager().UseDescriptorIndexing() ? 2u : 1u};
+ descriptortSets.emplace_back(
+ STORAGE_BUFFER_BINDING_SET, m_StorageBufferBinding->UpdateAndReturnDescriptorSet());
+ }
for (const auto& [firstSet, descriptorSet] : descriptortSets)
{
@@ -801,8 +838,7 @@ std::pair CShaderProgram::GetUniformData(
m_MaterialConstantsDataOutdated = true;
const uint32_t size = uniform.size;
const uint32_t offset = uniform.offset;
- ENSURE(size <= dataSize);
- return {m_MaterialConstantsData.get() + offset, size};
+ return {m_MaterialConstantsData.get() + offset, std::min(dataSize, size)};
}
}
@@ -841,6 +877,16 @@ void CShaderProgram::SetStorageTexture(const int32_t bindingSlot, CTexture* text
m_StorageImageBinding->SetObject(index, texture);
}
+void CShaderProgram::SetStorageBuffer(const int32_t bindingSlot, CBuffer* buffer)
+{
+ if (bindingSlot < 0)
+ return;
+ ENSURE(static_cast(bindingSlot >> BINDING_SLOT_TYPE_SHIFT) == BindingSlotType::STORAGE_BUFFER);
+ const uint32_t index{bindingSlot & BINDING_SLOT_VALUE_MASK};
+ ENSURE(m_StorageBufferBinding.has_value());
+ m_StorageBufferBinding->SetObject(index, buffer);
+}
+
} // namespace Vulkan
} // namespace Backend
diff --git a/source/renderer/backend/vulkan/ShaderProgram.h b/source/renderer/backend/vulkan/ShaderProgram.h
index ee79c3d8af..ec1b81e429 100644
--- a/source/renderer/backend/vulkan/ShaderProgram.h
+++ b/source/renderer/backend/vulkan/ShaderProgram.h
@@ -122,6 +122,7 @@ public:
void SetTexture(const int32_t bindingSlot, CTexture* texture);
void SetStorageTexture(const int32_t bindingSlot, CTexture* texture);
+ void SetStorageBuffer(const int32_t bindingSlot, CBuffer* buffer);
// TODO: rename to something related to buffer.
bool IsMaterialConstantsDataOutdated() const { return m_MaterialConstantsDataOutdated; }
@@ -178,6 +179,7 @@ private:
std::optional> m_TextureBinding;
std::optional> m_StorageImageBinding;
+ std::optional> m_StorageBufferBinding;
std::unordered_map m_StreamLocations;
};