2025-04-16 13:38:34 -07:00
|
|
|
/* Copyright (C) 2025 Wildfire Games.
|
2011-11-29 13:04:38 -08:00
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
|
|
|
* a copy of this software and associated documentation files (the
|
|
|
|
|
* "Software"), to deal in the Software without restriction, including
|
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
|
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
|
|
|
* the following conditions:
|
2016-11-23 05:02:58 -08:00
|
|
|
*
|
2011-11-29 13:04:38 -08:00
|
|
|
* The above copyright notice and this permission notice shall be included
|
|
|
|
|
* in all copies or substantial portions of the Software.
|
2016-11-23 05:02:58 -08:00
|
|
|
*
|
2011-11-29 13:04:38 -08:00
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
|
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
|
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
|
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
|
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "precompiled.h"
|
|
|
|
|
|
|
|
|
|
#include "Profiler2GPU.h"
|
|
|
|
|
|
|
|
|
|
#include "ps/ConfigDB.h"
|
|
|
|
|
#include "ps/Profiler2.h"
|
2022-05-08 15:02:46 -07:00
|
|
|
#include "ps/VideoMode.h"
|
2022-10-25 15:45:54 -07:00
|
|
|
#include "renderer/backend/IDevice.h"
|
2025-04-16 13:38:34 -07:00
|
|
|
#include "renderer/Renderer.h"
|
2011-11-29 13:04:38 -08:00
|
|
|
|
2020-11-18 03:35:36 -08:00
|
|
|
#include <deque>
|
|
|
|
|
#include <stack>
|
2021-11-25 09:33:17 -08:00
|
|
|
#include <vector>
|
2020-11-18 03:35:36 -08:00
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
/**
|
2011-11-29 13:04:38 -08:00
|
|
|
* At each enter/leave-region event, we do an async GPU timestamp query.
|
|
|
|
|
* When all the queries for a frame have their results available,
|
2025-04-16 13:38:34 -07:00
|
|
|
* we convert their GPU timestamps into durations and record the data.
|
2011-11-29 13:04:38 -08:00
|
|
|
*/
|
2025-04-16 13:38:34 -07:00
|
|
|
class CProfiler2GPUImpl
|
2011-11-29 13:04:38 -08:00
|
|
|
{
|
2025-04-16 13:38:34 -07:00
|
|
|
NONCOPYABLE(CProfiler2GPUImpl);
|
2022-03-28 10:32:09 -07:00
|
|
|
|
2011-11-29 13:04:38 -08:00
|
|
|
struct SEvent
|
|
|
|
|
{
|
|
|
|
|
const char* id;
|
2025-04-16 13:38:34 -07:00
|
|
|
uint32_t queryHandle;
|
2011-11-29 13:04:38 -08:00
|
|
|
bool isEnter; // true if entering region; false if leaving
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct SFrame
|
|
|
|
|
{
|
|
|
|
|
u32 num;
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
double syncTimeStart; // CPU time at start of this frame.
|
2011-11-29 13:04:38 -08:00
|
|
|
|
|
|
|
|
std::vector<SEvent> events;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
std::deque<SFrame> m_Frames;
|
|
|
|
|
|
|
|
|
|
public:
|
2025-04-16 13:38:34 -07:00
|
|
|
CProfiler2GPUImpl(CProfiler2& profiler)
|
|
|
|
|
: m_Profiler(profiler), m_Storage(*new CProfiler2::ThreadStorage(profiler, "gpu"))
|
2011-11-29 13:04:38 -08:00
|
|
|
{
|
2022-03-28 10:32:09 -07:00
|
|
|
m_Storage.RecordSyncMarker(m_Profiler.GetTime());
|
|
|
|
|
m_Storage.Record(CProfiler2::ITEM_EVENT, m_Profiler.GetTime(), "thread start");
|
|
|
|
|
|
|
|
|
|
m_Profiler.AddThreadStorage(&m_Storage);
|
2011-11-29 13:04:38 -08:00
|
|
|
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
~CProfiler2GPUImpl()
|
2011-11-29 13:04:38 -08:00
|
|
|
{
|
|
|
|
|
while (!m_Frames.empty())
|
|
|
|
|
PopFrontFrame();
|
2022-03-28 10:32:09 -07:00
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
for (const uint32_t queryHandle : m_FreeQueries)
|
|
|
|
|
g_VideoMode.GetBackendDevice()->FreeQuery(queryHandle);
|
|
|
|
|
m_FreeQueries.clear();
|
2022-03-28 10:32:09 -07:00
|
|
|
|
|
|
|
|
m_Profiler.RemoveThreadStorage(&m_Storage);
|
2011-11-29 13:04:38 -08:00
|
|
|
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
void FrameStart(Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
|
2011-11-29 13:04:38 -08:00
|
|
|
{
|
|
|
|
|
ProcessFrames();
|
|
|
|
|
|
|
|
|
|
SFrame frame;
|
|
|
|
|
frame.num = m_Profiler.GetFrameNumber();
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
// GL backend:
|
2011-11-29 13:04:38 -08:00
|
|
|
// On (at least) some NVIDIA Windows drivers, when GPU-bound, or when
|
|
|
|
|
// vsync enabled and not CPU-bound, the first glGet* call at the start
|
|
|
|
|
// of a frame appears to trigger a wait (to stop the GPU getting too
|
|
|
|
|
// far behind, or to wait for the vsync period).
|
|
|
|
|
// That will be this GL_TIMESTAMP get, which potentially distorts the
|
|
|
|
|
// reported results. So we'll only do it fairly rarely, and for most
|
|
|
|
|
// frames we'll just assume the clocks don't drift much
|
2016-11-23 06:09:58 -08:00
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
// Timestamps might shift and overflow for all backends. So for
|
|
|
|
|
// simplicity we don't synchronize the frame start on CPU and GPU. As
|
|
|
|
|
// we only need durations. We just roughly assume that the first
|
|
|
|
|
// timestamp on GPU matches the CPU frame start. For real timestamps
|
|
|
|
|
// it's better to use GPU Trace instruments.
|
2011-11-29 13:04:38 -08:00
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
frame.syncTimeStart = m_Profiler.GetTime();
|
2011-11-29 13:04:38 -08:00
|
|
|
m_Frames.push_back(frame);
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
RegionEnter(deviceCommandContext, "frame");
|
2011-11-29 13:04:38 -08:00
|
|
|
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
void FrameEnd(Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
|
2011-11-29 13:04:38 -08:00
|
|
|
{
|
2025-04-16 13:38:34 -07:00
|
|
|
RegionLeave(deviceCommandContext, "frame");
|
2011-11-29 13:04:38 -08:00
|
|
|
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
void RecordRegion(Renderer::Backend::IDeviceCommandContext* deviceCommandContext, const char* id, bool isEnter)
|
2011-11-29 13:04:38 -08:00
|
|
|
{
|
|
|
|
|
ENSURE(!m_Frames.empty());
|
|
|
|
|
SFrame& frame = m_Frames.back();
|
|
|
|
|
|
|
|
|
|
SEvent event;
|
|
|
|
|
event.id = id;
|
2025-04-16 13:38:34 -07:00
|
|
|
event.queryHandle = NewQuery();
|
2011-11-29 13:04:38 -08:00
|
|
|
event.isEnter = isEnter;
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
deviceCommandContext->InsertTimestampQuery(event.queryHandle, isEnter);
|
2011-11-29 13:04:38 -08:00
|
|
|
|
|
|
|
|
frame.events.push_back(event);
|
|
|
|
|
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
void RegionEnter(Renderer::Backend::IDeviceCommandContext* deviceCommandContext, const char* id)
|
2011-11-29 13:04:38 -08:00
|
|
|
{
|
2025-04-16 13:38:34 -07:00
|
|
|
RecordRegion(deviceCommandContext, id, true);
|
2011-11-29 13:04:38 -08:00
|
|
|
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
void RegionLeave(Renderer::Backend::IDeviceCommandContext* deviceCommandContext, const char* id)
|
2011-11-29 13:04:38 -08:00
|
|
|
{
|
2025-04-16 13:38:34 -07:00
|
|
|
RecordRegion(deviceCommandContext, id, false);
|
2011-11-29 13:04:38 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
|
|
void ProcessFrames()
|
|
|
|
|
{
|
2025-04-16 13:38:34 -07:00
|
|
|
Renderer::Backend::IDevice* device{g_VideoMode.GetBackendDevice()};
|
2011-11-29 13:04:38 -08:00
|
|
|
while (!m_Frames.empty())
|
|
|
|
|
{
|
|
|
|
|
SFrame& frame = m_Frames.front();
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
// We assume queries become available in order so we only need to
|
|
|
|
|
// check the last one.
|
|
|
|
|
if (!device->IsQueryResultAvailable(frame.events.back().queryHandle))
|
2011-11-29 13:04:38 -08:00
|
|
|
break;
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
// We use the first event GPU timestamp as a frame start.
|
|
|
|
|
const uint64_t firstFrameTimestamp{!frame.events.empty()
|
|
|
|
|
? device->GetQueryResult(frame.events[0].queryHandle) : 0u};
|
|
|
|
|
|
|
|
|
|
const double timestampMultiplier{
|
|
|
|
|
device->GetCapabilities().timestampMultiplier};
|
|
|
|
|
|
|
|
|
|
std::vector<std::pair<int, uint64_t>> stack;
|
2011-11-29 13:04:38 -08:00
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
// The frame's queries are now available, so retrieve and record all their results:
|
2011-11-29 13:04:38 -08:00
|
|
|
for (size_t i = 0; i < frame.events.size(); ++i)
|
|
|
|
|
{
|
2025-04-16 13:38:34 -07:00
|
|
|
const uint64_t queryTimestamp{
|
|
|
|
|
i == 0 ? firstFrameTimestamp : device->GetQueryResult(frame.events[i].queryHandle)};
|
|
|
|
|
ENSURE(queryTimestamp >= firstFrameTimestamp);
|
2011-11-29 13:04:38 -08:00
|
|
|
|
|
|
|
|
// Convert to absolute CPU-clock time
|
2025-04-16 13:38:34 -07:00
|
|
|
const double t{
|
|
|
|
|
frame.syncTimeStart + static_cast<double>(queryTimestamp - firstFrameTimestamp) * timestampMultiplier};
|
2011-11-29 13:04:38 -08:00
|
|
|
|
|
|
|
|
// Record a frame-start for syncing
|
|
|
|
|
if (i == 0)
|
|
|
|
|
m_Storage.RecordFrameStart(t);
|
|
|
|
|
|
|
|
|
|
if (frame.events[i].isEnter)
|
|
|
|
|
m_Storage.Record(CProfiler2::ITEM_ENTER, t, frame.events[i].id);
|
|
|
|
|
else
|
2016-06-25 02:11:10 -07:00
|
|
|
m_Storage.RecordLeave(t);
|
2011-11-29 13:04:38 -08:00
|
|
|
|
|
|
|
|
// Associate the frame number with the "frame" region
|
|
|
|
|
if (i == 0)
|
2012-01-01 08:43:10 -08:00
|
|
|
m_Storage.RecordAttributePrintf("%u", frame.num);
|
2011-11-29 13:04:38 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
PopFrontFrame();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void PopFrontFrame()
|
|
|
|
|
{
|
|
|
|
|
ENSURE(!m_Frames.empty());
|
|
|
|
|
SFrame& frame = m_Frames.front();
|
|
|
|
|
for (size_t i = 0; i < frame.events.size(); ++i)
|
2025-04-16 13:38:34 -07:00
|
|
|
m_FreeQueries.push_back(frame.events[i].queryHandle);
|
2011-11-29 13:04:38 -08:00
|
|
|
m_Frames.pop_front();
|
|
|
|
|
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
// Returns a new backend query handle (or a recycled old one).
|
|
|
|
|
uint32_t NewQuery()
|
2011-11-29 13:04:38 -08:00
|
|
|
{
|
2022-03-28 10:32:09 -07:00
|
|
|
if (m_FreeQueries.empty())
|
2025-04-16 13:38:34 -07:00
|
|
|
return g_VideoMode.GetBackendDevice()->AllocateQuery();
|
2011-11-29 13:04:38 -08:00
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
const uint32_t queryHandle{m_FreeQueries.back()};
|
2022-03-28 10:32:09 -07:00
|
|
|
m_FreeQueries.pop_back();
|
2025-04-16 13:38:34 -07:00
|
|
|
return queryHandle;
|
2011-11-29 13:04:38 -08:00
|
|
|
}
|
|
|
|
|
|
2022-03-28 10:32:09 -07:00
|
|
|
CProfiler2& m_Profiler;
|
|
|
|
|
CProfiler2::ThreadStorage& m_Storage;
|
2011-11-29 13:04:38 -08:00
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
std::vector<uint32_t> m_FreeQueries; // query objects that are allocated but not currently in used
|
2011-11-29 13:04:38 -08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/**
 * Creates the GPU profiler implementation only when the
 * "profiler2.gpu.enable" config value is set and the backend device reports
 * timestamp support; otherwise m_Impl is left unset.
 */
CProfiler2GPU::CProfiler2GPU(CProfiler2& profiler) :
	m_Profiler(profiler)
{
	// Check the config first so the device is not queried when GPU
	// profiling is disabled.
	if (!g_ConfigDB.Get("profiler2.gpu.enable", false))
		return;

	if (!g_VideoMode.GetBackendDevice()->GetCapabilities().timestamps)
		return;

	m_Impl = std::make_unique<CProfiler2GPUImpl>(m_Profiler);
}
|
|
|
|
|
|
2022-03-28 10:32:09 -07:00
|
|
|
// Defaulted in this translation unit, where CProfiler2GPUImpl is a complete
// type. NOTE(review): presumably the header only forward-declares the
// implementation class held by m_Impl - confirm against Profiler2GPU.h.
CProfiler2GPU::~CProfiler2GPU() = default;
|
2011-11-29 13:04:38 -08:00
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
/**
 * Forwards the frame start to the implementation; does nothing when GPU
 * profiling was not enabled at construction.
 */
void CProfiler2GPU::FrameStart(Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
{
	if (!m_Impl)
		return;

	m_Impl->FrameStart(deviceCommandContext);
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
/**
 * Forwards the frame end to the implementation; does nothing when GPU
 * profiling was not enabled at construction.
 */
void CProfiler2GPU::FrameEnd(Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
{
	if (!m_Impl)
		return;

	m_Impl->FrameEnd(deviceCommandContext);
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
/**
 * Forwards entering the region named by id to the implementation; does
 * nothing when GPU profiling was not enabled at construction.
 */
void CProfiler2GPU::RegionEnter(
	Renderer::Backend::IDeviceCommandContext* deviceCommandContext, const char* id)
{
	if (!m_Impl)
		return;

	m_Impl->RegionEnter(deviceCommandContext, id);
}
|
|
|
|
|
|
2025-04-16 13:38:34 -07:00
|
|
|
/**
 * Forwards leaving the region named by id to the implementation; does
 * nothing when GPU profiling was not enabled at construction.
 */
void CProfiler2GPU::RegionLeave(
	Renderer::Backend::IDeviceCommandContext* deviceCommandContext, const char* id)
{
	if (!m_Impl)
		return;

	m_Impl->RegionLeave(deviceCommandContext, id);
}
|