mirror of
https://gitea.wildfiregames.com/0ad/0ad
synced 2026-06-20 07:13:56 -07:00
Make include-what-you-use happy with some files in source/lib and fix what needs to be fixed. Ref: #8086 Signed-off-by: Ralph Sennhauser <ralph.sennhauser@gmail.com>
515 lines
14 KiB
C++
515 lines
14 KiB
C++
/* Copyright (C) 2025 Wildfire Games.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
/*
|
|
* CPU-specific routines common to 32 and 64-bit x86
|
|
*/
|
|
|
|
#include "precompiled.h"
|
|
|
|
#include "x86_x64.h"
|
|
|
|
#include "lib/bits.h"
|
|
#include "lib/code_annotation.h"
|
|
#include "lib/debug.h"
|
|
#include "lib/lib.h"
|
|
#include "lib/module_init.h"
|
|
#include "lib/posix/posix_pthread.h"
|
|
#include "lib/secure_crt.h"
|
|
#include "lib/status.h"
|
|
#include "lib/sysdep/cpu.h"
|
|
#include "lib/timer.h"
|
|
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
#include <vector>
|
|
|
|
#if MSC_VERSION
|
|
# include <intrin.h> // __rdtsc
|
|
#endif
|
|
|
|
namespace x86_x64 {
|
|
|
|
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729
|
|
// VC10+ and VC9 SP1: __cpuidex is already available
|
|
#elif GCC_VERSION
|
|
// stand-in for the MSVC __cpuidex intrinsic, implemented with GCC-style
// inline assembly: executes CPUID with eax=level and ecx=index and stores
// eax, ebx, ecx, edx (in that order) into regsArray[0..3].
// note: deliberately no trailing semicolon - the caller supplies it, so the
// macro behaves like a function call and can be the sole statement of an
// if/else branch.
# define __cpuidex(regsArray, level, index)\
	__asm__ __volatile__ ("cpuid"\
		: "=a" ((regsArray)[0]), "=b" ((regsArray)[1]), "=c" ((regsArray)[2]), "=d" ((regsArray)[3])\
		: "0" (level), "2" (index))
|
|
#else
|
|
# error "compiler not supported"
|
|
#endif
|
|
|
|
|
|
// some of this module's functions are frequently called but require
|
|
// non-trivial initialization, so caching is helpful. isInitialized
|
|
// flags aren't thread-safe, so we use ModuleInit. calling it from
|
|
// every function is a bit wasteful, but it is convenient to avoid
|
|
// requiring users to pass around a global state object.
|
|
// one big Init() would be prone to deadlock if its subroutines also
|
|
// call a public function (that re-enters ModuleInit), so each
|
|
// function gets its own initState.
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// CPUID
|
|
|
|
// Executes CPUID unconditionally (no range check on the function number).
// regs->eax is passed as the level and regs->ecx as the index; afterwards
// *regs holds the four result registers.
static void Invoke_cpuid(CpuidRegs* regs)
{
	// __cpuidex writes four consecutive ints, so the register struct must
	// have exactly that layout.
	cassert(sizeof(*regs) == 4*sizeof(int));
	cassert(sizeof(regs->eax) == sizeof(int));

	int* const registerArray = (int*)regs;
	__cpuidex(registerArray, regs->eax, regs->ecx);
}
|
|
|
|
static u32 cpuid_maxFunction;
|
|
static u32 cpuid_maxExtendedFunction;
|
|
|
|
static Status InitCpuid()
|
|
{
|
|
CpuidRegs regs = { 0 };
|
|
|
|
regs.eax = 0;
|
|
Invoke_cpuid(®s);
|
|
cpuid_maxFunction = regs.eax;
|
|
|
|
regs.eax = 0x80000000;
|
|
Invoke_cpuid(®s);
|
|
cpuid_maxExtendedFunction = regs.eax;
|
|
|
|
return INFO::OK;
|
|
}
|
|
|
|
// Range-checked CPUID: executes the function selected by regs->eax
// (sub-function index in regs->ecx) and stores the results in *regs.
// @return false, leaving *regs untouched, if the requested function lies
// beyond the limits cached by InitCpuid.
bool cpuid(CpuidRegs* regs)
{
	static ModuleInitState initState{ 0 };
	ModuleInit(&initState, InitCpuid);

	const u32 requested = regs->eax;
	// every request is compared against the extended limit; requests below
	// 0x80000000 are additionally compared against the standard limit.
	const bool beyondExtended = requested > cpuid_maxExtendedFunction;
	const bool beyondStandard = requested < 0x80000000 && requested > cpuid_maxFunction;
	if(beyondExtended || beyondStandard)
		return false;

	Invoke_cpuid(regs);
	return true;
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// capability bits
|
|
|
|
// treated as 128 bit field; order: std ecx, std edx, ext ecx, ext edx
|
|
// keep in sync with enum Cap!
|
|
static u32 caps[4];
|
|
|
|
static ModuleInitState capsInitState{ 0 };
|
|
|
|
static Status InitCaps()
|
|
{
|
|
CpuidRegs regs = { 0 };
|
|
regs.eax = 1;
|
|
if(cpuid(®s))
|
|
{
|
|
caps[0] = regs.ecx;
|
|
caps[1] = regs.edx;
|
|
}
|
|
regs.eax = 0x80000001;
|
|
if(cpuid(®s))
|
|
{
|
|
caps[2] = regs.ecx;
|
|
caps[3] = regs.edx;
|
|
}
|
|
|
|
return INFO::OK;
|
|
}
|
|
|
|
// @return whether the capability bit identified by cap is set in caps[].
// Raises a debug warning and returns false if cap lies outside the
// 128-bit field.
bool Cap(Caps cap)
{
	ModuleInit(&capsInitState, InitCaps);

	// each caps[] entry holds 32 flags: the upper bits of cap select the
	// entry, the lowest five bits select the flag within it.
	const size_t word = cap >> 5;
	if(word < ARRAY_SIZE(caps))
	{
		const size_t bitInWord = cap & 0x1F;
		return IsBitSet(caps[word], bitInWord);
	}

	DEBUG_WARN_ERR(ERR::INVALID_PARAM);
	return false;
}
|
|
|
|
// Copies the four cached capability words to the caller
// (order: std ecx, std edx, ext ecx, ext edx). All pointers are dereferenced.
void GetCapBits(u32* d0, u32* d1, u32* d2, u32* d3)
{
	ModuleInit(&capsInitState, InitCaps);

	u32* const destinations[] = { d0, d1, d2, d3 };
	for(size_t i = 0; i < ARRAY_SIZE(caps); i++)
		*destinations[i] = caps[i];
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// vendor
|
|
|
|
static Vendors vendor;
|
|
|
|
static Status InitVendor()
|
|
{
|
|
CpuidRegs regs = { 0 };
|
|
regs.eax = 0;
|
|
if(!cpuid(®s))
|
|
DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
|
|
|
|
// copy regs to string
|
|
// note: 'strange' ebx,edx,ecx reg order is due to ModR/M encoding order.
|
|
char vendorString[13];
|
|
memcpy(&vendorString[0], ®s.ebx, 4);
|
|
memcpy(&vendorString[4], ®s.edx, 4);
|
|
memcpy(&vendorString[8], ®s.ecx, 4);
|
|
vendorString[12] = '\0'; // 0-terminate
|
|
|
|
if(!strcmp(vendorString, "AuthenticAMD"))
|
|
vendor = x86_x64::VENDOR_AMD;
|
|
else if(!strcmp(vendorString, "GenuineIntel"))
|
|
vendor = x86_x64::VENDOR_INTEL;
|
|
else
|
|
{
|
|
DEBUG_WARN_ERR(ERR::CPU_UNKNOWN_VENDOR);
|
|
vendor = x86_x64::VENDOR_UNKNOWN;
|
|
}
|
|
|
|
return INFO::OK;
|
|
}
|
|
|
|
// @return the CPU vendor, determined once via InitVendor and then cached.
Vendors Vendor()
{
	static ModuleInitState vendorInitState{ 0 };
	ModuleInit(&vendorInitState, InitVendor);

	return vendor;
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// signature
|
|
|
|
static size_t m_Model;
|
|
static size_t m_Family;
|
|
static ModuleInitState signatureInitState{ 0 };
|
|
|
|
static Status InitSignature()
|
|
{
|
|
CpuidRegs regs = { 0 };
|
|
regs.eax = 1;
|
|
if(!cpuid(®s))
|
|
DEBUG_WARN_ERR(ERR::CPU_FEATURE_MISSING);
|
|
m_Model = bits(regs.eax, 4, 7);
|
|
m_Family = bits(regs.eax, 8, 11);
|
|
const size_t extendedModel = bits(regs.eax, 16, 19);
|
|
const size_t extendedFamily = bits(regs.eax, 20, 27);
|
|
if(m_Family == 0xF)
|
|
m_Family += extendedFamily;
|
|
if(m_Family == 0xF || (Vendor() == x86_x64::VENDOR_INTEL && m_Family == 6))
|
|
m_Model += extendedModel << 4;
|
|
return INFO::OK;
|
|
}
|
|
|
|
size_t Model()
|
|
{
|
|
ModuleInit(&signatureInitState, InitSignature);
|
|
return m_Model;
|
|
}
|
|
|
|
size_t Family()
|
|
{
|
|
ModuleInit(&signatureInitState, InitSignature);
|
|
return m_Family;
|
|
}
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// identifier string
|
|
|
|
/// functor to remove substrings from the CPU identifier string
class StringStripper
{
public:
	/// @param string 0-terminated buffer that is modified in place.
	/// @param max_chars number of chars in the buffer that belong to the
	/// string, including its 0-terminator.
	StringStripper(char* string, size_t max_chars)
		: m_string(string), m_max_chars(max_chars)
	{
	}

	// remove all instances of substring from m_string
	void operator()(const char* substring)
	{
		// an empty needle is "found" at offset 0 on every iteration without
		// anything being removed, so the loop below would never terminate.
		if(!substring || substring[0] == '\0')
			return;

		const size_t substring_length = strlen(substring);
		for(;;)
		{
			char* substring_pos = strstr(m_string, substring);
			if(!substring_pos)
				break;
			// move everything between the end of the match and the end of
			// the original buffer (which includes the 0-terminator) forward.
			const size_t substring_ofs = substring_pos - m_string;
			const size_t num_chars = m_max_chars - substring_ofs - substring_length;
			memmove(substring_pos, substring_pos+substring_length, num_chars);
		}
	}

private:
	char* m_string;
	size_t m_max_chars;
};
|
|
|
|
// 3 calls x 4 registers x 4 bytes = 48 + 0-terminator
|
|
static char identifierString[48+1];
|
|
|
|
static Status InitIdentifierString()
|
|
{
|
|
// get brand string (if available)
|
|
char* pos = identifierString;
|
|
bool gotBrandString = true;
|
|
for(u32 function = 0x80000002; function <= 0x80000004; function++)
|
|
{
|
|
CpuidRegs regs = { 0 };
|
|
regs.eax = function;
|
|
gotBrandString &= cpuid(®s);
|
|
memcpy(pos, ®s, 16);
|
|
pos += 16;
|
|
}
|
|
|
|
// fall back to manual detect of CPU type because either:
|
|
// - CPU doesn't support brand string (we use a flag to indicate this
|
|
// rather than comparing against a default value because it is safer);
|
|
// - the brand string is useless, e.g. "Unknown". this happens on
|
|
// some older boards whose BIOS reprograms the string for CPUs it
|
|
// doesn't recognize.
|
|
if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0)
|
|
{
|
|
const size_t family = Family();
|
|
const size_t model = Model();
|
|
switch(Vendor())
|
|
{
|
|
case x86_x64::VENDOR_AMD:
|
|
// everything else is either too old, or should have a brand string.
|
|
if(family == 6)
|
|
{
|
|
if(model == 3 || model == 7)
|
|
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Duron");
|
|
else if(model <= 5)
|
|
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon");
|
|
else
|
|
{
|
|
if(Cap(x86_x64::CAP_AMD_MP))
|
|
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon MP");
|
|
else
|
|
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "AMD Athlon XP");
|
|
}
|
|
}
|
|
break;
|
|
|
|
case x86_x64::VENDOR_INTEL:
|
|
// everything else is either too old, or should have a brand string.
|
|
if(family == 6)
|
|
{
|
|
if(model == 1)
|
|
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium Pro");
|
|
else if(model == 3 || model == 5)
|
|
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium II");
|
|
else if(model == 6)
|
|
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Celeron");
|
|
else
|
|
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Intel Pentium III");
|
|
}
|
|
break;
|
|
|
|
default:
|
|
strcpy_s(identifierString, ARRAY_SIZE(identifierString), "Unknown, non-Intel/AMD");
|
|
break;
|
|
}
|
|
}
|
|
// identifierString already holds a valid brand string; pretty it up.
|
|
else
|
|
{
|
|
const char* const undesiredStrings[] = { "(tm)", "(TM)", "(R)", "CPU ", " " };
|
|
std::for_each(undesiredStrings, undesiredStrings+ARRAY_SIZE(undesiredStrings),
|
|
StringStripper(identifierString, strlen(identifierString)+1));
|
|
|
|
// note: Intel brand strings include a frequency, but we can't rely
|
|
// on it because the CPU may be overclocked. we'll leave it in the
|
|
// string to show measurement accuracy and if SpeedStep is active.
|
|
}
|
|
|
|
return INFO::OK;
|
|
}
|
|
|
|
static const char* IdentifierString()
|
|
{
|
|
static ModuleInitState initState{ 0 };
|
|
ModuleInit(&initState, InitIdentifierString);
|
|
return identifierString;
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// miscellaneous stateless functions
|
|
|
|
#if !MSC_VERSION // ensure not already defined in header
|
|
u64 rdtsc()
|
|
{
|
|
#if GCC_VERSION
|
|
// GCC supports "portable" assembly for both x86 and x64
|
|
volatile u32 lo, hi;
|
|
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
|
|
return u64_from_u32(hi, lo);
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
|
|
// Triggers a breakpoint via the compiler-specific mechanism; does nothing
// if neither MSC_VERSION nor GCC_VERSION is set.
void DebugBreak()
{
#if MSC_VERSION
	// compiler intrinsic
	__debugbreak();
#elif GCC_VERSION
	// note: this probably isn't necessary, since unix_debug_break
	// (SIGTRAP) is most probably available if GCC_VERSION.
	// we include it for completeness, though.
	__asm__ __volatile__ ("int $3");
#endif
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// CPU frequency
|
|
|
|
// set scheduling priority and restore when going out of scope.
// the switch to SCHED_FIFO is best-effort: its result is not checked.
class ScopedSetPriority
{
public:
	// explicit: an int must not silently convert into a priority change.
	explicit ScopedSetPriority(int newPriority)
		: m_oldPolicy(0), m_oldParam(), m_haveOldSettings(false)
	{
		// get current scheduling policy and priority; remember whether that
		// worked so the destructor never restores uninitialized values.
		m_haveOldSettings = (pthread_getschedparam(pthread_self(), &m_oldPolicy, &m_oldParam) == 0);

		// set new priority
		sched_param newParam = {0};
		newParam.sched_priority = newPriority;
		pthread_setschedparam(pthread_self(), SCHED_FIFO, &newParam);
	}

	~ScopedSetPriority()
	{
		// restore previous policy and priority.
		if(m_haveOldSettings)
			pthread_setschedparam(pthread_self(), m_oldPolicy, &m_oldParam);
	}

	// non-copyable: a copy would restore the old settings a second time,
	// possibly while the original is still supposed to be in effect.
	ScopedSetPriority(const ScopedSetPriority&) = delete;
	ScopedSetPriority& operator=(const ScopedSetPriority&) = delete;

private:
	int m_oldPolicy;
	sched_param m_oldParam;
	bool m_haveOldSettings;
};
|
|
|
|
// note: this function uses timer.cpp!timer_Time, which is implemented via
|
|
// whrt.cpp on Windows.
|
|
double ClockFrequency()
|
|
{
|
|
// if the TSC isn't available, there's really no good way to count the
|
|
// actual CPU clocks per known time interval, so bail.
|
|
// note: loop iterations ("bogomips") are not a reliable measure due
|
|
// to differing IPC and compiler optimizations.
|
|
if(!Cap(x86_x64::CAP_TSC))
|
|
return -1.0; // impossible value
|
|
|
|
// increase priority to reduce interference while measuring.
|
|
const int priority = sched_get_priority_max(SCHED_FIFO)-1;
|
|
ScopedSetPriority ssp(priority);
|
|
|
|
// note: no need to "warm up" cpuid - it will already have been
|
|
// called several times by the time this code is reached.
|
|
// (background: it's used in rdtsc() to serialize instruction flow;
|
|
// the first call is documented to be slower on Intel CPUs)
|
|
|
|
size_t numSamples = 16;
|
|
// if clock is low-res, do less samples so it doesn't take too long.
|
|
// balance measuring time (~ 10 ms) and accuracy (< 0.1% error -
|
|
// ok for using the TSC as a time reference)
|
|
if(timer_Resolution() >= 1e-3)
|
|
numSamples = 8;
|
|
std::vector<double> samples(numSamples);
|
|
|
|
for(size_t i = 0; i < numSamples; i++)
|
|
{
|
|
double dt;
|
|
i64 dc; // (i64 instead of u64 for faster conversion to double)
|
|
|
|
// count # of clocks in max{1 tick, 1 ms}:
|
|
// .. wait for start of tick.
|
|
const double t0 = timer_Time();
|
|
u64 c1; double t1;
|
|
do
|
|
{
|
|
// note: timer_Time effectively has a long delay (up to 5 us)
|
|
// before returning the time. we call it before rdtsc to
|
|
// minimize the delay between actually sampling time / TSC,
|
|
// thus decreasing the chance for interference.
|
|
// (if unavoidable background activity, e.g. interrupts,
|
|
// delays the second reading, inaccuracy is introduced).
|
|
t1 = timer_Time();
|
|
c1 = rdtsc();
|
|
}
|
|
while(t1 == t0);
|
|
// .. wait until start of next tick and at least 1 ms elapsed.
|
|
do
|
|
{
|
|
const double t2 = timer_Time();
|
|
const u64 c2 = rdtsc();
|
|
dc = (i64)(c2 - c1);
|
|
dt = t2 - t1;
|
|
}
|
|
while(dt < 1e-3);
|
|
|
|
// .. freq = (delta_clocks) / (delta_seconds);
|
|
// rdtsc/timer overhead is negligible.
|
|
const double freq = dc / dt;
|
|
samples[i] = freq;
|
|
}
|
|
|
|
std::sort(samples.begin(), samples.end());
|
|
|
|
// median filter (remove upper and lower 25% and average the rest).
|
|
// note: don't just take the lowest value! it could conceivably be
|
|
// too low, if background processing delays reading c1 (see above).
|
|
double sum = 0.0;
|
|
const size_t lo = numSamples/4, hi = 3*numSamples/4;
|
|
for(size_t i = lo; i < hi; i++)
|
|
sum += samples[i];
|
|
|
|
const double clockFrequency = sum / (hi-lo);
|
|
return clockFrequency;
|
|
}
|
|
|
|
} // namespace x86_x64
|
|
|
|
|
|
const char* cpu_IdentifierString()
|
|
{
|
|
return x86_x64::IdentifierString();
|
|
}
|