2007-05-26 09:57:39 -07:00
|
|
|
/**
|
|
|
|
|
* =========================================================================
|
|
|
|
|
* File : tsc.cpp
|
|
|
|
|
* Project : 0 A.D.
|
|
|
|
|
* Description : Timer implementation using RDTSC
|
|
|
|
|
* =========================================================================
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// license: GPL; see lib/license.txt
|
|
|
|
|
|
|
|
|
|
#include "precompiled.h"
|
|
|
|
|
#include "tsc.h"
|
|
|
|
|
|
2007-05-30 17:11:38 -07:00
|
|
|
#include "lib/bits.h"
|
2008-05-13 12:43:02 -07:00
|
|
|
#include "lib/sysdep/os_cpu.h"
|
2008-06-30 10:34:18 -07:00
|
|
|
#include "lib/sysdep/os/win/win.h"
|
|
|
|
|
#include "lib/sysdep/os/win/wutil.h"
|
2007-05-26 09:57:39 -07:00
|
|
|
|
2008-05-12 11:15:08 -07:00
|
|
|
#if ARCH_IA32 || ARCH_AMD64
|
2008-06-30 10:34:18 -07:00
|
|
|
# include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc
|
|
|
|
|
# include "lib/sysdep/arch/x86_x64/topology.h"
|
2008-04-19 11:10:00 -07:00
|
|
|
#endif
|
|
|
|
|
|
2007-05-26 09:57:39 -07:00
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
2007-05-30 17:11:38 -07:00
|
|
|
// detect throttling
|
2007-05-26 09:57:39 -07:00
|
|
|
|
2007-05-30 17:11:38 -07:00
|
|
|
enum AmdPowerNowFlags
|
2007-05-26 09:57:39 -07:00
|
|
|
{
|
2007-05-30 17:11:38 -07:00
|
|
|
PN_FREQ_ID_CTRL = BIT(1),
|
|
|
|
|
PN_SW_THERMAL_CTRL = BIT(5),
|
|
|
|
|
PN_INVARIANT_TSC = BIT(8)
|
2007-05-26 09:57:39 -07:00
|
|
|
};
|
|
|
|
|
|
2007-05-30 17:11:38 -07:00
|
|
|
/**
 * Determine whether the CPU advertises a mechanism that can change its
 * clock frequency (and therefore the TSC rate) at runtime.
 *
 * - Intel: reports true if either the TM_SCC or EST capability is present.
 * - AMD: queries CPUID function 0x80000007 and reports true if EDX has
 *   PN_FREQ_ID_CTRL or PN_SW_THERMAL_CTRL set.
 *
 * @return true if one of the above checks indicates throttling may occur;
 * false otherwise (including other vendors, a failed CPUID query and
 * architectures other than IA-32 / AMD64, where nothing is checked).
 **/
static bool IsThrottlingPossible()
{
#if ARCH_IA32 || ARCH_AMD64
	// zero-initialize so that no indeterminate register values
	// (in particular ecx) are handed to CPUID.
	x86_x64_CpuidRegs regs = { 0 };

	switch(x86_x64_Vendor())
	{
	case X86_X64_VENDOR_INTEL:
		if(x86_x64_cap(X86_X64_CAP_TM_SCC) || x86_x64_cap(X86_X64_CAP_EST))
			return true;
		break;

	case X86_X64_VENDOR_AMD:
		regs.eax = 0x80000007;
		if(x86_x64_cpuid(&regs))
		{
			if(regs.edx & (PN_FREQ_ID_CTRL|PN_SW_THERMAL_CTRL))
				return true;
		}
		break;

	default:
		// other vendors: no known throttling indicator to check.
		break;
	}
#endif

	// note: this must remain outside the #if - otherwise, control would
	// reach the end of a non-void function on other architectures.
	return false;
}
|
2007-05-28 02:25:38 -07:00
|
|
|
|
2007-05-26 09:57:39 -07:00
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
|
|
2007-05-28 02:25:38 -07:00
|
|
|
/**
 * Prepare the counter for use.
 *
 * @return ERR::NO_SYS if (on IA-32 / AMD64) the CPU does not report the
 * TSC capability; otherwise INFO::OK.
 **/
LibError CounterTSC::Activate()
{
#if ARCH_IA32 || ARCH_AMD64
	const bool isTscAvailable = x86_x64_cap(X86_X64_CAP_TSC);
	if(!isTscAvailable)
		return ERR::NO_SYS;	// NOWARN (CPU doesn't support RDTSC)
#endif

	// no further setup is required.
	return INFO::OK;
}
|
|
|
|
|
|
2007-05-28 02:25:38 -07:00
|
|
|
/**
 * Counterpart of Activate. Intentionally empty: Activate does not
 * acquire anything that would need to be released here.
 **/
void CounterTSC::Shutdown()
{
	// nothing to do.
}
|
|
|
|
|
|
2007-05-28 02:25:38 -07:00
|
|
|
bool CounterTSC::IsSafe() const
|
2007-05-26 09:57:39 -07:00
|
|
|
{
|
2007-05-30 17:11:38 -07:00
|
|
|
// use of the TSC for timing is subject to a litany of potential problems:
|
|
|
|
|
// - separate, unsynchronized counters with offset and drift;
|
|
|
|
|
// - frequency changes (P-state transitions and STPCLK throttling);
|
|
|
|
|
// - failure to increment in C3 and C4 deep-sleep states.
|
|
|
|
|
// we will discuss the specifics below.
|
|
|
|
|
|
|
|
|
|
// SMP or multi-core => counters are unsynchronized. this could be
|
|
|
|
|
// solved by maintaining separate per-core counter states, but that
|
|
|
|
|
// requires atomic reads of the TSC and the current processor number.
|
|
|
|
|
//
|
|
|
|
|
// (otherwise, we have a subtle race condition: if preempted while
|
|
|
|
|
// reading the time and rescheduled on a different core, incorrect
|
|
|
|
|
// results may be returned, which would be unacceptable.)
|
|
|
|
|
//
|
|
|
|
|
// unfortunately this isn't possible without OS support or the
|
|
|
|
|
// as yet unavailable RDTSCP instruction => unsafe.
|
|
|
|
|
//
|
|
|
|
|
// (note: if the TSC is invariant, drift is no longer a concern.
|
|
|
|
|
// we could synchronize the TSC MSRs during initialization and avoid
|
|
|
|
|
// per-core counter state and the abovementioned race condition.
|
|
|
|
|
// however, we won't bother, since such platforms aren't yet widespread
|
|
|
|
|
// and would surely support the nice and safe HPET, anyway)
|
2008-05-13 12:43:02 -07:00
|
|
|
{
|
|
|
|
|
WinScopedLock lock(WHRT_CS);
|
|
|
|
|
const CpuTopology* topology = cpu_topology_Detect();
|
|
|
|
|
if(cpu_topology_NumPackages(topology) != 1 || cpu_topology_CoresPerPackage(topology) != 1)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2007-05-30 17:11:38 -07:00
|
|
|
|
2008-05-12 11:15:08 -07:00
|
|
|
#if ARCH_IA32 || ARCH_AMD64
|
2007-05-30 17:11:38 -07:00
|
|
|
// recent CPU:
|
2008-05-12 11:15:08 -07:00
|
|
|
if(x86_x64_Generation() >= 7)
|
2007-05-26 09:57:39 -07:00
|
|
|
{
|
2007-05-30 17:11:38 -07:00
|
|
|
// note: 8th generation CPUs support C1-clock ramping, which causes
|
|
|
|
|
// drift on multi-core systems, but those were excluded above.
|
|
|
|
|
|
2008-05-12 11:15:08 -07:00
|
|
|
x86_x64_CpuidRegs regs;
|
had to remove uint and ulong from lib/types.h due to conflict with other library.
this snowballed into a massive search+destroy of the hodgepodge of
mostly equivalent types we had in use (int, uint, unsigned, unsigned
int, i32, u32, ulong, uintN).
it is more efficient to use 64-bit types in 64-bit mode, so the
preferred default is size_t (for anything remotely resembling a size or
index). tile coordinates are ssize_t to allow more efficient conversion
to/from floating point. flags are int because we almost never need more
than 15 distinct bits, bit test/set is not slower and int is fastest to
type. finally, some data that is pretty much directly passed to OpenGL
is now typed accordingly.
after several hours, the code now requires fewer casts and less
guesswork.
other changes:
- unit and player IDs now have an "invalid id" constant in the
respective class to avoid casting and -1
- fix some endian/64-bit bugs in the map (un)packing. added a
convenience function to write/read a size_t.
- ia32: change CPUID interface to allow passing in ecx (required for
cache topology detection, which I need at work). remove some unneeded
functions from asm, replace with intrinsics where possible.
This was SVN commit r5942.
2008-05-11 11:48:32 -07:00
|
|
|
regs.eax = 0x80000007;
|
2008-05-12 11:15:08 -07:00
|
|
|
if(x86_x64_cpuid(®s))
|
2007-05-30 17:11:38 -07:00
|
|
|
{
|
|
|
|
|
// TSC is invariant WRT P-state, C-state and STPCLK => safe.
|
had to remove uint and ulong from lib/types.h due to conflict with other library.
this snowballed into a massive search+destroy of the hodgepodge of
mostly equivalent types we had in use (int, uint, unsigned, unsigned
int, i32, u32, ulong, uintN).
it is more efficient to use 64-bit types in 64-bit mode, so the
preferred default is size_t (for anything remotely resembling a size or
index). tile coordinates are ssize_t to allow more efficient conversion
to/from floating point. flags are int because we almost never need more
than 15 distinct bits, bit test/set is not slower and int is fastest to
type. finally, some data that is pretty much directly passed to OpenGL
is now typed accordingly.
after several hours, the code now requires fewer casts and less
guesswork.
other changes:
- unit and player IDs now have an "invalid id" constant in the
respective class to avoid casting and -1
- fix some endian/64-bit bugs in the map (un)packing. added a
convenience function to write/read a size_t.
- ia32: change CPUID interface to allow passing in ecx (required for
cache topology detection, which I need at work). remove some unneeded
functions from asm, replace with intrinsics where possible.
This was SVN commit r5942.
2008-05-11 11:48:32 -07:00
|
|
|
if(regs.edx & PN_INVARIANT_TSC)
|
2007-05-30 17:11:38 -07:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// in addition to P-state transitions, we're also subject to
|
|
|
|
|
// STPCLK throttling. this happens when the chipset thinks the
|
|
|
|
|
// system is dangerously overheated; the OS isn't even notified.
|
|
|
|
|
// it may be rare, but could cause incorrect results => unsafe.
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// newer systems also support the C3 Deep Sleep state, in which
|
|
|
|
|
// the TSC isn't incremented. that's not nice, but irrelevant
|
|
|
|
|
// since STPCLK dooms the TSC on those systems anyway.
|
2007-05-26 09:57:39 -07:00
|
|
|
}
|
2008-04-19 11:10:00 -07:00
|
|
|
#endif
|
2007-05-26 09:57:39 -07:00
|
|
|
|
2007-05-30 17:11:38 -07:00
|
|
|
// we're dealing with a single older CPU; the only problem there is
|
|
|
|
|
// throttling, i.e. changes to the TSC frequency. we don't want to
|
|
|
|
|
// disable this because it may be important for cooling. the OS
|
|
|
|
|
// initiates changes but doesn't notify us; jumps are too frequent
|
|
|
|
|
// and drastic to detect and account for => unsafe.
|
|
|
|
|
if(IsThrottlingPossible())
|
|
|
|
|
return false;
|
2007-05-26 09:57:39 -07:00
|
|
|
|
2007-05-30 17:11:38 -07:00
|
|
|
return true;
|
2007-05-26 09:57:39 -07:00
|
|
|
}
|
|
|
|
|
|
2007-05-28 02:25:38 -07:00
|
|
|
/**
 * @return the current counter value, as delivered by x86_x64_rdtsc.
 **/
u64 CounterTSC::Counter() const
{
	const u64 ticks = x86_x64_rdtsc();
	return ticks;
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* WHRT uses this to ensure the counter (running at nominal frequency)
|
|
|
|
|
* doesn't overflow more than once during CALIBRATION_INTERVAL_MS.
|
|
|
|
|
**/
|
had to remove uint and ulong from lib/types.h due to conflict with other library.
this snowballed into a massive search+destroy of the hodgepodge of
mostly equivalent types we had in use (int, uint, unsigned, unsigned
int, i32, u32, ulong, uintN).
it is more efficient to use 64-bit types in 64-bit mode, so the
preferred default is size_t (for anything remotely resembling a size or
index). tile coordinates are ssize_t to allow more efficient conversion
to/from floating point. flags are int because we almost never need more
than 15 distinct bits, bit test/set is not slower and int is fastest to
type. finally, some data that is pretty much directly passed to OpenGL
is now typed accordingly.
after several hours, the code now requires fewer casts and less
guesswork.
other changes:
- unit and player IDs now have an "invalid id" constant in the
respective class to avoid casting and -1
- fix some endian/64-bit bugs in the map (un)packing. added a
convenience function to write/read a size_t.
- ia32: change CPUID interface to allow passing in ecx (required for
cache topology detection, which I need at work). remove some unneeded
functions from asm, replace with intrinsics where possible.
This was SVN commit r5942.
2008-05-11 11:48:32 -07:00
|
|
|
size_t CounterTSC::CounterBits() const
|
2007-05-26 09:57:39 -07:00
|
|
|
{
|
|
|
|
|
return 64;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* initial measurement of the tick rate. not necessarily correct
|
2008-05-13 12:43:02 -07:00
|
|
|
* (e.g. when using TSC: os_cpu_ClockFrequency isn't exact).
|
2007-05-26 09:57:39 -07:00
|
|
|
**/
|
2007-05-28 02:25:38 -07:00
|
|
|
double CounterTSC::NominalFrequency() const
|
2007-05-26 09:57:39 -07:00
|
|
|
{
|
2008-05-13 12:43:02 -07:00
|
|
|
// WARNING: do not call x86_x64_ClockFrequency because it uses the
|
|
|
|
|
// HRT, which we're currently in the process of initializing.
|
|
|
|
|
// instead query CPU clock frequency via OS.
|
|
|
|
|
//
|
|
|
|
|
// note: even here, initial accuracy isn't critical because the
|
|
|
|
|
// clock is subject to thermal drift and would require continual
|
|
|
|
|
// recalibration anyway.
|
|
|
|
|
return os_cpu_ClockFrequency();
|
2007-05-26 09:57:39 -07:00
|
|
|
}
|