diff --git a/source/lib/bits.h b/source/lib/bits.h index a9b2e6d4af..1ec4248b2f 100644 --- a/source/lib/bits.h +++ b/source/lib/bits.h @@ -40,7 +40,7 @@ template T Bit(size_t n) { const T one = T(1); - return (one << n); + return (T)(one << n); } /** @@ -71,16 +71,14 @@ bool IsBitSet(T value, size_t index) template T bit_mask(size_t numBits) { - if(numBits == 0) // prevent shift count == bitsInT, which would be undefined. - return 0; - // notes: - // - the perhaps more intuitive (1 << numBits)-1 cannot - // handle numBits == bitsInT, but this implementation does. - // - though bulky, the below statements avoid sign-conversion warnings. const T bitsInT = sizeof(T)*CHAR_BIT; - T mask(0); - mask = T(~mask); - mask >>= T(bitsInT-numBits); + const T allBits = (T)~T(0); + // (shifts of at least bitsInT are undefined) + if(numBits >= bitsInT) + return allBits; + // (note: the previous allBits >> (bitsInT-numBits) is not safe + // because right-shifts of negative numbers are undefined.) + const T mask = T(T(1) << numBits)-1; return mask; } @@ -98,12 +96,31 @@ T bit_mask(size_t numBits) template inline T bits(T num, size_t lo_idx, size_t hi_idx) { - const size_t count = (hi_idx - lo_idx)+1; // # bits to return + const size_t numBits = (hi_idx - lo_idx)+1; // # bits to return T result = T(num >> lo_idx); - result = T(result & bit_mask(count)); + result = T(result & bit_mask(numBits)); return result; } +/** + * set the value of bits hi_idx:lo_idx + * + * @param lo_idx bit index of lowest bit to include + * @param hi_idx bit index of highest bit to include + * @param value new value to be assigned to these bits + **/ +template +inline T SetBitsTo(T num, size_t lo_idx, size_t hi_idx, size_t value) +{ + const size_t numBits = (hi_idx - lo_idx)+1; + debug_assert(value < (T(1) << numBits)); + const T mask = bit_mask(numBits) << lo_idx; + T result = num & ~mask; + result = T(result | (value << lo_idx)); + return result; +} + + /** * @return number of 1-bits in mask **/ @@ -127,7 
+144,7 @@ size_t PopulationCount(T mask) * @return whether the given number is a power of two. **/ template -bool is_pow2(T n) +inline bool is_pow2(T n) { // 0 would pass the test below but isn't a POT. if(n == 0) @@ -135,6 +152,19 @@ bool is_pow2(T n) return (n & (n-1)) == 0; } +template +inline T LeastSignificantBit(T x) +{ + const T negX = T(~x + 1); // 2's complement (avoids 'negating unsigned type' warning) + return x & negX; +} + +template +inline T ClearLeastSignificantBit(T x) +{ + return x & (x-1); +} + /** * ceil(log2(x)) * diff --git a/source/lib/code_annotation.h b/source/lib/code_annotation.h index a63acb6f0f..3aceeef810 100644 --- a/source/lib/code_annotation.h +++ b/source/lib/code_annotation.h @@ -43,60 +43,48 @@ /** -"unreachable code" helpers - -unreachable lines of code are often the source or symptom of subtle bugs. -they are flagged by compiler warnings; however, the opposite problem - -erroneously reaching certain spots (e.g. due to missing return statement) -is worse and not detected automatically. - -to defend against this, the programmer can annotate their code to -indicate to humans that a particular spot should never be reached. -however, that isn't much help; better is a sentinel that raises an -error if if it is actually reached. hence, the UNREACHABLE macro. - -ironically, if the code guarded by UNREACHABLE works as it should, -compilers may flag the macro's code as unreachable. this would -distract from genuine warnings, which is unacceptable. - -even worse, compilers differ in their code checking: GCC only complains if -non-void functions end without returning a value (i.e. missing return -statement), while VC checks if lines are unreachable (e.g. if they are -preceded by a return on all paths). - -our implementation of UNREACHABLE solves this dilemna as follows: -- on GCC: call abort(); since it has the noreturn attributes, the - "non-void" warning disappears. -- on VC: avoid generating any code. 
we allow the compiler to assume the - spot is actually unreachable, which incidentally helps optimization. - if reached after all, a crash usually results. in that case, compile with - CONFIG_PARANOIA, which will cause an error message to be displayed. - -this approach still allows for the possiblity of automated -checking, but does not cause any compiler warnings. -**/ + * "unreachable code" helpers + * + * unreachable lines of code are often the source or symptom of subtle bugs. + * they are flagged by compiler warnings; however, the opposite problem - + * erroneously reaching certain spots (e.g. due to missing return statement) + * is worse and not detected automatically. + * + * to defend against this, the programmer can annotate their code to + * indicate to humans that a particular spot should never be reached. + * however, that isn't much help; better is a sentinel that raises an + * error if it is actually reached. hence, the UNREACHABLE macro. + * + * ironically, if the code guarded by UNREACHABLE works as it should, + * compilers may flag the macro's code as unreachable. this would + * distract from genuine warnings, which is unacceptable. + * + * even worse, compilers differ in their code checking: GCC only complains if + * non-void functions end without returning a value (i.e. missing return + * statement), while VC checks if lines are unreachable (e.g. if they are + * preceded by a return on all paths). + * + * the implementation below enables optimization and automated checking + * without raising warnings. + **/ #define UNREACHABLE // actually defined below.. this is for # undef UNREACHABLE // CppDoc's benefit only. -// 1) final build: optimize assuming this location cannot be reached. -// may crash if that turns out to be untrue, but removes checking overhead. 
-#if CONFIG_FINAL +// compiler supports ASSUME_UNREACHABLE => allow it to assume the code is +// never reached (improves optimization at the cost of undefined behavior +// if the annotation turns out to be incorrect). +#if HAVE_ASSUME_UNREACHABLE && !CONFIG_PARANOIA # define UNREACHABLE ASSUME_UNREACHABLE -// 2) normal build: +// otherwise (or if CONFIG_PARANOIA is set), add a user-visible +// warning if the code is reached. note that abort() fails to stop +// ICC from warning about the lack of a return statement, so we +// use an infinite loop instead. #else -// a) normal implementation: includes "abort", which is declared with -// noreturn attribute and therefore avoids GCC's "execution reaches -// end of non-void function" warning. -# if !MSC_VERSION || ICC_VERSION || CONFIG_PARANOIA -# define UNREACHABLE\ +# define UNREACHABLE\ STMT(\ debug_assert(0); /* hit supposedly unreachable code */\ - abort();\ + for(;;){};\ ) -// b) VC only: don't generate any code; squelch the warning and optimize. -# else -# define UNREACHABLE ASSUME_UNREACHABLE -# endif #endif /** diff --git a/source/lib/sysdep/arch/x86_x64/topology.cpp b/source/lib/sysdep/arch/x86_x64/topology.cpp index af24f6b51b..7326dd0f8c 100644 --- a/source/lib/sysdep/arch/x86_x64/topology.cpp +++ b/source/lib/sysdep/arch/x86_x64/topology.cpp @@ -116,7 +116,7 @@ static size_t MaxLogicalPerCache() //----------------------------------------------------------------------------- -// determination of enabled cores/HTs +// APIC IDs // APIC IDs consist of variable-length fields identifying the logical unit, // core, package and shared cache. if they are available, we can determine @@ -174,106 +174,102 @@ const u8* ApicIds() } -/** - * count the number of unique APIC IDs after application of a mask. - * - * this is used to implement NumUniqueValuesInField and also required - * for counting the number of caches. 
- **/ -static size_t NumUniqueMaskedValues(const u8* apicIds, u8 mask) +// (if maxValues == 1, the field is zero-width and thus zero) +static size_t ApicField(size_t apicId, size_t indexOfLowestBit, size_t maxValues) { - std::set ids; - for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) - { - const u8 apicId = apicIds[processor]; - const u8 field = u8(apicId & mask); - ids.insert(field); - } - - return ids.size(); + const size_t numBits = ceil_log2(maxValues); + const size_t mask = bit_mask(numBits); + return (apicId >> indexOfLowestBit) & mask; } -/** - * Count the number of values assumed by a certain field within APIC IDs. - * - * @param apicIds - * @param offset Index of the lowest bit that is part of the field. - * @param numValues Number of values that can be assumed by the field. - * If equal to one, the field is zero-width. - * @return number of unique values (for convenience of the topology code, - * this is always at least one) - **/ -static size_t NumUniqueValuesInField(const u8* apicIds, size_t offset, size_t numValues) -{ - if(numValues == 1) // see parameter description above - return 1; - const size_t numBits = ceil_log2(numValues); - const u8 mask = u8((bit_mask(numBits) << offset) & 0xFF); - return NumUniqueMaskedValues(apicIds, mask); -} - - -static size_t MinPackages(size_t maxCoresPerPackage, size_t maxLogicalPerCore) -{ - const size_t numNodes = numa_NumNodes(); - const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0)); - // NB: some cores or logical processors may be disabled. 
- const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore; - const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage); - return minPackagesPerNode*numNodes; -} - +//----------------------------------------------------------------------------- +// CPU topology interface struct CpuTopology // POD { - size_t numPackages; - size_t coresPerPackage; + size_t maxLogicalPerCore; + size_t maxCoresPerPackage; + + size_t logicalOffset; + size_t coreOffset; + size_t packageOffset; + + // how many are actually enabled size_t logicalPerCore; + size_t coresPerPackage; + size_t numPackages; }; static CpuTopology cpuTopology; static ModuleInitState cpuInitState; static LibError InitCpuTopology() { - const size_t numProcessors = os_cpu_NumProcessors(); - const size_t maxCoresPerPackage = MaxCoresPerPackage(); - const size_t maxLogicalPerCore = MaxLogicalPerCore(); + cpuTopology.maxLogicalPerCore = MaxLogicalPerCore(); + cpuTopology.maxCoresPerPackage = MaxCoresPerPackage(); + + cpuTopology.logicalOffset = 0; + cpuTopology.coreOffset = ceil_log2(cpuTopology.maxLogicalPerCore); + cpuTopology.packageOffset = cpuTopology.coreOffset + ceil_log2(cpuTopology.maxCoresPerPackage); const u8* apicIds = ApicIds(); if(apicIds) { - const size_t packageOffset = ceil_log2(maxCoresPerPackage) + ceil_log2(maxLogicalPerCore); - const size_t coreOffset = ceil_log2(maxLogicalPerCore); - const size_t logicalOffset = 0; - cpuTopology.numPackages = NumUniqueValuesInField(apicIds, packageOffset, 256); - cpuTopology.coresPerPackage = NumUniqueValuesInField(apicIds, coreOffset, maxCoresPerPackage); - cpuTopology.logicalPerCore = NumUniqueValuesInField(apicIds, logicalOffset, maxLogicalPerCore); + struct NumUniqueValuesInField + { + size_t operator()(const u8* apicIds, size_t indexOfLowestBit, size_t numValues) const + { + std::set values; + for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++) + { + const size_t value = 
ApicField(apicIds[processor], indexOfLowestBit, numValues); // NB: ApicField expects (apicId, indexOfLowestBit, maxValues) + values.insert(value); + } + return values.size(); + } + }; + + cpuTopology.logicalPerCore = NumUniqueValuesInField()(apicIds, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore); + cpuTopology.coresPerPackage = NumUniqueValuesInField()(apicIds, cpuTopology.coreOffset, cpuTopology.maxCoresPerPackage); + cpuTopology.numPackages = NumUniqueValuesInField()(apicIds, cpuTopology.packageOffset, 256); } else // the processor lacks an xAPIC, or the IDs are invalid { + struct MinPackages + { + size_t operator()(size_t maxCoresPerPackage, size_t maxLogicalPerCore) const + { + const size_t numNodes = numa_NumNodes(); + const size_t logicalPerNode = PopulationCount(numa_ProcessorMaskFromNode(0)); + // NB: some cores or logical processors may be disabled. + const size_t maxLogicalPerPackage = maxCoresPerPackage*maxLogicalPerCore; + const size_t minPackagesPerNode = DivideRoundUp(logicalPerNode, maxLogicalPerPackage); + return minPackagesPerNode*numNodes; + } + }; + // we can't differentiate between cores and logical processors. 
// since the former are less likely to be disabled, we seek the // maximum feasible number of cores and minimal number of packages: - const size_t minPackages = MinPackages(maxCoresPerPackage, maxLogicalPerCore); - const size_t maxPackages = numProcessors; - for(size_t numPackages = minPackages; numPackages <= maxPackages; numPackages++) + const size_t minPackages = MinPackages()(cpuTopology.maxCoresPerPackage, cpuTopology.maxLogicalPerCore); + const size_t numProcessors = os_cpu_NumProcessors(); + for(size_t numPackages = minPackages; numPackages <= numProcessors; numPackages++) { if(numProcessors % numPackages != 0) continue; const size_t logicalPerPackage = numProcessors / numPackages; - const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, maxLogicalPerCore); - for(size_t coresPerPackage = maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--) + const size_t minCoresPerPackage = DivideRoundUp(logicalPerPackage, cpuTopology.maxLogicalPerCore); + for(size_t coresPerPackage = cpuTopology.maxCoresPerPackage; coresPerPackage >= minCoresPerPackage; coresPerPackage--) { if(logicalPerPackage % coresPerPackage != 0) continue; const size_t logicalPerCore = logicalPerPackage / coresPerPackage; - if(logicalPerCore <= maxLogicalPerCore) + if(logicalPerCore <= cpuTopology.maxLogicalPerCore) { debug_assert(numProcessors == numPackages*coresPerPackage*logicalPerCore); - cpuTopology.numPackages = numPackages; - cpuTopology.coresPerPackage = coresPerPackage; cpuTopology.logicalPerCore = logicalPerCore; + cpuTopology.coresPerPackage = coresPerPackage; + cpuTopology.numPackages = numPackages; return INFO::OK; } } @@ -303,6 +299,24 @@ size_t cpu_topology_LogicalPerCore() return cpuTopology.logicalPerCore; } +size_t cpu_topology_LogicalFromId(size_t apicId) +{ + ModuleInit(&cpuInitState, InitCpuTopology); + return ApicField(apicId, cpuTopology.logicalOffset, cpuTopology.maxLogicalPerCore); +} + +size_t cpu_topology_CoreFromId(size_t apicId) +{ + 
ModuleInit(&cpuInitState, InitCpuTopology); + return ApicField(apicId, cpuTopology.coreOffset, cpuTopology.maxCoresPerPackage); +} + +size_t cpu_topology_PackageFromId(size_t apicId) +{ + ModuleInit(&cpuInitState, InitCpuTopology); + return ApicField(apicId, cpuTopology.packageOffset, 256); +} + //----------------------------------------------------------------------------- // cache topology diff --git a/source/lib/sysdep/arch/x86_x64/topology.h b/source/lib/sysdep/arch/x86_x64/topology.h index 5f0ae7969a..47e6b55106 100644 --- a/source/lib/sysdep/arch/x86_x64/topology.h +++ b/source/lib/sysdep/arch/x86_x64/topology.h @@ -65,6 +65,11 @@ LIB_API size_t cpu_topology_CoresPerPackage(); LIB_API size_t cpu_topology_LogicalPerCore(); +LIB_API size_t cpu_topology_LogicalFromId(size_t apicId); +LIB_API size_t cpu_topology_CoreFromId(size_t apicId); +LIB_API size_t cpu_topology_PackageFromId(size_t apicId); + + //----------------------------------------------------------------------------- // L2 cache diff --git a/source/lib/sysdep/arch/x86_x64/x86_x64.cpp b/source/lib/sysdep/arch/x86_x64/x86_x64.cpp index df8a268ada..f093c0b7f1 100644 --- a/source/lib/sysdep/arch/x86_x64/x86_x64.cpp +++ b/source/lib/sysdep/arch/x86_x64/x86_x64.cpp @@ -157,7 +157,7 @@ bool x86_x64_cap(x86_x64_Cap cap) //----------------------------------------------------------------------------- -// CPU identification +// vendor static x86_x64_Vendors vendor; @@ -197,10 +197,14 @@ x86_x64_Vendors x86_x64_Vendor() } +//----------------------------------------------------------------------------- +// signature + static size_t model; static size_t family; +static ModuleInitState signatureInitState; -static void InitModelAndFamily() +static LibError InitSignature() { x86_x64_CpuidRegs regs = { 0 }; regs.eax = 1; @@ -214,71 +218,19 @@ static void InitModelAndFamily() family += extendedFamily; if(family == 0xF || (x86_x64_Vendor() == X86_X64_VENDOR_INTEL && family == 6)) model += extendedModel << 4; -} - - 
-static size_t generation; - -static LibError InitGeneration() -{ - InitModelAndFamily(); - - switch(x86_x64_Vendor()) - { - case X86_X64_VENDOR_AMD: - switch(family) - { - case 5: - if(model < 6) - generation = 5; // K5 - else - generation = 6; // K6 - break; - - case 6: - generation = 7; // K7 (Athlon) - break; - - case 0xF: - case 0x10: - generation = 8; // K8 (Opteron) - break; - } - break; - - case X86_X64_VENDOR_INTEL: - switch(family) - { - case 5: - generation = 5; // Pentium - break; - - case 6: - if(model < 0xF) - generation = 6; // Pentium Pro/II/III/M - else - generation = 8; // Core2Duo - break; - - case 0xF: - if(model <= 6) - generation = 7; // Pentium 4/D - break; - } - if(family >= 0x10) - generation = 9; - break; - } - - debug_assert(generation != 0); return INFO::OK; } -size_t x86_x64_Generation() +size_t x86_x64_Model() { - static ModuleInitState initState; - ModuleInit(&initState, InitGeneration); - return generation; + ModuleInit(&signatureInitState, InitSignature); + return model; +} + +size_t x86_x64_Family() +{ + ModuleInit(&signatureInitState, InitSignature); + return family; } @@ -832,7 +784,8 @@ static LibError InitIdentifierString() // doesn't recognize. 
if(!gotBrandString || strncmp(identifierString, "Unknow", 6) == 0) { - InitModelAndFamily(); + const size_t family = x86_x64_Family(); + const size_t model = x86_x64_Model(); switch(x86_x64_Vendor()) { case X86_X64_VENDOR_AMD: diff --git a/source/lib/sysdep/arch/x86_x64/x86_x64.h b/source/lib/sysdep/arch/x86_x64/x86_x64.h index 3b525759dc..c9f29c3024 100644 --- a/source/lib/sysdep/arch/x86_x64/x86_x64.h +++ b/source/lib/sysdep/arch/x86_x64/x86_x64.h @@ -73,6 +73,11 @@ enum x86_x64_Vendors LIB_API x86_x64_Vendors x86_x64_Vendor(); +LIB_API size_t x86_x64_Model(); + +LIB_API size_t x86_x64_Family(); + + /** * @return the colloquial processor generation * (5 = Pentium, 6 = Pentium Pro/II/III / K6, 7 = Pentium4 / Athlon, 8 = Core / Opteron) @@ -96,6 +101,7 @@ enum x86_x64_Cap // standard (edx) X86_X64_CAP_FPU = 32+0, // Floating Point Unit X86_X64_CAP_TSC = 32+4, // TimeStamp Counter + X86_X64_CAP_MSR = 32+5, // Model Specific Registers X86_X64_CAP_CMOV = 32+15, // Conditional MOVe X86_X64_CAP_TM_SCC = 32+22, // Thermal Monitoring and Software Controlled Clock X86_X64_CAP_MMX = 32+23, // MultiMedia eXtensions diff --git a/source/lib/sysdep/compiler.h b/source/lib/sysdep/compiler.h index 7be48e1e7d..fdad9daa31 100644 --- a/source/lib/sysdep/compiler.h +++ b/source/lib/sysdep/compiler.h @@ -175,10 +175,15 @@ // this macro should not generate any fallback code; it is merely the // compiler-specific backend for lib.h's UNREACHABLE. // #define it to nothing if the compiler doesn't support such a hint. 
-#if MSC_VERSION +#define HAVE_ASSUME_UNREACHABLE 1 +#if MSC_VERSION && !ICC_VERSION // (ICC ignores this) # define ASSUME_UNREACHABLE __assume(0) +#elif GCC_VERSION >= 450 +# define ASSUME_UNREACHABLE __builtin_unreachable() #else # define ASSUME_UNREACHABLE +# undef HAVE_ASSUME_UNREACHABLE +# define HAVE_ASSUME_UNREACHABLE 0 #endif diff --git a/source/lib/sysdep/os/win/aken/aken.h b/source/lib/sysdep/os/win/aken/aken.h index 77722cc7d7..582249ca5c 100644 --- a/source/lib/sysdep/os/win/aken/aken.h +++ b/source/lib/sysdep/os/win/aken/aken.h @@ -41,47 +41,75 @@ #define IOCTL_AKEN_WRITE_PORT CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+1, METHOD_BUFFERED, FILE_ANY_ACCESS) #define IOCTL_AKEN_MAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+2, METHOD_BUFFERED, FILE_ANY_ACCESS) #define IOCTL_AKEN_UNMAP CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+3, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define IOCTL_AKEN_READ_MSR CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+4, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define IOCTL_AKEN_WRITE_MSR CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+5, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define IOCTL_AKEN_READ_PMC CTL_CODE(FILE_DEVICE_AKEN, AKEN_IOCTL+6, METHOD_BUFFERED, FILE_ANY_ACCESS) // input and output data structures for the IOCTLs #pragma pack(push, 1) -struct AkenReadPortIn +typedef struct AkenReadPortIn_ { USHORT port; UCHAR numBytes; -}; +} +AkenReadPortIn; -struct AkenReadPortOut +typedef struct AkenReadPortOut_ { DWORD32 value; -}; +} +AkenReadPortOut; -struct AkenWritePortIn +typedef struct AkenWritePortIn_ { DWORD32 value; USHORT port; UCHAR numBytes; -}; +} +AkenWritePortIn; -struct AkenMapIn +typedef struct AkenMapIn_ { // note: fixed-width types allow the 32 or 64-bit Mahaf wrapper to // interoperate with the 32 or 64-bit Aken driver. 
DWORD64 physicalAddress; DWORD64 numBytes; -}; +} +AkenMapIn; -struct AkenMapOut +typedef struct AkenMapOut_ { DWORD64 virtualAddress; -}; +} +AkenMapOut; -struct AkenUnmapIn +typedef struct AkenUnmapIn_ { DWORD64 virtualAddress; -}; +} +AkenUnmapIn; + +typedef struct AkenReadRegisterIn_ +{ + DWORD64 reg; +} +AkenReadRegisterIn; + +typedef struct AkenReadRegisterOut_ +{ + DWORD64 value; +} +AkenReadRegisterOut; + +typedef struct AkenWriteRegisterIn_ +{ + DWORD64 reg; + DWORD64 value; +} +AkenWriteRegisterIn; #pragma pack(pop) diff --git a/source/lib/sysdep/os/win/mahaf.cpp b/source/lib/sysdep/os/win/mahaf.cpp index 9fd059e9e3..6f4425973e 100644 --- a/source/lib/sysdep/os/win/mahaf.cpp +++ b/source/lib/sysdep/os/win/mahaf.cpp @@ -25,6 +25,7 @@ */ #include "precompiled.h" +#include "lib/sysdep/os/win/mahaf.h" #include "lib/sysdep/os/win/win.h" #include @@ -56,8 +57,7 @@ static u32 ReadPort(u16 port, u8 numBytes) } debug_assert(bytesReturned == sizeof(out)); - const u32 value = out.value; - return value; + return out.value; } u8 mahaf_ReadPort8(u16 port) @@ -159,6 +159,48 @@ void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress) } +static u64 ReadRegister(DWORD ioctl, u64 reg) +{ + AkenReadRegisterIn in; + in.reg = reg; + AkenReadRegisterOut out; + + DWORD bytesReturned; + LPOVERLAPPED ovl = 0; // synchronous + BOOL ok = DeviceIoControl(hAken, ioctl, &in, sizeof(in), &out, sizeof(out), &bytesReturned, ovl); + if(!ok) + { + WARN_WIN32_ERR; + return 0; + } + + debug_assert(bytesReturned == sizeof(out)); + return out.value; +} + +u64 mahaf_ReadModelSpecificRegister(u64 reg) +{ + return ReadRegister((DWORD)IOCTL_AKEN_READ_MSR, reg); +} + +u64 mahaf_ReadPerformanceMonitoringCounter(u64 reg) +{ + return ReadRegister((DWORD)IOCTL_AKEN_READ_PMC, reg); +} + +void mahaf_WriteModelSpecificRegister(u64 reg, u64 value) +{ + AkenWriteRegisterIn in; + in.reg = reg; + in.value = value; + + DWORD bytesReturned; // unused but must be passed to DeviceIoControl + LPOVERLAPPED ovl 
= 0; // synchronous + BOOL ok = DeviceIoControl(hAken, (DWORD)IOCTL_AKEN_WRITE_MSR, &in, sizeof(in), 0, 0u, &bytesReturned, ovl); + WARN_IF_FALSE(ok); +} + + //----------------------------------------------------------------------------- // driver installation //----------------------------------------------------------------------------- diff --git a/source/lib/sysdep/os/win/mahaf.h b/source/lib/sysdep/os/win/mahaf.h index 74b7dc804d..bea5a4fff3 100644 --- a/source/lib/sysdep/os/win/mahaf.h +++ b/source/lib/sysdep/os/win/mahaf.h @@ -39,20 +39,26 @@ * note: mahaf_MapPhysicalMemory will complain if it * is called despite this function having returned true. **/ -extern bool mahaf_IsPhysicalMappingDangerous(); +LIB_API bool mahaf_IsPhysicalMappingDangerous(); -extern LibError mahaf_Init(); -extern void mahaf_Shutdown(); +LIB_API LibError mahaf_Init(); +LIB_API void mahaf_Shutdown(); -extern u8 mahaf_ReadPort8 (u16 port); -extern u16 mahaf_ReadPort16(u16 port); -extern u32 mahaf_ReadPort32(u16 port); -extern void mahaf_WritePort8 (u16 port, u8 value); -extern void mahaf_WritePort16(u16 port, u16 value); -extern void mahaf_WritePort32(u16 port, u32 value); +LIB_API u8 mahaf_ReadPort8 (u16 port); +LIB_API u16 mahaf_ReadPort16(u16 port); +LIB_API u32 mahaf_ReadPort32(u16 port); +LIB_API void mahaf_WritePort8 (u16 port, u8 value); +LIB_API void mahaf_WritePort16(u16 port, u16 value); +LIB_API void mahaf_WritePort32(u16 port, u32 value); -extern volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes); -extern void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress); +LIB_API volatile void* mahaf_MapPhysicalMemory(uintptr_t physicalAddress, size_t numBytes); +LIB_API void mahaf_UnmapPhysicalMemory(volatile void* virtualAddress); + +LIB_API u64 mahaf_ReadModelSpecificRegister(u64 reg); +LIB_API void mahaf_WriteModelSpecificRegister(u64 reg, u64 value); + +// must be done in the driver because Windows clears CR4.PCE[8] +LIB_API u64 
mahaf_ReadPerformanceMonitoringCounter(u64 reg); #endif // INCLUDED_MAHAF diff --git a/source/lib/sysdep/os/win/whrt/tsc.cpp b/source/lib/sysdep/os/win/whrt/tsc.cpp index d834df9e87..7abe12ceaa 100644 --- a/source/lib/sysdep/os/win/whrt/tsc.cpp +++ b/source/lib/sysdep/os/win/whrt/tsc.cpp @@ -38,6 +38,7 @@ #if ARCH_X86_X64 # include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64_rdtsc # include "lib/sysdep/arch/x86_x64/topology.h" +# include "lib/sysdep/arch/x86_x64/msr.h" #endif @@ -173,7 +174,7 @@ public: #if ARCH_X86_X64 // recent CPU: - if(x86_x64_Generation() >= 7) + //if(x86_x64_Generation() >= 7) { // note: 8th generation CPUs support C1-clock ramping, which causes // drift on multi-core systems, but those were excluded above. @@ -183,7 +184,7 @@ public: // the chipset thinks the system is dangerously overheated; the // OS isn't even notified. this may be rare, but could cause // incorrect results => unsafe. - return false; + //return false; } #endif @@ -217,6 +218,15 @@ public: // note: even here, initial accuracy isn't critical because the // clock is subject to thermal drift and would require continual // recalibration anyway. +#if ARCH_X86_X64 + if(MSR::HasNehalem()) + { + const u64 platformInfo = MSR::Read(MSR::PLATFORM_INFO); + const u8 maxNonTurboRatio = bits(platformInfo, 8, 15); + return maxNonTurboRatio * 133.33e6f; + } + else +#endif return os_cpu_ClockFrequency(); }