/*
* Runtime CPU detection for x86
* (C) 2009,2010,2013,2017 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/cpuid.h>
#include <botan/mem_ops.h>
#include <botan/loadstor.h>

#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)

#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
  #include <intrin.h>
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
  #include <ia32intrin.h>
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
  #include <cpuid.h>
#endif

#endif

namespace Botan {

#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)

uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
   {
#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
#define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
#define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
#define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
#define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
#define X86_CPUID(type, out) \
asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
: "0" (type))
#define X86_CPUID_SUBLEVEL(type, level, out) \
asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
: "0" (type), "2" (level))
#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
#define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
#define X86_CPUID_SUBLEVEL(type, level, out) \
do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
#else
#warning "No way of calling x86 cpuid instruction for this compiler"
#define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
#define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
#endif
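
   // Each X86_CPUID / X86_CPUID_SUBLEVEL call fills out[0..3] with EAX, EBX, ECX, EDX
   // (the no-cpuid fallback above just zeroes the output instead)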
   uint64_t features_detected = 0;
   uint32_t cpuid[4] = { 0 };

   // CPUID 0: vendor identification, max sublevel
   X86_CPUID(0, cpuid);
   const uint32_t max_supported_sublevel = cpuid[0];
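
   /*
   * CPUID leaf 0 returns the vendor string in EBX, EDX, ECX. The constants below
   * are the little-endian 32-bit words of "GenuineIntel" and "AuthenticAMD" laid
   * out in register order EBX, ECX, EDX (e.g. 0x756E6547 == "Genu"), so they can
   * be compared directly against cpuid[1..3].
   */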
   const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
   const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };

   const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
   const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);

   if(max_supported_sublevel >= 1)
      {
      // CPUID 1: feature bits
      X86_CPUID(1, cpuid);
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
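
      // flags0 packs CPUID(1) EDX into bits 0..31 and ECX into bits 32..63, so enum
      // values above bit 31 refer to ECX bits (e.g. AESNI, 1ULL << 57, is ECX bit 25)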
      enum x86_CPUID_1_bits : uint64_t {
         RDTSC = (1ULL << 4),
         SSE2 = (1ULL << 26),
         CLMUL = (1ULL << 33),
         SSSE3 = (1ULL << 41),
         SSE41 = (1ULL << 51),
         SSE42 = (1ULL << 52),
         AESNI = (1ULL << 57),
         RDRAND = (1ULL << 62)
      };

      if(flags0 & x86_CPUID_1_bits::RDTSC)
         features_detected |= CPUID::CPUID_RDTSC_BIT;
      if(flags0 & x86_CPUID_1_bits::SSE2)
         features_detected |= CPUID::CPUID_SSE2_BIT;
      if(flags0 & x86_CPUID_1_bits::CLMUL)
         features_detected |= CPUID::CPUID_CLMUL_BIT;
      if(flags0 & x86_CPUID_1_bits::SSSE3)
         features_detected |= CPUID::CPUID_SSSE3_BIT;
      if(flags0 & x86_CPUID_1_bits::SSE41)
         features_detected |= CPUID::CPUID_SSE41_BIT;
      if(flags0 & x86_CPUID_1_bits::SSE42)
         features_detected |= CPUID::CPUID_SSE42_BIT;
      if(flags0 & x86_CPUID_1_bits::AESNI)
         features_detected |= CPUID::CPUID_AESNI_BIT;
      if(flags0 & x86_CPUID_1_bits::RDRAND)
         features_detected |= CPUID::CPUID_RDRAND_BIT;
      }

   if(is_intel)
      {
      // Intel reports the CLFLUSH line size in CPUID(1) EBX bits 15:8, in 8-byte units
      *cache_line_size = 8 * get_byte(2, cpuid[1]);
      }
   else if(is_amd)
      {
      // AMD reports the L1 data cache line size in the low byte of ECX of extended leaf 0x80000005
      X86_CPUID(0x80000005, cpuid);
      *cache_line_size = get_byte(3, cpuid[2]);
      }

   if(max_supported_sublevel >= 7)
      {
      clear_mem(cpuid, 4);
      X86_CPUID_SUBLEVEL(7, 0, cpuid);
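
      // The values below index a 64-bit word holding CPUID(7,0) EBX in bits 0..31
      // and ECX in bits 32..63 (e.g. AVX512_VBMI, 1ULL << 33, is ECX bit 1)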
      enum x86_CPUID_7_bits : uint64_t {
         BMI1 = (1ULL << 3),
         AVX2 = (1ULL << 5),
         BMI2 = (1ULL << 8),
         AVX512_F = (1ULL << 16),
         AVX512_DQ = (1ULL << 17),
         RDSEED = (1ULL << 18),
         ADX = (1ULL << 19),
         AVX512_IFMA = (1ULL << 21),
         SHA = (1ULL << 29),
         AVX512_BW = (1ULL << 30),
         AVX512_VL = (1ULL << 31),
         AVX512_VBMI = (1ULL << 33),
         AVX512_VBMI2 = (1ULL << 38),
         AVX512_VAES = (1ULL << 41),
         AVX512_VCLMUL = (1ULL << 42),
         AVX512_VBITALG = (1ULL << 44),
      };

      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];

      if(flags7 & x86_CPUID_7_bits::AVX2)
         features_detected |= CPUID::CPUID_AVX2_BIT;
      if(flags7 & x86_CPUID_7_bits::BMI1)
         {
         features_detected |= CPUID::CPUID_BMI1_BIT;
         /*
         We only set the BMI2 bit if BMI1 is also supported, so BMI2
         code can safely use both extensions. No known processor
         implements BMI2 but not BMI1.
         */
         if(flags7 & x86_CPUID_7_bits::BMI2)
            features_detected |= CPUID::CPUID_BMI2_BIT;
         }

      if(flags7 & x86_CPUID_7_bits::AVX512_F)
         {
         features_detected |= CPUID::CPUID_AVX512F_BIT;

         if(flags7 & x86_CPUID_7_bits::AVX512_DQ)
            features_detected |= CPUID::CPUID_AVX512DQ_BIT;
         if(flags7 & x86_CPUID_7_bits::AVX512_BW)
            features_detected |= CPUID::CPUID_AVX512BW_BIT;
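
         // Set a single convenience bit if every AVX-512 extension introduced with Ice Lake is present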
         const uint64_t ICELAKE_FLAGS =
            x86_CPUID_7_bits::AVX512_F |
            x86_CPUID_7_bits::AVX512_DQ |
            x86_CPUID_7_bits::AVX512_IFMA |
            x86_CPUID_7_bits::AVX512_BW |
            x86_CPUID_7_bits::AVX512_VL |
            x86_CPUID_7_bits::AVX512_VBMI |
            x86_CPUID_7_bits::AVX512_VBMI2 |
            x86_CPUID_7_bits::AVX512_VBITALG;

         if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS)
            features_detected |= CPUID::CPUID_AVX512_ICL_BIT;

         if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
            features_detected |= CPUID::CPUID_AVX512_AES_BIT;
         if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
            features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
         }

      if(flags7 & x86_CPUID_7_bits::RDSEED)
         features_detected |= CPUID::CPUID_RDSEED_BIT;
      if(flags7 & x86_CPUID_7_bits::ADX)
         features_detected |= CPUID::CPUID_ADX_BIT;
      if(flags7 & x86_CPUID_7_bits::SHA)
         features_detected |= CPUID::CPUID_SHA_BIT;
      }

#undef X86_CPUID
#undef X86_CPUID_SUBLEVEL

   /*
   * If we don't have access to CPUID, we can still safely assume that
   * any x86-64 processor has SSE2 and RDTSC
   */
#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
   if(features_detected == 0)
      {
      features_detected |= CPUID::CPUID_SSE2_BIT;
      features_detected |= CPUID::CPUID_RDTSC_BIT;
      }
#endif

   return features_detected;
   }

#endif

}
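
/*
* Usage sketch (an illustration, not part of this file): the feature bits set
* above are normally consulted through the CPUID accessors declared in
* botan/cpuid.h, assuming helpers such as CPUID::has_aes_ni(). The encrypt
* functions named here are hypothetical and only stand in for a hardware
* specific and a portable code path:
*
*   if(Botan::CPUID::has_aes_ni())
*      encrypt_blocks_aesni(in, out, blocks);      // hypothetical AES-NI path
*   else
*      encrypt_blocks_portable(in, out, blocks);   // hypothetical fallback
*/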