pcompress/utils/cpuid.c
Moinak Ghosh d5ceda559e Update Licensing notes and build notes.
More whitespace fixes.
2014-07-26 15:28:40 +05:30

196 lines
5.4 KiB
C

/*
* This file is a part of Pcompress, a chunked parallel multi-
* algorithm lossless compression and decompression program.
*
* Copyright (C) 2012-2013 Moinak Ghosh. All rights reserved.
* Use is subject to license terms.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program.
* If not, see <http://www.gnu.org/licenses/>.
*
* moinakg@belenix.org, http://moinakg.wordpress.com/
*/
/*
* Copyright 2008 Veselin Georgiev,
* anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include "utils.h"
#include "cpuid.h"
#ifdef __x86_64__
/*
 * CPUID feature-bit masks used by cpuid_basic_identify().
 *
 * Each register set filled by exec_cpuid() is stored in the order
 * EAX, EBX, ECX, EDX, so index [1] is EBX and index [2] is ECX.
 */
/* Bit 19; tested against basic_cpuid[1][2] (leaf 1, ECX). */
#define SSE4_1_FLAG 0x080000
/* Bit 20; tested against basic_cpuid[1][2] (leaf 1, ECX). */
#define SSE4_2_FLAG 0x100000
/* Bit 0; tested against basic_cpuid[1][2] (leaf 1, ECX). */
#define SSE3_FLAG 0x1
/* Bit 9; tested against basic_cpuid[1][2] (leaf 1, ECX). */
#define SSSE3_FLAG 0x200
/* Bit 28; tested against basic_cpuid[1][2] (leaf 1, ECX). */
#define AVX_FLAG 0x10000000
/* Bit 5; tested against basic_cpuid[7][1] (leaf 7, EBX). */
#define AVX2_FLAG (1U << 5)
/* Bit 11; tested against ext_cpuid[1][2] (leaf 0x80000001, ECX). */
#define XOP_FLAG 0x800
/* Bit 25; tested against basic_cpuid[1][2] (leaf 1, ECX). */
#define AES_FLAG 0x2000000
/*
 * Execute the CPUID instruction.
 *
 * On entry regs[0], regs[1], regs[2] and regs[3] supply the values placed
 * in EAX, EBX, ECX and EDX; on return they hold the values CPUID left in
 * those same registers.
 *
 * The registers are bound with the "a", "b", "c" and "d" constraints so
 * that the compiler performs the loads, the stores and any register
 * save/restore itself. Do not reintroduce push/pop inside the asm: the
 * compiler is not told about stack writes made by an asm statement, and
 * on the x86-64 System V ABI it may keep live data just below %rsp (the
 * red zone), which a push would overwrite. Also note that a constraint
 * string such as "rdi" does not name the %rdi register; GCC reads it as
 * the three alternatives 'r', 'd' and 'i'.
 */
static void
exec_cpuid(uint32_t *regs)
{
#ifdef __GNUC__
	uint32_t a = regs[0];
	uint32_t b = regs[1];
	uint32_t c = regs[2];
	uint32_t d = regs[3];

	/*
	 * __volatile__ keeps the instruction from being removed or merged
	 * with another CPUID; the "memory" clobber is retained from the
	 * original clobber list.
	 */
	__asm__ __volatile__(
		"cpuid"
		: "+a"(a), "+b"(b), "+c"(c), "+d"(d)
		:
		: "memory"
	);

	regs[0] = a;
	regs[1] = b;
	regs[2] = c;
	regs[3] = d;
#else
#error "Unsupported compiler"
#endif
}
/*
 * Query a single CPUID leaf.
 *
 * Stores `eax` in regs[0], zeroes regs[1..3], and hands the array to
 * exec_cpuid(), which overwrites all four slots with the result.
 */
static void
cpu_exec_cpuid(uint32_t eax, uint32_t* regs)
{
	unsigned slot;

	/* Every input other than the leaf number starts out as zero. */
	for (slot = 1; slot <= 3; slot++) {
		regs[slot] = 0;
	}
	regs[0] = eax;

	exec_cpuid(regs);
}
/*
 * Run CPUID with all four values already present in regs[0..3] used as
 * the register inputs; nothing is zeroed first. This lets a caller pass
 * an additional selector in regs[2], as cpuid_get_raw_data() does for
 * leaf 4. The results are written back into regs[0..3] by exec_cpuid().
 */
static void
cpu_exec_cpuid_ext(uint32_t* regs)
{
exec_cpuid(regs);
}
/*
 * Collect raw CPUID output into *data:
 *
 *   basic_cpuid[n]  <- leaf n,              n = 0 .. 31
 *   ext_cpuid[n]    <- leaf 0x80000000 + n, n = 0 .. 31
 *   intel_fn4[n]    <- leaf 4 with n in the third input slot, n = 0 .. 3
 *
 * This function is deliberately kept out of line (NOINLINE_ATTR): inlining
 * it appears to break optimized code generation on some older, buggy GCC
 * versions.
 */
void
NOINLINE_ATTR cpuid_get_raw_data(struct cpu_raw_data_t* data)
{
	unsigned leaf;
	unsigned sub;

	/* Basic leaves. */
	leaf = 0;
	while (leaf < 32) {
		cpu_exec_cpuid(leaf, data->basic_cpuid[leaf]);
		leaf++;
	}

	/* Extended leaves, numbered from 0x80000000. */
	leaf = 0;
	while (leaf < 32) {
		cpu_exec_cpuid(0x80000000 + leaf, data->ext_cpuid[leaf]);
		leaf++;
	}

	/* Leaf 4, once per index 0..3; inputs are set up by hand. */
	sub = 0;
	while (sub < 4) {
		memset(data->intel_fn4[sub], 0, sizeof(data->intel_fn4[sub]));
		data->intel_fn4[sub][2] = sub;
		data->intel_fn4[sub][0] = 4;
		cpu_exec_cpuid_ext(data->intel_fn4[sub]);
		sub++;
	}
}
/*
 * Fill *pc with a basic description of the running CPU, based on CPUID.
 *
 * Fields written:
 *   proc_type      PROC_X64_INTEL or PROC_X64_AMD when the vendor string
 *                  is "GenuineIntel" or "AuthenticAMD"; left untouched for
 *                  any other vendor.
 *   sse_level,
 *   sse_sub_level  2.0, 3.0, 3.1 (SSSE3), 4.1 or 4.2; sse_level stays 0
 *                  only when the vendor is unrecognized and leaf 1 is not
 *                  available.
 *   avx_level      0, 1 (AVX) or 2 (AVX2).
 *   aes_avail      1 when the AES feature bit is set, otherwise 0.
 *   xop_avail      1 when the XOP feature bit is set, otherwise 0.
 *
 * Only CPUID feature bits are consulted; this function does not check
 * whether the operating system has enabled the matching register state.
 */
void
cpuid_basic_identify(processor_cap_t *pc)
{
	struct cpu_raw_data_t raw;
	uint32_t max_basic, max_ext, leaf1_ecx;

	cpuid_get_raw_data(&raw);

	/* Leaf 0 returns the vendor string in EBX, EDX, ECX order. */
	memcpy(raw.vendor_str + 0, &raw.basic_cpuid[0][1], 4);
	memcpy(raw.vendor_str + 4, &raw.basic_cpuid[0][3], 4);
	memcpy(raw.vendor_str + 8, &raw.basic_cpuid[0][2], 4);
	raw.vendor_str[12] = 0;

	/* Start from "nothing detected" for every capability field. */
	pc->avx_level = 0;
	pc->sse_level = 0;
	pc->sse_sub_level = 0;
	pc->xop_avail = 0;
	pc->aes_avail = 0;

	if (strcmp(raw.vendor_str, "GenuineIntel") == 0) {
		pc->proc_type = PROC_X64_INTEL;
		pc->sse_level = 2;
	} else if (strcmp(raw.vendor_str, "AuthenticAMD") == 0) {
		pc->proc_type = PROC_X64_AMD;
		pc->sse_level = 2;
	}

	/* EAX of leaf 0 is the highest basic leaf the CPU implements. */
	max_basic = raw.basic_cpuid[0][0];
	if (max_basic < 1) {
		return;
	}

	/* Leaf 1 ECX carries the SSE3/SSSE3/SSE4.x, AVX and AES bits. */
	leaf1_ecx = raw.basic_cpuid[1][2];

	if (leaf1_ecx & SSE4_1_FLAG) {
		pc->sse_level = 4;
		pc->sse_sub_level = (leaf1_ecx & SSE4_2_FLAG) ? 2 : 1;
	} else if (leaf1_ecx & SSE3_FLAG) {
		pc->sse_level = 3;
		if (leaf1_ecx & SSSE3_FLAG) {
			pc->sse_sub_level = 1;
		}
	} else {
		pc->sse_level = 2;
	}

	if (leaf1_ecx & AVX_FLAG) {
		pc->avx_level = 1;
	}

	/*
	 * Leaf 7 is only meaningful when the CPU reports it as implemented;
	 * querying a leaf above the maximum may return unrelated data.
	 */
	if (max_basic >= 7 && (raw.basic_cpuid[7][1] & AVX2_FLAG)) {
		pc->avx_level = 2;
	}

	if (leaf1_ecx & AES_FLAG) {
		pc->aes_avail = 1;
	}

	/*
	 * Likewise, only trust extended leaf 0x80000001 when leaf 0x80000000
	 * reports that it exists.
	 */
	max_ext = raw.ext_cpuid[0][0];
	if (max_ext >= 0x80000001U && (raw.ext_cpuid[1][2] & XOP_FLAG)) {
		pc->xop_avail = 1;
	}
}
#endif