675 lines
19 KiB
C
675 lines
19 KiB
C
#include<stdint.h>
|
|
#include<stdio.h>
|
|
#include<stdbool.h>
|
|
#include<limits.h>
|
|
#include<immintrin.h>
|
|
#include<cpuid.h>
|
|
|
|
|
|
|
|
// Short aliases for the <stdint.h> fixed-width integer types.
// Unsigned widths first, then the signed counterparts.
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

typedef int8_t  i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
|
|
|
|
// Number of elements in a real array. Must not be given a pointer: the
// result would be sizeof(pointer) / sizeof(element).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

// Limits of the fixed-width integer aliases. The negative limits are
// parenthesized so they expand as a single operand inside larger expressions.

#define I8_MAX 0x7F
#define I8_MIN (-0x80)
#define U8_MAX 0xFF
#define U8_MIN 0

#define I16_MAX 0x7FFF
#define I16_MIN (-0x8000)
#define U16_MAX 0xFFFF
#define U16_MIN 0

#define I32_MAX 0x7FFFFFFF
// NOTE: with a 32 bit int, 0x80000000 does not fit in int, so the literal is
// unsigned and negating it yields the unsigned value 0x80000000. Stored in a
// 64 bit field this gives 0x0000000080000000 (no sign extension).
#define I32_MIN (-0x80000000)
#define U32_MAX 0xFFFFFFFF
#define U32_MIN 0

#define I64_MAX 0x7FFFFFFFFFFFFFFF
// NOTE: same situation as I32_MIN: the literal is unsigned, so this is the
// bit pattern 0x8000000000000000 rather than a negative signed value.
#define I64_MIN (-0x8000000000000000)
#define U64_MAX 0xFFFFFFFFFFFFFFFF
#define U64_MIN 0
|
|
|
|
#define MMX_TEST_STRUCT(sz) \
|
|
typedef struct mmx_##sz##_test { \
|
|
sz a; \
|
|
sz b; \
|
|
sz result; \
|
|
} mmx_##sz##_test_t
|
|
|
|
MMX_TEST_STRUCT(u8);
|
|
MMX_TEST_STRUCT(i8);
|
|
MMX_TEST_STRUCT(u16);
|
|
MMX_TEST_STRUCT(i16);
|
|
MMX_TEST_STRUCT(u32);
|
|
MMX_TEST_STRUCT(i32);
|
|
MMX_TEST_STRUCT(u64);
|
|
MMX_TEST_STRUCT(i64);
|
|
|
|
// Bit-for-bit equality of two mm registers.
// Returns true only when all 64 bits of `a` and `b` match.
bool mm_raw_compare(__m64 a, __m64 b) {
    // _m_to_int hands back 32 bits at a time, so the upper half is shifted
    // down first and each half is compared separately.
    const int lo_a = _m_to_int(a);
    const int lo_b = _m_to_int(b);
    const int hi_a = _m_to_int(_mm_srli_si64(a, 32));
    const int hi_b = _m_to_int(_mm_srli_si64(b, 32));

    if (lo_a != lo_b) {
        return false;
    }
    return hi_a == hi_b;
}
|
|
|
|
// Load a 64 bit value into a mm register
|
|
__m64 mm_load64(u64 val) {
|
|
__m64 lower = _m_from_int(val & 0xFFFFFFFF);
|
|
__m64 upper = _m_from_int((val >> 32) & 0xFFFFFFFF);
|
|
|
|
__m64 shifted = _mm_slli_si64(upper, 32);
|
|
__m64 final = _m_por(shifted, lower);
|
|
|
|
return final;
|
|
}
|
|
|
|
// Defines `bool name(void)`, a test that runs `testfunc` over every entry of
// `testcases`.
//   name          - identifier of the generated test function
//   testcases     - array of `testcase_type` entries (a, b, result)
//   testcase_type - element type of `testcases`
//   type          - scalar element type; not used by the expansion, kept so
//                   existing invocations keep compiling
//   size          - element width in bits; selects _mm_set1_pi<size>
//   testfunc      - binary MMX intrinsic under test, called as testfunc(a, b)
// Each scalar operand (and the expected result) is broadcast to every lane
// of a 64 bit register before the call. A mismatch is reported in the same
// "Failed; Expected ... Got ..." format used by MMX_64_TEST and
// MMX_SHIFT_TEST. The generated function returns true when at least one
// test case failed.
#define MMX_ARITH_TEST(name, testcases, testcase_type, type, size, testfunc) \
    bool name(void) {                                                        \
        printf("TEST: " #name "\n");                                         \
        int errors = 0;                                                      \
                                                                             \
        for (size_t i = 0; i < ARRAY_SIZE(testcases); i++) {                 \
            testcase_type test_data = testcases[i];                          \
                                                                             \
            __m64 a = _mm_set1_pi##size(test_data.a);                        \
            __m64 b = _mm_set1_pi##size(test_data.b);                        \
            __m64 expected = _mm_set1_pi##size(test_data.result);            \
            __m64 result = testfunc(a, b);                                   \
                                                                             \
            bool success = mm_raw_compare(expected, result);                 \
            if (!success) {                                                  \
                printf(                                                      \
                    "Failed; Expected: 0x%08x_%08x\tGot: 0x%08x_%08x\n",     \
                    (unsigned int) _m_to_int(_mm_srli_si64(expected, 32)),   \
                    (unsigned int) _m_to_int(expected),                      \
                    (unsigned int) _m_to_int(_mm_srli_si64(result, 32)),     \
                    (unsigned int) _m_to_int(result)                         \
                );                                                           \
                errors++;                                                    \
            }                                                                \
        }                                                                    \
                                                                             \
        _m_empty();                                                          \
        printf("TEST: finished with: %d errors\n", errors);                  \
        return errors != 0;                                                  \
    }
|
|
|
|
// Defines `bool name(void)`, a test for shift intrinsics that take the shift
// count as an integer argument rather than as a second mm register.
//   name      - identifier of the generated test function
//   testcases - array of mmx_u64_test_t; `a` is the 64 bit operand, `b` the
//               shift count, `result` the expected 64 bit value
//   testfunc  - intrinsic under test, called as testfunc(a, (int) count)
// Mismatches are printed as 0xUPPER_LOWER halves. The generated function
// returns true when at least one test case failed.
#define MMX_SHIFT_TEST(name, testcases, testfunc)                            \
    bool name(void) {                                                        \
        printf("TEST: " #name "\n");                                         \
        int errors = 0;                                                      \
                                                                             \
        for (size_t i = 0; i < ARRAY_SIZE(testcases); i++) {                 \
            mmx_u64_test_t test_data = testcases[i];                         \
                                                                             \
            __m64 a = mm_load64(test_data.a);                                \
            __m64 expected = mm_load64(test_data.result);                    \
            __m64 result = testfunc(a, (int) test_data.b);                   \
                                                                             \
            bool success = mm_raw_compare(expected, result);                 \
            if (!success) {                                                  \
                printf(                                                      \
                    "Failed; Expected: 0x%08x_%08x\tGot: 0x%08x_%08x\n",     \
                    (unsigned int) _m_to_int(_mm_srli_si64(expected, 32)),   \
                    (unsigned int) _m_to_int(expected),                      \
                    (unsigned int) _m_to_int(_mm_srli_si64(result, 32)),     \
                    (unsigned int) _m_to_int(result)                         \
                );                                                           \
                errors++;                                                    \
            }                                                                \
        }                                                                    \
                                                                             \
        _m_empty();                                                          \
        printf("TEST: finished with: %d errors\n", errors);                  \
        return errors != 0;                                                  \
    }
|
|
|
|
|
|
|
|
// Defines `bool name(void)`, a test that loads two 64 bit immediates into mm
// registers, applies `testfunc(a, b)` and compares against the third value.
//   name      - identifier of the generated test function
//   testcases - array of mmx_u64_test_t (a, b, expected result)
//   testfunc  - binary MMX intrinsic under test
// Mismatches are printed as 0xUPPER_LOWER halves. The generated function
// returns true when at least one test case failed.
#define MMX_64_TEST(name, testcases, testfunc)                               \
    bool name(void) {                                                        \
        printf("TEST: " #name "\n");                                         \
        int errors = 0;                                                      \
                                                                             \
        for (size_t i = 0; i < ARRAY_SIZE(testcases); i++) {                 \
            mmx_u64_test_t test_data = testcases[i];                         \
                                                                             \
            __m64 a = mm_load64(test_data.a);                                \
            __m64 b = mm_load64(test_data.b);                                \
            __m64 expected = mm_load64(test_data.result);                    \
            __m64 result = testfunc(a, b);                                   \
                                                                             \
            bool success = mm_raw_compare(expected, result);                 \
            if (!success) {                                                  \
                printf(                                                      \
                    "Failed; Expected: 0x%08x_%08x\tGot: 0x%08x_%08x\n",     \
                    (unsigned int) _m_to_int(_mm_srli_si64(expected, 32)),   \
                    (unsigned int) _m_to_int(expected),                      \
                    (unsigned int) _m_to_int(_mm_srli_si64(result, 32)),     \
                    (unsigned int) _m_to_int(result)                         \
                );                                                           \
                errors++;                                                    \
            }                                                                \
        }                                                                    \
                                                                             \
        _m_empty();                                                          \
        printf("TEST: finished with: %d errors\n", errors);                  \
        return errors != 0;                                                  \
    }
|
|
|
|
|
|
mmx_i8_test_t mmx_i8_add_test_data[] = {
|
|
{ .a = 1, .b = 2, .result = 3 },
|
|
{ .a = 0, .b = 1, .result = 1 },
|
|
{ .a = I8_MAX, .b = 1, .result = I8_MIN },
|
|
{ .a = I8_MIN, .b = -1, .result = I8_MAX },
|
|
{ .a = 0, .b = U8_MAX, .result = U8_MAX },
|
|
};
|
|
mmx_i8_test_t mmx_i8_add_sat_test_data[] = {
|
|
{ .a = 1, .b = 2, .result = 3 },
|
|
{ .a = 0, .b = 1, .result = 1 },
|
|
{ .a = I8_MAX, .b = 1, .result = I8_MAX },
|
|
{ .a = I8_MIN, .b = -1, .result = I8_MIN },
|
|
};
|
|
mmx_u8_test_t mmx_u8_add_sat_test_data[] = {
|
|
{ .a = 1, .b = 2, .result = 3 },
|
|
{ .a = 0, .b = 1, .result = 1 },
|
|
{ .a = U8_MAX, .b = 1, .result = U8_MAX },
|
|
{ .a = 0, .b = U8_MAX, .result = U8_MAX },
|
|
};
|
|
|
|
mmx_i16_test_t mmx_i16_add_test_data[] = {
|
|
{ .a = 1, .b = 2, .result = 3 },
|
|
{ .a = 0, .b = 1, .result = 1 },
|
|
{ .a = I16_MAX, .b = 1, .result = I16_MIN },
|
|
{ .a = I16_MIN, .b = -1, .result = I16_MAX },
|
|
};
|
|
mmx_i16_test_t mmx_i16_add_sat_test_data[] = {
|
|
{ .a = 1, .b = 2, .result = 3 },
|
|
{ .a = 0, .b = 1, .result = 1 },
|
|
{ .a = I16_MAX, .b = 1, .result = I16_MAX },
|
|
{ .a = I16_MIN, .b = -1, .result = I16_MIN },
|
|
};
|
|
mmx_u16_test_t mmx_u16_add_sat_test_data[] = {
|
|
{ .a = 1, .b = 2, .result = 3 },
|
|
{ .a = 0, .b = 1, .result = 1 },
|
|
{ .a = U16_MAX, .b = 1, .result = U16_MAX },
|
|
{ .a = 0, .b = U16_MAX, .result = U16_MAX },
|
|
};
|
|
|
|
mmx_i32_test_t mmx_i32_add_test_data[] = {
|
|
{ .a = 1, .b = 2, .result = 3 },
|
|
{ .a = 0, .b = 1, .result = 1 },
|
|
{ .a = I32_MAX, .b = 1, .result = I32_MIN },
|
|
{ .a = I32_MIN, .b = -1, .result = I32_MAX },
|
|
};
|
|
|
|
MMX_ARITH_TEST(test_mmx_paddb, mmx_i8_add_test_data, mmx_i8_test_t, i8, 8, _m_paddb);
|
|
MMX_ARITH_TEST(test_mmx_paddsb, mmx_i8_add_sat_test_data, mmx_i8_test_t, i8, 8, _m_paddsb);
|
|
MMX_ARITH_TEST(test_mmx_paddusb, mmx_u8_add_sat_test_data, mmx_u8_test_t, u8, 8, _m_paddusb);
|
|
|
|
MMX_ARITH_TEST(test_mmx_paddw, mmx_i16_add_test_data, mmx_i16_test_t, i16, 16, _m_paddw);
|
|
MMX_ARITH_TEST(test_mmx_paddsw, mmx_i16_add_sat_test_data, mmx_i16_test_t, i16, 16, _m_paddsw);
|
|
MMX_ARITH_TEST(test_mmx_paddusw, mmx_u16_add_sat_test_data, mmx_u16_test_t, u16, 16, _m_paddusw);
|
|
|
|
MMX_ARITH_TEST(test_mmx_paddd, mmx_i32_add_test_data, mmx_i32_test_t, i32, 32, _m_paddd);
|
|
|
|
|
|
|
|
mmx_i8_test_t mmx_i8_sub_test_data[] = {
|
|
{ .a = 3, .b = 2, .result = 1 },
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = I8_MIN, .b = 1, .result = I8_MAX },
|
|
{ .a = I8_MAX, .b = -1, .result = I8_MIN },
|
|
{ .a = U8_MAX, .b = U8_MAX, .result = 0 },
|
|
};
|
|
mmx_i8_test_t mmx_i8_sub_sat_test_data[] = {
|
|
{ .a = 3, .b = 2, .result = 1 },
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = I8_MIN, .b = 1, .result = I8_MIN },
|
|
{ .a = I8_MAX, .b = -1, .result = I8_MAX },
|
|
};
|
|
mmx_u8_test_t mmx_u8_sub_sat_test_data[] = {
|
|
{ .a = 3, .b = 2, .result = 1 },
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = U8_MIN, .b = 1, .result = U8_MIN },
|
|
{ .a = U8_MAX, .b = U8_MAX, .result = 0 },
|
|
};
|
|
|
|
mmx_i16_test_t mmx_i16_sub_test_data[] = {
|
|
{ .a = 3, .b = 2, .result = 1 },
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = I16_MIN, .b = 1, .result = I16_MAX },
|
|
{ .a = I16_MAX, .b = -1, .result = I16_MIN },
|
|
};
|
|
mmx_i16_test_t mmx_i16_sub_sat_test_data[] = {
|
|
{ .a = 3, .b = 2, .result = 1 },
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = I16_MIN, .b = 1, .result = I16_MIN },
|
|
{ .a = I16_MAX, .b = -1, .result = I16_MAX },
|
|
};
|
|
mmx_u16_test_t mmx_u16_sub_sat_test_data[] = {
|
|
{ .a = 3, .b = 2, .result = 1 },
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = U16_MIN, .b = 1, .result = U16_MIN },
|
|
{ .a = U16_MIN, .b = U16_MIN, .result = 0 },
|
|
};
|
|
|
|
mmx_i32_test_t mmx_i32_sub_test_data[] = {
|
|
{ .a = 3, .b = 2, .result = 1 },
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = I32_MIN, .b = 1, .result = I32_MAX },
|
|
{ .a = I32_MAX, .b = -1, .result = I32_MIN },
|
|
};
|
|
|
|
MMX_ARITH_TEST(test_mmx_psubb, mmx_i8_sub_test_data, mmx_i8_test_t, i8, 8, _m_psubb);
|
|
MMX_ARITH_TEST(test_mmx_psubsb, mmx_i8_sub_sat_test_data, mmx_i8_test_t, i8, 8, _m_psubsb);
|
|
MMX_ARITH_TEST(test_mmx_psubusb, mmx_u8_sub_sat_test_data, mmx_u8_test_t, u8, 8, _m_psubusb);
|
|
|
|
MMX_ARITH_TEST(test_mmx_psubw, mmx_i16_sub_test_data, mmx_i16_test_t, i16, 16, _m_psubw);
|
|
MMX_ARITH_TEST(test_mmx_psubuw, mmx_i16_sub_sat_test_data, mmx_i16_test_t, i16, 16, _m_psubsw);
|
|
MMX_ARITH_TEST(test_mmx_psubusw, mmx_u16_sub_sat_test_data, mmx_u16_test_t, u16, 16, _m_psubusw);
|
|
|
|
MMX_ARITH_TEST(test_mmx_psubd, mmx_i32_sub_test_data, mmx_i32_test_t, i32, 32, _m_psubd);
|
|
|
|
|
|
|
|
|
|
mmx_u64_test_t mmx_por_test_data[] = {
|
|
{ .a = 0xAAAAAAAAAAAAAAAA,
|
|
.b = 0x5555555555555555,
|
|
.result = 0xFFFFFFFFFFFFFFFF },
|
|
{ .a = 0x0000000000000000,
|
|
.b = 0x1111111111111111,
|
|
.result = 0x1111111111111111 },
|
|
};
|
|
|
|
mmx_u64_test_t mmx_pand_test_data[] = {
|
|
{ .a = 0xAAAAAAAAAAAAAAAA,
|
|
.b = 0x5555555555555555,
|
|
.result = 0x0000000000000000 },
|
|
{ .a = 0xFFFFFFFFFFFFFFFF,
|
|
.b = 0xFFFFFFFFFFFFFFFF,
|
|
.result = 0xFFFFFFFFFFFFFFFF },
|
|
};
|
|
|
|
mmx_u64_test_t mmx_pandn_test_data[] = {
|
|
{ .a = 0x0000000000000000,
|
|
.b = 0xFFFFFFFFFFFFFFFF,
|
|
.result = 0xFFFFFFFFFFFFFFFF },
|
|
{ .a = 0xFFFFFFFFFFFFFFFF,
|
|
.b = 0x0000000000000000,
|
|
.result = 0x0000000000000000 },
|
|
};
|
|
|
|
|
|
mmx_u64_test_t mmx_pxor_test_data[] = {
|
|
{ .a = 0xAAAAAAAAAAAAAAAA,
|
|
.b = 0x5555555555555555,
|
|
.result = 0xFFFFFFFFFFFFFFFF },
|
|
{ .a = 0xFFFFFFFFFFFFFFFF,
|
|
.b = 0xFFFFFFFFFFFFFFFF,
|
|
.result = 0x0000000000000000 },
|
|
};
|
|
|
|
|
|
MMX_64_TEST(test_mmx_por, mmx_por_test_data, _m_por);
|
|
MMX_64_TEST(test_mmx_pand, mmx_pand_test_data, _m_pand);
|
|
MMX_64_TEST(test_mmx_pandn, mmx_pandn_test_data, _m_pandn);
|
|
MMX_64_TEST(test_mmx_pxor, mmx_pxor_test_data, _m_pxor);
|
|
|
|
|
|
|
|
|
|
|
|
mmx_i16_test_t mmx_pmullw_test_data[] = {
|
|
{ .a = 10, .b = 10, .result = 100 },
|
|
{ .a = 32000, .b = 10, .result = 0xE200 },
|
|
{ .a = 20000, .b = 20000, .result = 0x8400 },
|
|
};
|
|
mmx_i16_test_t mmx_pmulhw_test_data[] = {
|
|
{ .a = 10, .b = 10, .result = 0 },
|
|
{ .a = 32000, .b = 10, .result = 4 },
|
|
{ .a = 20000, .b = 20000, .result = 0x17D7 },
|
|
};
|
|
mmx_u64_test_t mmx_pmaddwd_test_data[] = {
|
|
{ .a = 0x0000000100000001,
|
|
.b = 0x0000000100000001,
|
|
.result = 0x0000000100000001 },
|
|
{ .a = 0x0000000200000004,
|
|
.b = 0x0000000200000004,
|
|
.result = 0x0000000400000010 },
|
|
|
|
{ .a = 0x000000007FFFFFFF,
|
|
.b = 0x000000007FFFFFFF,
|
|
.result = 0x000000003FFF0002 },
|
|
|
|
// -1 * -1 = 2
|
|
{ .a = 0x00000000FFFFFFFF,
|
|
.b = 0x00000000FFFFFFFF,
|
|
.result = 0x0000000000000002 },
|
|
};
|
|
|
|
|
|
MMX_ARITH_TEST(test_mmx_pmullw, mmx_pmullw_test_data, mmx_i16_test_t, i16, 16, _m_pmullw);
|
|
MMX_ARITH_TEST(test_mmx_pmulhw, mmx_pmulhw_test_data, mmx_i16_test_t, i16, 16, _m_pmulhw);
|
|
MMX_64_TEST(test_mmx_pmaddwd, mmx_pmaddwd_test_data, _m_pmaddwd);
|
|
|
|
|
|
|
|
|
|
|
|
mmx_u64_test_t mmx_packssdw_test_data[] = {
|
|
{ .a = 0x0000000200000001,
|
|
.b = 0x0000000400000003,
|
|
.result = 0x0004000300020001 },
|
|
{ .a = 0x7FFFFFFF7FFFFFFF,
|
|
.b = 0x7FFFFFFF7FFFFFFF,
|
|
.result = 0x7FFF7FFF7FFF7FFF },
|
|
{ .a = 0x8000000080000000,
|
|
.b = 0x8000000080000000,
|
|
.result = 0x8000800080008000 },
|
|
};
|
|
mmx_u64_test_t mmx_packsswb_test_data[] = {
|
|
{ .a = 0x0004000300020001,
|
|
.b = 0x0008000700060005,
|
|
.result = 0x0807060504030201 },
|
|
{ .a = 0x7FFF7FFF7FFF7FFF,
|
|
.b = 0x7FFF7FFF7FFF7FFF,
|
|
.result = 0x7F7F7F7F7F7F7F7F },
|
|
{ .a = 0x8000800080008000,
|
|
.b = 0x8000800080008000,
|
|
.result = 0x8080808080808080 },
|
|
};
|
|
mmx_u64_test_t mmx_packuswb_test_data[] = {
|
|
{ .a = 0x0004000300020001,
|
|
.b = 0x0008000700060005,
|
|
.result = 0x0807060504030201 },
|
|
{ .a = 0x7FFF7FFF7FFF7FFF,
|
|
.b = 0x7FFF7FFF7FFF7FFF,
|
|
.result = 0xFFFFFFFFFFFFFFFF },
|
|
{ .a = 0x8000800080008000,
|
|
.b = 0x8000800080008000,
|
|
.result = 0x0000000000000000 },
|
|
};
|
|
|
|
|
|
MMX_64_TEST(test_mmx_packssdw, mmx_packssdw_test_data, _m_packssdw);
|
|
MMX_64_TEST(test_mmx_packsswb, mmx_packsswb_test_data, _m_packsswb);
|
|
MMX_64_TEST(test_mmx_packuswb, mmx_packuswb_test_data, _m_packuswb);
|
|
|
|
|
|
|
|
mmx_u64_test_t mmx_punpckhbw_test_data[] = {
|
|
{ .a = 0x4433221100000000,
|
|
.b = 0x8877665500000000,
|
|
.result = 0x8844773366225511 },
|
|
};
|
|
mmx_u64_test_t mmx_punpckhdq_test_data[] = {
|
|
{ .a = 0xAAAAAAAA00000000,
|
|
.b = 0xBBBBBBBB00000000,
|
|
.result = 0xBBBBBBBBAAAAAAAA },
|
|
};
|
|
mmx_u64_test_t mmx_punpckhwd_test_data[] = {
|
|
{ .a = 0xBBBBAAAA00000000,
|
|
.b = 0xDDDDCCCC00000000,
|
|
.result = 0xDDDDBBBBCCCCAAAA },
|
|
};
|
|
mmx_u64_test_t mmx_punpcklbw_test_data[] = {
|
|
{ .a = 0x0000000044332211,
|
|
.b = 0x0000000088776655,
|
|
.result = 0x8844773366225511 },
|
|
};
|
|
mmx_u64_test_t mmx_punpckldq_test_data[] = {
|
|
{ .a = 0x00000000AAAAAAAA,
|
|
.b = 0x00000000BBBBBBBB,
|
|
.result = 0xBBBBBBBBAAAAAAAA },
|
|
};
|
|
mmx_u64_test_t mmx_punpcklwd_test_data[] = {
|
|
{ .a = 0x00000000BBBBAAAA,
|
|
.b = 0x00000000DDDDCCCC,
|
|
.result = 0xDDDDBBBBCCCCAAAA },
|
|
};
|
|
|
|
|
|
MMX_64_TEST(test_mmx_punpckhbw, mmx_punpckhbw_test_data, _m_punpckhbw);
|
|
MMX_64_TEST(test_mmx_punpckhdq, mmx_punpckhdq_test_data, _m_punpckhdq);
|
|
MMX_64_TEST(test_mmx_punpckhwd, mmx_punpckhwd_test_data, _m_punpckhwd);
|
|
MMX_64_TEST(test_mmx_punpcklbw, mmx_punpcklbw_test_data, _m_punpcklbw);
|
|
MMX_64_TEST(test_mmx_punpckldq, mmx_punpckldq_test_data, _m_punpckldq);
|
|
MMX_64_TEST(test_mmx_punpcklwd, mmx_punpcklwd_test_data, _m_punpcklwd);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
mmx_u64_test_t mmx_pcmpeqb_test_data[] = {
|
|
{ .a = 0x8877665544332211,
|
|
.b = 0x0077005500330011,
|
|
.result = 0x00FF00FF00FF00FF },
|
|
};
|
|
mmx_u64_test_t mmx_pcmpeqw_test_data[] = {
|
|
{ .a = 0x4444333322221111,
|
|
.b = 0x0000333300001111,
|
|
.result = 0x0000FFFF0000FFFF },
|
|
};
|
|
mmx_u64_test_t mmx_pcmpeqd_test_data[] = {
|
|
{ .a = 0x2222222211111111,
|
|
.b = 0x2222222200000000,
|
|
.result = 0xFFFFFFFF00000000 },
|
|
};
|
|
|
|
mmx_u64_test_t mmx_pcmpgtb_test_data[] = {
|
|
{ .a = 0x0000000000002201,
|
|
.b = 0x0000000000002300,
|
|
.result = 0x00000000000000FF },
|
|
};
|
|
mmx_u64_test_t mmx_pcmpgtw_test_data[] = {
|
|
{ .a = 0x4444333322221111,
|
|
.b = 0x0000333300001112,
|
|
.result = 0xFFFF0000FFFF0000 },
|
|
};
|
|
mmx_u64_test_t mmx_pcmpgtd_test_data[] = {
|
|
{ .a = 0x2222222111111111,
|
|
.b = 0x2222222200000000,
|
|
.result = 0x00000000FFFFFFFF },
|
|
};
|
|
|
|
|
|
MMX_64_TEST(test_mmx_pcmpeqb, mmx_pcmpeqb_test_data, _m_pcmpeqb);
|
|
MMX_64_TEST(test_mmx_pcmpeqw, mmx_pcmpeqw_test_data, _m_pcmpeqw);
|
|
MMX_64_TEST(test_mmx_pcmpeqd, mmx_pcmpeqd_test_data, _m_pcmpeqd);
|
|
MMX_64_TEST(test_mmx_pcmpgtb, mmx_pcmpgtb_test_data, _m_pcmpgtb);
|
|
MMX_64_TEST(test_mmx_pcmpgtw, mmx_pcmpgtw_test_data, _m_pcmpgtw);
|
|
MMX_64_TEST(test_mmx_pcmpgtd, mmx_pcmpgtd_test_data, _m_pcmpgtd);
|
|
|
|
|
|
|
|
|
|
mmx_u64_test_t mmx_pslld_test_data[] = {
|
|
{ .a = 1, .b = 1, .result = 2 },
|
|
{ .a = 16, .b = 1, .result = 32 },
|
|
{ .a = 16, .b = 32, .result = 0 },
|
|
{ .a = 16, .b = 0, .result = 16 },
|
|
};
|
|
mmx_u64_test_t mmx_psllq_test_data[] = {
|
|
{ .a = 1, .b = 1, .result = 2 },
|
|
{ .a = 16, .b = 1, .result = 32 },
|
|
{ .a = 16, .b = 64, .result = 0 },
|
|
{ .a = 16, .b = 0, .result = 16 },
|
|
};
|
|
mmx_u64_test_t mmx_psllw_test_data[] = {
|
|
{ .a = 1, .b = 1, .result = 2 },
|
|
{ .a = 16, .b = 1, .result = 32 },
|
|
{ .a = 16, .b = 16, .result = 0 },
|
|
{ .a = 16, .b = 0, .result = 16 },
|
|
};
|
|
mmx_u64_test_t mmx_psrad_test_data[] = {
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = 16, .b = 1, .result = 8 },
|
|
{ .a = 16, .b = 0, .result = 16 },
|
|
{ .a = 0x7FFFFFFF, .b = 1, .result = 0x3FFFFFFF },
|
|
|
|
{ .a = I32_MAX, .b = 32, .result = 0 },
|
|
{ .a = I32_MIN, .b = 32, .result = U32_MAX },
|
|
};
|
|
mmx_u64_test_t mmx_psraw_test_data[] = {
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = 16, .b = 1, .result = 8 },
|
|
{ .a = 16, .b = 0, .result = 16 },
|
|
{ .a = 0x7FFF, .b = 1, .result = 0x3FFF },
|
|
|
|
{ .a = I16_MAX, .b = 16, .result = 0 },
|
|
{ .a = U16_MAX, .b = 16, .result = U16_MAX },
|
|
};
|
|
mmx_u64_test_t mmx_psrld_test_data[] = {
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = 16, .b = 1, .result = 8 },
|
|
{ .a = 16, .b = 0, .result = 16 },
|
|
{ .a = 0x7FFFFFFF, .b = 1, .result = 0x3FFFFFFF },
|
|
|
|
{ .a = I32_MAX, .b = 32, .result = 0 },
|
|
{ .a = I32_MIN, .b = 32, .result = 0 },
|
|
};
|
|
mmx_u64_test_t mmx_psrlq_test_data[] = {
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = 16, .b = 1, .result = 8 },
|
|
{ .a = 16, .b = 0, .result = 16 },
|
|
|
|
{ .a = I64_MAX, .b = 64, .result = 0 },
|
|
{ .a = I64_MIN, .b = 64, .result = 0 },
|
|
};
|
|
mmx_u64_test_t mmx_psrlw_test_data[] = {
|
|
{ .a = 1, .b = 1, .result = 0 },
|
|
{ .a = 16, .b = 1, .result = 8 },
|
|
{ .a = 16, .b = 0, .result = 16 },
|
|
|
|
{ .a = I16_MAX, .b = 16, .result = 0 },
|
|
|
|
// TODO: Works on my machine
|
|
// { .a = I16_MIN, .b = 16, .result = 0 },
|
|
};
|
|
|
|
|
|
|
|
// Shift tests with the count held in a mm register.
MMX_64_TEST(test_mmx_pslld, mmx_pslld_test_data, _m_pslld);
MMX_64_TEST(test_mmx_psllq, mmx_psllq_test_data, _m_psllq);
MMX_64_TEST(test_mmx_psllw, mmx_psllw_test_data, _m_psllw);
MMX_64_TEST(test_mmx_psrad, mmx_psrad_test_data, _m_psrad);
MMX_64_TEST(test_mmx_psraw, mmx_psraw_test_data, _m_psraw);
MMX_64_TEST(test_mmx_psrld, mmx_psrld_test_data, _m_psrld);
MMX_64_TEST(test_mmx_psrlq, mmx_psrlq_test_data, _m_psrlq);
MMX_64_TEST(test_mmx_psrlw, mmx_psrlw_test_data, _m_psrlw);

// The same tables again, with the count passed as an integer argument.
MMX_SHIFT_TEST(test_mmx_pslldi, mmx_pslld_test_data, _m_pslldi);
MMX_SHIFT_TEST(test_mmx_psllqi, mmx_psllq_test_data, _m_psllqi);
MMX_SHIFT_TEST(test_mmx_psllwi, mmx_psllw_test_data, _m_psllwi);
MMX_SHIFT_TEST(test_mmx_psradi, mmx_psrad_test_data, _m_psradi);
MMX_SHIFT_TEST(test_mmx_psrawi, mmx_psraw_test_data, _m_psrawi);
MMX_SHIFT_TEST(test_mmx_psrldi, mmx_psrld_test_data, _m_psrldi);
MMX_SHIFT_TEST(test_mmx_psrlqi, mmx_psrlq_test_data, _m_psrlqi);
MMX_SHIFT_TEST(test_mmx_psrlwi, mmx_psrlw_test_data, _m_psrlwi);
|
|
|
|
|
|
|
|
|
|
// Checks that the CPU advertises MMX support (CPUID leaf 1, EDX bit 23).
// Returns false when MMX is reported and true (one error) when it is not,
// matching the "true means failure" convention of the other tests.
bool test_mmx_cpuid(void) {
    printf("TEST: test_mmx_cpuid\n");

    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

    // __get_cpuid comes from <cpuid.h>, which this file already includes.
    // Unlike the bare `asm` keyword it also compiles in strict ISO modes
    // such as -std=c11, and it returns 0 when the requested leaf is not
    // available, which is counted as "no MMX".
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        return true;
    }

    // bit_MMX is the <cpuid.h> name for EDX bit 23 of leaf 1.
    return (edx & bit_MMX) == 0;
}
|
|
|
|
int main() {
|
|
int errors = 0;
|
|
|
|
errors += (int) test_mmx_cpuid();
|
|
|
|
|
|
errors += (int) test_mmx_paddb();
|
|
errors += (int) test_mmx_paddsb();
|
|
errors += (int) test_mmx_paddusb();
|
|
errors += (int) test_mmx_paddw();
|
|
errors += (int) test_mmx_paddsw();
|
|
errors += (int) test_mmx_paddusw();
|
|
errors += (int) test_mmx_paddd();
|
|
|
|
errors += (int) test_mmx_psubb();
|
|
errors += (int) test_mmx_psubsb();
|
|
errors += (int) test_mmx_psubusb();
|
|
errors += (int) test_mmx_psubw();
|
|
errors += (int) test_mmx_psubuw();
|
|
errors += (int) test_mmx_psubusw();
|
|
errors += (int) test_mmx_psubd();
|
|
|
|
errors += (int) test_mmx_por();
|
|
errors += (int) test_mmx_pand();
|
|
errors += (int) test_mmx_pandn();
|
|
errors += (int) test_mmx_pxor();
|
|
|
|
errors += (int) test_mmx_pmullw();
|
|
errors += (int) test_mmx_pmulhw();
|
|
errors += (int) test_mmx_pmaddwd();
|
|
|
|
errors += (int) test_mmx_packssdw();
|
|
errors += (int) test_mmx_packsswb();
|
|
errors += (int) test_mmx_packuswb();
|
|
|
|
errors += (int) test_mmx_punpckhbw();
|
|
errors += (int) test_mmx_punpckhdq();
|
|
errors += (int) test_mmx_punpckhwd();
|
|
errors += (int) test_mmx_punpcklbw();
|
|
errors += (int) test_mmx_punpckldq();
|
|
errors += (int) test_mmx_punpcklwd();
|
|
|
|
errors += (int) test_mmx_pcmpeqb();
|
|
errors += (int) test_mmx_pcmpeqw();
|
|
errors += (int) test_mmx_pcmpeqd();
|
|
errors += (int) test_mmx_pcmpgtb();
|
|
errors += (int) test_mmx_pcmpgtw();
|
|
errors += (int) test_mmx_pcmpgtd();
|
|
|
|
errors += (int) test_mmx_psllw();
|
|
errors += (int) test_mmx_psllwi();
|
|
errors += (int) test_mmx_pslld();
|
|
errors += (int) test_mmx_pslldi();
|
|
errors += (int) test_mmx_psllq();
|
|
errors += (int) test_mmx_psllqi();
|
|
errors += (int) test_mmx_psraw();
|
|
errors += (int) test_mmx_psrawi();
|
|
errors += (int) test_mmx_psrad();
|
|
errors += (int) test_mmx_psradi();
|
|
errors += (int) test_mmx_psrld();
|
|
errors += (int) test_mmx_psrldi();
|
|
errors += (int) test_mmx_psrlq();
|
|
errors += (int) test_mmx_psrlqi();
|
|
errors += (int) test_mmx_psrlw();
|
|
errors += (int) test_mmx_psrlwi();
|
|
|
|
|
|
printf("Errors: %d\n", errors);
|
|
return errors;
|
|
}
|
|
|