gcc 내장 함수(builtin)를 직접 쓰는 법과, #include 로 intrinsic 헤더를 불러서 쓰는 법이 있다.
gcc vector extension에 대한 설명은 찾기 어려운 편이지만, 어셈블리를 쓰듯이 하나씩 익히면 된다.
결국 gcc 와 intel 을 다 봐야 한다.
gcc를 쓰려면 형선언까지 해야 된다. 이게 뭐야...
비교 연산 테스트. avx512는 아직 써 보지 못했는데, 제온 파이 같은 것을 사야 들어 있는 걸까?
#include <stdio.h>
/*
 * 128-bit GCC vector types. The vector_size attribute takes the total size
 * in bytes, spelled here as lane count times element size.
 */
typedef int v4si __attribute__((vector_size(4 * sizeof(int))));    /* 4 int lanes   */
typedef char v16qi __attribute__((vector_size(16 * sizeof(char)))); /* 16 char lanes */
#if 0
v32qi __builtin_ia32_pcmpeqb256 (v32qi,v32qi)
v16hi __builtin_ia32_pcmpeqw256 (v16hi,v16hi)
v8si __builtin_ia32_pcmpeqd256 (v8si,v8si)
v4di __builtin_ia32_pcmpeqq256 (v4di,v4di)
v32qi __builtin_ia32_pcmpgtb256 (v32qi,v32qi)
v16hi __builtin_ia32_pcmpgtw256 (v16hi,v16hi)
v8si __builtin_ia32_pcmpgtd256 (v8si,v8si)
v4di __builtin_ia32_pcmpgtq256 (v4di,v4di)
v16qi __builtin_ia32_pcmpestrm128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestri128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestria128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestric128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestrio128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestris128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestriz128 (v16qi, int, v16qi, int, const int)
v16qi __builtin_ia32_pcmpistrm128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistri128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistria128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistric128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistrio128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistris128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistriz128 (v16qi, v16qi, const int)
v2di __builtin_ia32_pcmpgtq (v2di, v2di)
#endif
/*
 * Control-byte (imm8) values for the SSE4.2 string-compare instructions
 * (pcmpestri / pcmpestrm / pcmpistri / pcmpistrm).
 *
 * The control byte is 8 bits wide and is made of four small fields, not of
 * one independent bit per option:
 *   bits 1:0  source data format
 *   bits 3:2  aggregation (comparison) operation
 *   bits 5:4  polarity
 *   bit  6    output selection
 * Pick one value from each group and OR them together. The zero-valued
 * names select the default of their field.
 */

// bits 1:0 -- source data format
#define _SIDD_UBYTE_OPS 0x00 // unsigned 8-bit characters
#define _SIDD_UWORD_OPS 0x01 // unsigned 16-bit characters
#define _SIDD_SBYTE_OPS 0x02 // signed 8-bit characters
#define _SIDD_SWORD_OPS 0x03 // signed 16-bit characters

// bits 3:2 -- aggregation operation
#define _SIDD_CMP_EQUAL_ANY 0x00 // compare equal any
#define _SIDD_CMP_RANGES 0x04 // compare ranges
#define _SIDD_CMP_EQUAL_EACH 0x08 // compare equal each
#define _SIDD_CMP_EQUAL_ORDERED 0x0c // compare equal ordered

// bits 5:4 -- polarity
#define _SIDD_POSITIVE_POLARITY 0x00 // do not negate results
#define _SIDD_NEGATIVE_POLARITY 0x10 // negate results
#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 // do not negate results (masked form)
#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 // negate results only before end of string

// bit 6 -- output selection; index and mask variants share this bit
#define _SIDD_LEAST_SIGNIFICANT 0x00 // index only: return least significant bit
#define _SIDD_MOST_SIGNIFICANT 0x40 // index only: return most significant bit
#define _SIDD_BIT_MASK 0x00 // mask only: return bit mask
#define _SIDD_UNIT_MASK 0x40 // mask only: return byte/word mask
/* 256-bit GCC vector type: 32 char lanes (vector_size is given in bytes). */
typedef char v32qi __attribute__((vector_size(32 * sizeof(char))));
/*
 * Exercises two GCC x86 vector builtins and prints what they return.
 *
 * Output, in order: the line "hello world", then every lane of the 256-bit
 * byte-wise greater-than comparison of kk against qq (each value followed
 * by a space), then the int returned by the implicit-length string compare
 * of s1 against s2, and a newline.
 *
 * argc and argv are accepted but unused. Always returns 0.
 *
 * NOTE(review): presumably needs -mavx2 and -msse4.2 on the command line
 * for the two builtins to be available -- confirm against the build.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    printf("hello world\n");

    /* Operands of the 32-lane byte comparison; only lanes 0 and 1 differ. */
    v32qi kk = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
    v32qi qq = { 3, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };

    /* Operands of the string compare; only element 1 differs. */
    v16qi s1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
    v16qi s2 = { 1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };

    v32qi yy = __builtin_ia32_pcmpgtb256(kk, qq);

    /*
     * The control argument is written inline so that it stays a constant
     * expression, matching the builtin's `const int` last parameter.
     */
    int k = __builtin_ia32_pcmpistri128(s1, s2, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);

    /*
     * Walk the full width of yy. The bound is derived from the vector
     * itself so that all 32 lanes are shown, not just the first 16.
     */
    for (size_t i = 0; i < sizeof yy / sizeof yy[0]; i++) {
        printf("%d ", yy[i]);
    }
    printf("%d\n", k);

    return 0;
}