[t:/]$ 지식_

gcc simd compare

2018/04/13

gcc native(벡터 확장/빌트인)를 쓰는 법과, `<immintrin.h>` 같은 헤더를 #include 해서 인텔 인트린식 셋을 쓰는 방법이 있는데, 나는 인텔셋만 썼었다.

gcc vector extension에 대한 설명은 찾기가 어려운 편이고, 어셈 쓰듯이 하나씩 익히면 된다..

결국 gcc 와 intel 을 다 봐야 한다.

gcc를 쓰려면 형선언까지 해야 된다. 이게 뭐야...

비교 연산 테스트. avx512는 아직 구경 못해봤는데 막 제온 파이 같은 걸 사야 들어있나??


#include <stdio.h>
/* 16-byte GCC vector-extension types: one with int lanes, one with char lanes. */
typedef int  v4si  __attribute__((__vector_size__(16)));
typedef char v16qi __attribute__((__vector_size__(16)));

#if 0
v32qi __builtin_ia32_pcmpeqb256 (v32qi,v32qi)
v16hi __builtin_ia32_pcmpeqw256 (v16hi,v16hi)
v8si __builtin_ia32_pcmpeqd256 (v8si,v8si)
v4di __builtin_ia32_pcmpeqq256 (v4di,v4di)
v32qi __builtin_ia32_pcmpgtb256 (v32qi,v32qi)
v16hi __builtin_ia32_pcmpgtw256 (v16hi,v16hi)
v8si __builtin_ia32_pcmpgtd256 (v8si,v8si)
v4di __builtin_ia32_pcmpgtq256 (v4di,v4di)

v16qi __builtin_ia32_pcmpestrm128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestri128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestria128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestric128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestrio128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestris128 (v16qi, int, v16qi, int, const int)
int __builtin_ia32_pcmpestriz128 (v16qi, int, v16qi, int, const int)
v16qi __builtin_ia32_pcmpistrm128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistri128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistria128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistric128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistrio128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistris128 (v16qi, v16qi, const int)
int __builtin_ia32_pcmpistriz128 (v16qi, v16qi, const int)
v2di __builtin_ia32_pcmpgtq (v2di, v2di)

#endif 

/*
 * Control-byte (imm8) flags for the SSE4.2 string-compare builtins
 * (pcmpistri/pcmpistrm/pcmpestri/pcmpestrm).
 *
 * The control byte is a packed 8-bit field, not a set of one-hot flags:
 *   bits 1:0  element format
 *   bits 3:2  aggregation (compare) mode
 *   bits 5:4  polarity
 *   bit  6    output selection (index variants: LSB/MSB; mask variants: bit/unit)
 * One value from each group is OR-ed together; the result must fit in 8 bits.
 * The values below use the same encoding and spelling as <smmintrin.h>.
 */
#define    _SIDD_UBYTE_OPS                0x00 // unsigned 8-bit characters
#define    _SIDD_UWORD_OPS                0x01 // unsigned 16-bit characters
#define    _SIDD_SBYTE_OPS                0x02 // signed 8-bit characters
#define    _SIDD_SWORD_OPS                0x03 // signed 16-bit characters
#define    _SIDD_CMP_EQUAL_ANY            0x00 // compare equal any
#define    _SIDD_CMP_RANGES               0x04 // compare ranges
#define    _SIDD_CMP_EQUAL_EACH           0x08 // compare equal each
#define    _SIDD_CMP_EQUAL_ORDERED        0x0c // compare equal ordered
#define    _SIDD_NEGATIVE_POLARITY        0x10 // negate results
#define    _SIDD_MASKED_NEGATIVE_POLARITY 0x30 // negate results only before end of string
#define    _SIDD_LEAST_SIGNIFICANT        0x00 // index only: return least significant bit
#define    _SIDD_MOST_SIGNIFICANT         0x40 // index only: return most significant bit
#define    _SIDD_BIT_MASK                 0x00 // mask only: return bit mask
#define    _SIDD_UNIT_MASK                0x40 // mask only: return byte/word mask

/* 32-byte GCC vector-extension type with char lanes. */
typedef char v32qi __attribute__((__vector_size__(32)));

/*
 * Demo of two GCC x86 compare builtins.
 *
 * Runs a 256-bit per-byte greater-than compare (kk > qq) and a 128-bit
 * implicit-length string compare (s1 against s2), then prints every lane of
 * the byte-compare result followed by the index returned by the string
 * compare, all on one line.
 *
 * NOTE(review): presumably has to be built with AVX2 and SSE4.2 enabled
 * (e.g. -mavx2 -msse4.2) for the builtins to be available -- confirm.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
    /* Command-line arguments are not used by this demo. */
    (void)argc;
    (void)argv;

    printf("hello world\n");

    v32qi kk = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
    v32qi qq = { 3, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };

    v16qi s1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
    v16qi s2 = { 1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };

    /* Lane-wise kk > qq: a lane is all ones (-1) where true, 0 otherwise. */
    v32qi yy = __builtin_ia32_pcmpgtb256(kk, qq);

    /* The last argument is the compare control byte built from the _SIDD_* flags. */
    int k = __builtin_ia32_pcmpistri128(s1, s2, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);

    /* Derive the lane count from the vector itself so all 32 lanes are shown. */
    const int lanes = (int)(sizeof yy / sizeof yy[0]);
    for (int i = 0; i < lanes; i++) {
        printf("%d ", yy[i]);
    }

    printf("%d\n", k);

    return 0;
}




공유하기













[t:/] is not "technology - root". dawnsea, rss