1

My system is Mac OS X 10.8.5. The default gcc on this machine is 4.2 (i686-apple-darwin11-llvm-gcc-4.2 (GCC) 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)). I have installed gcc 4.9 in /usr/local.

I have some code that uses AVX2 intrinsics. The code is included below, followed by the error messages I get.

#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
#include <sys/time.h>
#define SIZE 4
#define TIMES 1

/*
 * For every position i in 0 .. SIZE*SIZE-1, build two sets of four indices,
 * gather four floats from `a` using the first set, and print both sets.
 *
 *   a_vindex1[j] = m + j,              with m = (i / SIZE) * 4
 *   b_vindex1[j] = (i % SIZE) + SIZE*j
 *
 * Parameters:
 *   a - source array for the gather; elements a[m] .. a[m+3] are read, so it
 *       must be large enough for the largest m reached by the loop.
 *   b - not read by the current implementation.
 *   c - not written by the current implementation.
 *
 * The gathered vector is not used yet; the function's only observable effect
 * is the text written to stdout.
 *
 * Requires AVX2 (for _mm_i32gather_ps) to be enabled at compile time.
 */
void mmul(const float*a, const float* b, float*c){
    int a_vindex1[4] = {0,0,0,0};
    int b_vindex1[4] = {0,0,0,0};
    int m,i,j;
    __m128i a_index, b_index;
    __m128 a1;

    (void)b;
    (void)c;

    for (i = 0; i < SIZE*SIZE; i += 1) {
        m = (i/SIZE)*4;
        for (j = 0; j < 4; j++) {
            b_vindex1[j] = i%SIZE + SIZE*j;
            a_vindex1[j] = m + j;
        }

        /*
         * An int[4] is only guaranteed to be int-aligned, so use unaligned
         * loads rather than dereferencing a casted __m128i pointer (which
         * demands 16-byte alignment).
         */
        a_index = _mm_loadu_si128((const __m128i *)a_vindex1);
        b_index = _mm_loadu_si128((const __m128i *)b_vindex1);

        /*
         * The gather scale is in bytes. The indices are element indices into
         * a float array, so the scale must be 4 (sizeof(float)), not 1.
         */
        a1 = _mm_i32gather_ps(a, a_index, 4);

        /* Results are not consumed yet; silence unused-value warnings. */
        (void)a1;
        (void)b_index;

        printf("\nBINDEX %d,%d,%d,%d", b_vindex1[0],b_vindex1[1], b_vindex1[2], b_vindex1[3]);
        printf("\nAINDEX %d,%d,%d,%d", a_vindex1[0],a_vindex1[1], a_vindex1[2], a_vindex1[3]);
    }
}

/*
 * Test driver: allocates three SIZE*SIZE float matrices, fills a[i] = i and
 * b[i] = 0.5*i, then calls mmul() TIMES times while accumulating the elapsed
 * wall-clock time (in seconds) measured with gettimeofday().
 *
 * Returns 0 on success, 1 if any allocation fails.
 */
int main(){
    float *a, *b, *c;
    int i, j;
    double timetotal = 0.0;
    struct timeval start, stop;

    a = calloc(SIZE*SIZE, sizeof *a);
    b = calloc(SIZE*SIZE, sizeof *b);
    c = calloc(SIZE*SIZE, sizeof *c);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "failed to allocate matrices\n");
        /* free(NULL) is a no-op, so releasing all three is safe. */
        free(a);
        free(b);
        free(c);
        return 1;
    }

    for (i = 0; i < SIZE*SIZE; i++) {
        a[i] = (float)i;
        b[i] = (float)(0.5*i);
    }

    for (j = 0; j < TIMES; j++) {
        gettimeofday(&start, NULL);
        mmul(a, b, c);
        gettimeofday(&stop, NULL);
        /* Elapsed microseconds for this call, converted to seconds. */
        timetotal += ((double)((stop.tv_sec-start.tv_sec)*1000000+ (stop.tv_usec-start.tv_usec)))/1000000;
    }

    /* Reporting of the average time is currently disabled. */
    //printf("\n time average = %.8lf",timetotal/TIMES);
    (void)timetotal;

    free(a);
    free(b);
    free(c);
    return 0;
}

Now if I compile this code with gcc 4.9 as

gcc-4.9 -O3 -march=core-avx2 a7.c

I get the following error messages:

/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:141:no such instruction: `vmovd %r8d, %xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:143:no such instruction: `vmovapd LC15(%rip), %ymm3'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:145:no such instruction: `vbroadcastss %xmm7, %ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:147:no such instruction: `vpaddd LC13(%rip), %ymm0,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:148:no such instruction: `vpaddd LC14(%rip), %ymm0,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:149:no such instruction: `vcvtdq2ps %ymm1, %ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:150:no such instruction: `vmovups %ymm2, (%rcx)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:151:no such instruction: `vcvtdq2pd %xmm1, %ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:152:no such instruction: `vmulpd %ymm3, %ymm2,%ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:153:no such instruction: `vextracti128 $0x1, %ymm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:154:no such instruction: `vcvtpd2psy %ymm2, %xmm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:155:no such instruction: `vcvtdq2pd %xmm1, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:156:no such instruction: `vmulpd %ymm3, %ymm1,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:157:no such instruction: `vcvtpd2psy %ymm1, %xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:158:no such instruction: `vinsertf128 $0x1, %xmm1,%ymm2,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:159:no such instruction: `vmovups %ymm1, (%rax)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:162:no such instruction: `vcvtdq2ps %ymm0, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:163:no such instruction: `vmovups %ymm1, 32(%rcx)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:164:no such instruction: `vcvtdq2pd %xmm0, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:165:no such instruction: `vextracti128 $0x1, %ymm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:166:no such instruction: `vmulpd %ymm3, %ymm1,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:167:no such instruction: `vcvtdq2pd %xmm0, %ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:168:no such instruction: `vcvtpd2psy %ymm1, %xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:169:no such instruction: `vmulpd %ymm3, %ymm0,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:170:no such instruction: `vcvtpd2psy %ymm0, %xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:171:no such instruction: `vinsertf128 $0x1, %xmm0,%ymm1,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:172:no such instruction: `vmovups %ymm0, 32(%rax)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:178:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:179:no such instruction: `vcvtsi2ss %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:181:no such instruction: `vxorps %xmm5, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:182:no such instruction: `vmovsd LC16(%rip), %xmm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:183:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:184:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:185:no such instruction: `vcvtsi2sd %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:186:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:187:no such instruction: `vcvtsd2ss %xmm0, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:188:no such instruction: `vmovss %xmm5, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:192:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:193:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:195:no such instruction: `vxorps %xmm6, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:196:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:197:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:198:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:200:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:201:no such instruction: `vcvtsd2ss %xmm0, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:202:no such instruction: `vmovss %xmm6, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:205:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:206:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:208:no such instruction: `vxorps %xmm7, %xmm7,%xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:209:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:210:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:211:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:213:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:214:no such instruction: `vcvtsd2ss %xmm0, %xmm7,%xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:215:no such instruction: `vmovss %xmm7, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:218:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:219:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:221:no such instruction: `vxorps %xmm4, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:222:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:223:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:224:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:226:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:227:no such instruction: `vcvtsd2ss %xmm0, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:228:no such instruction: `vmovss %xmm4, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:231:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:232:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:234:no such instruction: `vxorps %xmm5, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:236:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:237:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:238:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:239:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:240:no such instruction: `vcvtsd2ss %xmm0, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:241:no such instruction: `vmovss %xmm5, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:244:no such instruction: `vxorpd %xmm1, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:245:no such instruction: `vcvtsi2sd %edi, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:246:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:248:no such instruction: `vcvtsi2ss %edi, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:249:no such instruction: `vxorps %xmm6, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:251:no such instruction: `vmulsd %xmm2, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:252:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:253:no such instruction: `vcvtsd2ss %xmm1, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:254:no such instruction: `vmovss %xmm6, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:257:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:258:no such instruction: `vcvtsi2ss %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:260:no such instruction: `vxorps %xmm4, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:261:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:262:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:263:no such instruction: `vcvtsi2sd %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:264:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:265:no such instruction: `vcvtsd2ss %xmm0, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:266:no such instruction: `vmovss %xmm4, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:270:no such instruction: `vzeroupper'

I guess the assembler needs to be updated when I use gcc-4.9? Or does this look like some other issue? Any pointers would help.

sepp2k
  • 363,768
  • 54
  • 674
  • 675
gayathri
  • 15
  • 1
  • 5

1 Answers1

5

Add the option:

-Wa,-q

to your compile.

The meaning of this option comes from two sources. First, the GCC manual:

-Wa,option

Pass option as an option to the assembler. If option contains commas, it is split into multiple options at the commas.

Then the AS (Mac OS X Mach-O GNU-based assemblers) manual:

-q Use the clang(1) integrated assembler instead of the GNU based system assembler. This is the default for the x86 and arm architectures.

Note that this option is also necessary on later versions of Darwin and GCC (tested, for example, under macOS 10.12 with GCC 6).

Eric Platon
  • 9,819
  • 6
  • 41
  • 48
worldofjr
  • 3,868
  • 8
  • 37
  • 49
  • **Please note:** This is a community wiki answer as a solution was posted by @IlyaVerbin in the comments. Please edit this answer if you have more information that will help future readers. – worldofjr Oct 29 '14 at 04:25
  • @IlyaVerbin - After adding -Wa,-q, gcc-4.9 throws the following error: "as: assembler (/opt/local/bin/clang) not installed". Should I set the path for the assembler here? – gayathri Oct 29 '14 at 06:25
  • @gayathri Hm, have you installed Xcode Command Line Tools? GCC tries to use `as` from clang, but can't find it. – Ilya Verbin Nov 03 '14 at 20:18