0

I am trying to implement the AES-NI on 256 bits blocks . I have modified the codes based on the Intel whitepaper and below is my modification.

extern "C"  __declspec(dllexport) void AES_CBC_encrypt(const unsigned char *in,
    unsigned char *out,
    unsigned char ivec[32],
    unsigned long length,
    unsigned char *key1,
    int number_of_rounds)

{
    __m128i RIJNDAEL256_MASK =
        _mm_set_epi32(0x03020d0c, 0x0f0e0908, 0x0b0a0504, 0x07060100);

    __m128i BLEND_MASK=
        _mm_set_epi32(0x80000000, 0x80800000, 0x80800000, 0x80808000);
    __m128i tmp1, tmp2, data1 ,data2;
    UCHAR _ks[1028];
    UCHAR* ks=_ks;
    expand256Key((UCHAR *)key1,ks);
    __m128i *key = (__m128i*)ks;

    __m128i feedback1,feedback2;
    int i,j;
    if (length%32)
        length = length/32+1;
    else length /=32;

    feedback1=_mm_loadu_si128(&((__m128i*)ivec)[0]) ;
    feedback2=_mm_loadu_si128(&((__m128i*)ivec)[1]) ;

    for(i=0; i < length; i++){
        data1 = _mm_loadu_si128(&((__m128i*)in)[i*2+0]); /* load data block */
        data2 = _mm_loadu_si128(&((__m128i*)in)[i*2+1]);
        //data = _mm_loadu_si128 (&((__m128i*)in)[i]);
        feedback1 = _mm_xor_si128 (data1,feedback1);
        feedback2 = _mm_xor_si128 (data2,feedback2);
        feedback1 = _mm_xor_si128 (feedback1,key[0]);  //this line causing me error
        feedback2 = _mm_xor_si128(feedback2,key[1]);
        for(j=1; j <number_of_rounds; j++)
        {
            //feedback1 = _mm_aesenc_si128 (feedback1,((__m128i*)key)[j]);

            /*Blend to compensate for the shift rows shifts bytes between two
            128 bit blocks*/
            tmp1 = _mm_blendv_epi8(feedback1, feedback2, BLEND_MASK);
            tmp2 = _mm_blendv_epi8(feedback2, feedback1, BLEND_MASK);
            /*Shuffle that compensates for the additional shift in rows 3 and 4
            as opposed to rijndael128 (AES)*/
            tmp1 = _mm_shuffle_epi8(tmp1, RIJNDAEL256_MASK);
            tmp2 = _mm_shuffle_epi8(tmp2, RIJNDAEL256_MASK);
            /*This is the encryption step that includes sub bytes, shift rows,
            mix columns, xor with round key*/
            feedback1 = _mm_aesenc_si128(tmp1, key[j*2]);
            feedback2 = _mm_aesenc_si128(tmp2, key[j*2+1]);
        }
        //feedback1 = _mm_aesenclast_si128 (feedback1,key[j]);
        //_mm_storeu_si128 (&((__m128i*)out)[i],feedback1);


        tmp1 = _mm_blendv_epi8(feedback1, feedback2, BLEND_MASK);
        tmp2 = _mm_blendv_epi8(feedback2, feedback1, BLEND_MASK);
        tmp1 = _mm_shuffle_epi8(tmp1, RIJNDAEL256_MASK);
        tmp2 = _mm_shuffle_epi8(tmp2, RIJNDAEL256_MASK);
        tmp1 = _mm_aesenclast_si128(tmp1, key[j*2+0]); /*last AES round */
        tmp2 = _mm_aesenclast_si128(tmp2, key[j*2+1]);
        _mm_storeu_si128(&((__m128i*)out)[i*2+0],tmp1);
        _mm_storeu_si128(&((__m128i*)out)[i*2+1],tmp2);
    }
}

However, at the line feedback1 = _mm_xor_si128 (feedback1,key[0]); I am getting an exception of System.AccessViolationException . I previously posted a similar question and I had that fixed by aligning my keyschedule, but it doesn't seem to work in this case and I have no idea where else to look at. Would be great if someone could give me some pointers or point me to the correct direction to troubleshoot this problem. Thank you.

user1926691
  • 173
  • 1
  • 3
  • 17
  • So, you have debugged that code to the point where it fails, checked that `feedback1` and `key` are initialized, have proper length and are aligned, and you are still getting that exception? – Oleg Estekhin Jul 23 '14 at 07:00
  • @OlegEstekhin Yes . That is correct. Would like to ask if I am correct in thinking that the problem might lie with key ? As the _mm_xor operation on data1 and feedback1 seems to work fine . – user1926691 Jul 23 '14 at 07:59
  • 1
    @user1926691 - I don't know if you are monitoring this thread any more but did you ever get this working? If so, would you mind sharing? – Neil Weicher Apr 10 '18 at 07:26

0 Answers0