I am trying to implement the AES-NI on 256 bits blocks . I have modified the codes based on the Intel whitepaper and below is my modification.
extern "C" __declspec(dllexport) void AES_CBC_encrypt(const unsigned char *in,
unsigned char *out,
unsigned char ivec[32],
unsigned long length,
unsigned char *key1,
int number_of_rounds)
{
__m128i RIJNDAEL256_MASK =
_mm_set_epi32(0x03020d0c, 0x0f0e0908, 0x0b0a0504, 0x07060100);
__m128i BLEND_MASK=
_mm_set_epi32(0x80000000, 0x80800000, 0x80800000, 0x80808000);
__m128i tmp1, tmp2, data1 ,data2;
UCHAR _ks[1028];
UCHAR* ks=_ks;
expand256Key((UCHAR *)key1,ks);
__m128i *key = (__m128i*)ks;
__m128i feedback1,feedback2;
int i,j;
if (length%32)
length = length/32+1;
else length /=32;
feedback1=_mm_loadu_si128(&((__m128i*)ivec)[0]) ;
feedback2=_mm_loadu_si128(&((__m128i*)ivec)[1]) ;
for(i=0; i < length; i++){
data1 = _mm_loadu_si128(&((__m128i*)in)[i*2+0]); /* load data block */
data2 = _mm_loadu_si128(&((__m128i*)in)[i*2+1]);
//data = _mm_loadu_si128 (&((__m128i*)in)[i]);
feedback1 = _mm_xor_si128 (data1,feedback1);
feedback2 = _mm_xor_si128 (data2,feedback2);
feedback1 = _mm_xor_si128 (feedback1,key[0]); //this line causing me error
feedback2 = _mm_xor_si128(feedback2,key[1]);
for(j=1; j <number_of_rounds; j++)
{
//feedback1 = _mm_aesenc_si128 (feedback1,((__m128i*)key)[j]);
/*Blend to compensate for the shift rows shifts bytes between two
128 bit blocks*/
tmp1 = _mm_blendv_epi8(feedback1, feedback2, BLEND_MASK);
tmp2 = _mm_blendv_epi8(feedback2, feedback1, BLEND_MASK);
/*Shuffle that compensates for the additional shift in rows 3 and 4
as opposed to rijndael128 (AES)*/
tmp1 = _mm_shuffle_epi8(tmp1, RIJNDAEL256_MASK);
tmp2 = _mm_shuffle_epi8(tmp2, RIJNDAEL256_MASK);
/*This is the encryption step that includes sub bytes, shift rows,
mix columns, xor with round key*/
feedback1 = _mm_aesenc_si128(tmp1, key[j*2]);
feedback2 = _mm_aesenc_si128(tmp2, key[j*2+1]);
}
//feedback1 = _mm_aesenclast_si128 (feedback1,key[j]);
//_mm_storeu_si128 (&((__m128i*)out)[i],feedback1);
tmp1 = _mm_blendv_epi8(feedback1, feedback2, BLEND_MASK);
tmp2 = _mm_blendv_epi8(feedback2, feedback1, BLEND_MASK);
tmp1 = _mm_shuffle_epi8(tmp1, RIJNDAEL256_MASK);
tmp2 = _mm_shuffle_epi8(tmp2, RIJNDAEL256_MASK);
tmp1 = _mm_aesenclast_si128(tmp1, key[j*2+0]); /*last AES round */
tmp2 = _mm_aesenclast_si128(tmp2, key[j*2+1]);
_mm_storeu_si128(&((__m128i*)out)[i*2+0],tmp1);
_mm_storeu_si128(&((__m128i*)out)[i*2+1],tmp2);
}
}
However, at the line feedback1 = _mm_xor_si128 (feedback1,key[0]);
I am getting an exception of System.AccessViolationException . I previously posted a similar question and I had that fixed by aligning my keyschedule, but it doesn't seem to work in this case and I have no idea where else to look at.
Would be great if someone could give me some pointers or point me to the correct direction to troubleshoot this problem.
Thank you.