I am trying to implement D. J. Bernstein's Poly1305 algorithm. While going through his C implementation here, I could not figure out, in the poly1305_init
function, what arithmetic technique is used in the following part to achieve good performance while avoiding timing attacks:
/*
 * Set up the Poly1305 state held in *ctx from a 32-byte key.
 *
 * Only the first part of the routine is shown in this excerpt: it fills the
 * five limbs st->r[0..4] from key bytes 0..15. The remainder is elided.
 *
 * The visible statements are straight-line loads, shifts and masks with
 * constant offsets; there are no branches or key-dependent indices here.
 */
void poly1305_init(poly1305_context *ctx, const unsigned char key[32]) {
/* The caller's context is reinterpreted as the internal state layout. */
poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
/*
 * The five statements below split the first 16 key bytes into limbs of at
 * most 26 bits and apply the clamp above in the same step.
 *
 * Assuming U8TO32(p) loads a 32-bit little-endian word from p (helper not
 * shown here -- confirm), the word at byte offset 3*i shifted right by 2*i
 * begins at bit 24*i + 2*i = 26*i of the 128-bit value, so r[i] holds the
 * bits starting at position 26*i.
 *
 * The clamp constant clears bits 28-31, 60-63, 92-95 and 124-127, plus bits
 * 32-33, 64-65 and 96-97. Relative to the limb starts (0, 26, 52, 78, 104)
 * those are bits 2-7 of r[1], 8-13 of r[2], 14-19 of r[3] and 20-23 of r[4].
 * Each mask is therefore 0x3ffffff (26 ones) with exactly those bits removed.
 */
/* bits 0..25: no clamped bits fall in this limb, plain 26-bit mask */
st->r[0] = (U8TO32(&key[ 0]) ) & 0x3ffffff;
/* bits 26..51: 0x3ffffff with limb bits 2-7 cleared */
st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03;
/* bits 52..77: 0x3ffffff with limb bits 8-13 cleared */
st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff;
/* bits 78..103: 0x3ffffff with limb bits 14-19 cleared */
st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff;
/* bits 104..127: only 24 bits remain after >> 8; the mask keeps the low 20 */
st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
......
......
}
/*
 * Internal working state. poly1305_init obtains a pointer to this layout by
 * casting its poly1305_context pointer, so the field order and sizes here
 * determine how the caller's context storage is interpreted.
 */
typedef struct poly1305_state_internal_t {
/* Five limbs written by poly1305_init from key bytes 0..15, each masked to at most 26 bits. */
unsigned long r[5];
/* NOTE(review): presumably the running accumulator in the same 5-limb form; its use is not shown in this excerpt. */
unsigned long h[5];
/* NOTE(review): presumably derived from key bytes 16..31; not shown in this excerpt -- confirm. */
unsigned long pad[4];
/* NOTE(review): presumably the number of bytes currently held in buffer -- confirm. */
size_t leftover;
/* Byte buffer of poly1305_block_size bytes (constant defined outside this excerpt). */
unsigned char buffer[poly1305_block_size];
/* NOTE(review): presumably a flag marking the final block -- confirm against the code that reads it. */
unsigned char final;
} poly1305_state_internal_t;
/*
 * Public context handed to poly1305_init, which casts a pointer to it into
 * poly1305_state_internal_t *.
 *
 * NOTE(review): the whole struct must be at least as large and as strictly
 * aligned as poly1305_state_internal_t; that depends on sizeof(unsigned long),
 * sizeof(size_t) and poly1305_block_size on the target -- verify.
 */
typedef struct poly1305_context {
/* NOTE(review): presumably present only to give the storage size_t alignment -- confirm. */
size_t aligner;
/* Raw storage that is accessed through the internal state layout. */
unsigned char opaque[136];
} poly1305_context;
I understand all the remaining parts of the code in this file. Can anyone help me understand the logic used here?