1

The following posting offers guidance on how to implement a carryless multiplicative inverse. However, a simple implementation for 8-bit value pairs does not produce the expected results.

/*
 * Carry-less product of a and b: the product of the two operands viewed as
 * polynomials over GF(2), one coefficient per bit.
 *
 * Terms shifted past the top bit of `unsigned` are discarded, so the result
 * is the product truncated to the width of `unsigned`.
 *
 * `unsigned` replaces the non-standard `uint` typedef so the function
 * compiles under strict ISO C.
 */
static unsigned cl_mul(unsigned a, unsigned b)
{
    unsigned r = 0;
    /* Stop as soon as the multiplier has no bits left to process. */
    while (a != 0)
    {
        if ((a & 1u) != 0)
        {
            r ^= b;      // carryless addition is xor
        }
        a >>= 1;
        b <<= 1;
    }
    return r;
}

/*
 * Carry-less multiplicative inverse of x modulo x^32, i.e. the value inv
 * whose carry-less product with x, truncated to 32 bits, equals 1.
 *
 * Only odd x (bit 0 set) has such an inverse; for even x (including 0) the
 * function returns 0 to signal that no inverse exists.
 *
 * NOTE(review): the loop assumes `unsigned` is at least 32 bits wide.
 */
static unsigned clmulinv(unsigned x)
{
    if ((x & 1u) == 0)
    {
        return 0;   /* divisible by the polynomial "x": not invertible */
    }

    unsigned inv = 1;
    unsigned rem = x;   /* running truncated product x * inv */
    for (int i = 1; i < 32; i++)
    {
        /* Cancel bit i of the running product by adding the x^i term to inv. */
        if (((rem >> i) & 1u) != 0)
        {
            rem ^= x << i;
            inv |= 1u << i;
        }
    }
    return inv;
}

/*
 * Experiment over every pair of 8-bit values (c, d): form the carry-less
 * product t = cl_mul(d, c), feed it to clmulinv, and print the rows where t
 * is odd and the (16-bit truncated) result e equals one of the two factors.
 *
 * The printf conversions now match the argument types: the original passed
 * uint16_t values to %llu, which is undefined behaviour, and printed the
 * 16-bit e through %hhu.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    uint16_t k = 0;   /* number of rows printed so far */

    for (uint16_t cc = 0; cc < 256; cc++) {
        for (uint16_t dd = 0; dd < 256; dd++) {
            uint8_t c = (uint8_t)cc;
            uint8_t d = (uint8_t)dd;

            uint16_t t = (uint16_t)cl_mul(d, c);
            /* Keeps only the low 16 bits of the value returned by clmulinv. */
            uint16_t e = (uint16_t)clmulinv(t);

            if ((t & 0x1) && ((e == d) || (e == c))) {
                printf("k %4hu  c %4hhu    d %4hhu  t  %4hu  e %4hu   \n", k, c, d, t, e);
                k++;
            }
        }
    }

    return 0;
}

For 8-bit inputs [c] and [d] I obtain the result [t], which, if odd, I use as input for the inverse function to obtain [e]. Comparing [e] with both [c] and [d] does not offer much joy. What's wrong?

k    0  c    1  d    1          t      1        e    1
k    1  c    5  d  255          t    771        e  255
k    2  c   17  d   85          t   1285        e   85
k    3  c   21  d  219          t   3591        e  219
k    4  c   51  d   85          t   3855        e   51
k    5  c   65  d   73          t   4617        e   73
k    6  c   69  d  151          t   9995        e  151
k    7  c   73  d   65          t   4617        e   73
k    8  c   81  d  157          t   11789       e  157
k    9  c   85  d   17          t   1285        e   85
k   10  c   85  d   51          t   3855        e   51
k   11  c  151  d   69          t   9995        e  151
k   12  c  157  d   81          t   11789       e  157
k   13  c  219  d   21          t   3591        e  219
k   14  c  255  d    5          t    771        e  255

1 Answers1

2

Sadly this code does not correctly implement multiplication in $\mathrm{GF}(2^n)$. We obtain $\mathrm{GF}(2^n)$ by working in the ring $\mathrm{GF}(2)[x]$ modulo some irreducible polynomial in $x$ of degree $n$. The code here uses $x^p$, which is not irreducible. Whenever the argument x of clmulinv is even (i.e. divisible by the polynomial $x$), there is no inverse.

For $\mathrm{GF}(2^8)$, AES uses the polynomial $x^8 + x^4 + x^3 + x + 1$. This means that, whenever the high bit of b is set, we need to XOR b with $2^4 + 2^3 + 2^1 + 2^0 = 27$ after the left shift.

Repaired C code below.

#include <assert.h>
#include <stdio.h>

// Reduction polynomial x^8 + x^4 + x^3 + x + 1, one bit per term.
enum { poly = 0x100 | 0x010 | 0x008 | 0x002 | 0x001 };

// Shift-and-add product of a and b with reduction by poly: for every set bit
// of a the current multiple of b is XORed into the result, and b is reduced
// by poly whenever doubling it sets bit 8.
unsigned gf2_mul(unsigned a, unsigned b) {
  unsigned product = 0;
  for (; a != 0; a >>= 1) {
    if ((a & 1u) != 0) {
      product ^= b;
    }
    b <<= 1;
    if ((b & 0x100u) != 0) {
      b ^= poly;
    }
  }
  return product;
}

// Plain carry-less product of a and b (polynomials over GF(2), one bit per
// coefficient) with no modular reduction; bits shifted past the width of
// `unsigned` are dropped.
unsigned poly_mul(unsigned a, unsigned b) {
  unsigned acc = 0;
  for (unsigned bits = a, term = b; bits != 0; bits >>= 1, term <<= 1) {
    if ((bits & 1u) != 0) {
      acc ^= term;
    }
  }
  return acc;
}

// Polynomial long division over GF(2): writes q and r with
// a == poly_mul(q, b) ^ r, where the degree of r is below the degree of b.
//
//   a      dividend
//   b      divisor; must be non-zero
//   q_out  receives the quotient
//   r_out  receives the remainder
//
// A zero dividend yields q == 0 and r == 0 without calling __builtin_clz,
// whose result is undefined for an argument of 0.
void poly_divmod(unsigned a, unsigned b, unsigned *q_out, unsigned *r_out) {
  assert(b != 0);  // division by the zero polynomial has no result
  unsigned orig_a = a;
  unsigned q = 0;
  if (a != 0) {
    // i starts at deg(a) - deg(b); a negative value means nothing to divide.
    for (int i = __builtin_clz(b) - __builtin_clz(a); 0 <= i; i--) {
      assert(orig_a == (poly_mul(q, b) ^ a));
      unsigned c = a ^ (b << i);
      // c < a exactly when the leading term of b << i cancels a set bit of a.
      if (c < a) {
        q ^= 1u << i;  // unsigned literal: 1 << 31 would overflow a signed int
        a = c;
      }
    }
  }
  *q_out = q;
  *r_out = a;
}

// Extended Euclidean algorithm on the polynomials a and poly.
//
// Two rows are tracked, each a remainder together with the coefficients that
// express it as a combination of the original inputs:
//   cur  == s_cur  * a ^ t_cur  * poly
//   prev == s_prev * a ^ t_prev * poly
// When cur reaches 0, prev holds the gcd and s_prev is the coefficient of a,
// which is returned. The caller in this file checks that this value is the
// gf2_mul inverse of every non-zero 8-bit a. For a == 0 the loop never runs
// and 0 is returned.
unsigned gf2_inv(unsigned a) {
  const unsigned a0 = a;
  const unsigned m0 = poly;

  unsigned cur = a0, s_cur = 1, t_cur = 0;
  unsigned prev = m0, s_prev = 0, t_prev = 1;

  while (cur != 0) {
    assert(cur == (poly_mul(s_cur, a0) ^ poly_mul(t_cur, m0)));
    assert(prev == (poly_mul(s_prev, a0) ^ poly_mul(t_prev, m0)));

    unsigned quot, rem;
    poly_divmod(prev, cur, &quot, &rem);
    assert(prev == (poly_mul(quot, cur) ^ rem));

    // New row = previous row minus quot times current row (minus is XOR).
    const unsigned s_next = s_prev ^ poly_mul(quot, s_cur);
    const unsigned t_next = t_prev ^ poly_mul(quot, t_cur);

    prev = cur;
    s_prev = s_cur;
    t_prev = t_cur;

    cur = rem;
    s_cur = s_next;
    t_cur = t_next;
  }
  return s_prev;
}

// Exhaustive self-test over all 8-bit values: checks that 1 is the
// multiplicative identity, that gf2_inv yields an inverse for every non-zero
// element, and that gf2_mul is commutative, associative and distributes over
// XOR.
int main() {
  enum { field_size = 1 << 8 };

  for (unsigned x = 0; x < field_size; ++x) {
    assert(gf2_mul(x, 1) == x);
    if (x != 0) {
      assert(gf2_mul(x, gf2_inv(x)) == 1);
    }
    for (unsigned y = 0; y < field_size; ++y) {
      assert(gf2_mul(x, y) == gf2_mul(y, x));
      for (unsigned z = 0; z < field_size; ++z) {
        assert(gf2_mul(x, gf2_mul(y, z)) == gf2_mul(gf2_mul(x, y), z));
        assert(gf2_mul(x ^ y, z) == (gf2_mul(x, z) ^ gf2_mul(y, z)));
      }
    }
  }
  return 0;
}
David Eisenstat
  • 64,237
  • 7
  • 60
  • 120
  • I updated to include this line b ^= 27; although now the outer for loop stops at 1. Should i mod() the output? – affluentbarnburner Aug 16 '22 at 00:31
  • @affluentbarnburner I repaired the code. – David Eisenstat Aug 16 '22 at 01:43
  • HI David - got derailed last summer - thank you for your updated code - question pls: unsigned gf2_mul(unsigned a, unsigned b) is designed to return a 16-bit number, which is logically true because if [a] is 0x80 then result = [b] ^ ([b] << 7) - however your gf2_mul never does when presented 8-bit numbers for [a] and [b] - am i misunderstanding ? – affluentbarnburner Mar 26 '23 at 19:35
  • It's designed to multiply two elements of GF(2^8), so no, it never will. I used a wider type out of habit. – David Eisenstat Mar 26 '23 at 20:06