-1

I am looking to learn more about hashing algorithms, specifically the Message Digest 5 algorithm. And specifically in a slightly more up-to-date version of Python (3+.)

I know you can do the whole from hashlib import md5 thing, and that implementations of it in Python can be found online. However, I've found that the most recent one that mimics the original C code only works in Python version 2.4.X and that there is also a 3.2 version which is very condensed and not at all what I'm looking for (http://rosettacode.org/wiki/MD5/Implementation.)

After spending a few hours converting from JavaScript to Python 3.5, I can't get it right no matter how hard I try, the resulting hashes are always different than the ones from the hashlib module itself. Can anyone help me out?

EDIT: This is the JavaScript implementation I am converting from: How does MD5Sum algorithm work? This implementation works, I've verified it using the online tool that w3schools.com has.

Here is my current code (that still won't work.) The issue is most definitely in the bitwise operations, the main hashing function is not the issue since I directly copy and pasted from the JavaScript version which works

def newArray(num):
    array=[]
    for x in range(num):
        array.append(0)
    return array

def convertToWordArray(string):
    #print(string)
    lMessageLength=len(string)
    #print(lMessageLength)
    lNumberOfWords_temp1=lMessageLength+8
    #print(lNumberOfWords_temp1)
    lNumberOfWords_temp2=int((lNumberOfWords_temp1-(lNumberOfWords_temp1%64))/64)
    #print(lNumberOfWords_temp2)
    lNumberOfWords=(lNumberOfWords_temp2+1)*16
    #print(lNumberOfWords)
    lWordArray=newArray(lNumberOfWords-1)
    lBytePosition=0
    lByteCount=0
    while lByteCount<lMessageLength:
        lWordCount=int((lByteCount-(lByteCount%4))/4)
        lBytePosition=(lByteCount%4)*8
        #print(string[int(lByteCount)])
        lWordArray[lWordCount]=(lWordArray[lWordCount]|(ord(string[int(lByteCount)])<<lBytePosition))
        lByteCount+=1
    lWordCount=int((lByteCount-(lByteCount%4))/4)
    lBytePosition=(lByteCount%4)*8
    lWordArray[lWordCount]=lWordArray[lWordCount]|(0x80<<lBytePosition)
    lWordArray[lNumberOfWords-2]=lMessageLength<<3
    lWordArray.append(lMessageLength>>29)
    return lWordArray

def F(x,y,z):
    return (x & y)|((~x) & z)
def G(x,y,z):
    return (x & z)|(y & (~z))
def H(x,y,z):
    return (x ^ y ^ z)
def I(x,y,z):
    return (y ^ (x|(~z)))
def C(q,a,b,x,s,ac):
    return addu(rol(addu(addu(a,q),addu(x,ac)),s),b)
def FF(a,b,c,d,x,s,ac):
    return C((b & c)|((~b) & d),a,b,x,s,ac)
def GG(a,b,c,d,x,s,ac):
    return C((b & d)|(c & (~d)),a,b,x,s,ac)
def HH(a,b,c,d,x,s,ac):
    return C(b ^ c ^ d,a,b,x,s,ac)
def II(a,b,c,d,x,s,ac):
    return C(c ^ (b|(~d)),a,b,x,s,ac)
def addu(x,y):
    ls=(x & 0xFFFF)+(y & 0xFFFF)
    return (((x>>16)+(y>>16)+(ls>>16))<<16)|(ls & 0xFFFF)
def rol(v,s):
    return (v<<s)|(v>>(32-s))
def wordToHex(lValue):
    wordToHexValue=''
    wordToHexValue_temp=''
    for lCount in range(4):
        lByte=(lValue>>(lCount*8)) & 255
        wordToHexValue_temp="0"+format(lByte, 'x')
        wordToHexValue=wordToHexValue+wordToHexValue_temp[-2:]
    return wordToHexValue

def md5hash(message):
    x=convertToWordArray(message)
    a=0x67452301
    b=0xEFCDAB89
    c=0x98BADCFE
    d=0x10325476
    xl=len(x)
    j=0
    while j<xl:
        aa=a
        bb=b
        cc=c
        dd=d
        a=FF(a,b,c,d, x[j+0], 7,0xD76AA478)
        d=FF(d,a,b,c, x[j+1],12,0xE8C7B756)
        c=FF(c,d,a,b, x[j+2],17,0x242070DB)
        b=FF(b,c,d,a, x[j+3],22,0xC1BDCEEE)
        a=FF(a,b,c,d, x[j+4], 7,0xF57C0FAF)
        d=FF(d,a,b,c, x[j+5],12,0x4787C62A)
        c=FF(c,d,a,b, x[j+6],17,0xA8304613)
        b=FF(b,c,d,a, x[j+7],22,0xFD469501)
        a=FF(a,b,c,d, x[j+8], 7,0x698098D8)
        d=FF(d,a,b,c, x[j+9],12,0x8B44F7AF)
        c=FF(c,d,a,b,x[j+10],17,0xFFFF5BB1)
        b=FF(b,c,d,a,x[j+11],22,0x895CD7BE)
        a=FF(a,b,c,d,x[j+12], 7,0x6B901122)
        d=FF(d,a,b,c,x[j+13],12,0xFD987193)
        c=FF(c,d,a,b,x[j+14],17,0xA679438E)
        b=FF(b,c,d,a,x[j+15],22,0x49B40821)
        a=GG(a,b,c,d, x[j+1], 5,0xF61E2562)
        d=GG(d,a,b,c, x[j+6], 9,0xC040B340)
        c=GG(c,d,a,b,x[j+11],14,0x265E5A51)
        b=GG(b,c,d,a, x[j+0],20,0xE9B6C7AA)
        a=GG(a,b,c,d, x[j+5], 5,0xD62F105D)
        d=GG(d,a,b,c,x[j+10], 9,0x2441453)
        c=GG(c,d,a,b,x[j+15],14,0xD8A1E681)
        b=GG(b,c,d,a, x[j+4],20,0xE7D3FBC8)
        a=GG(a,b,c,d, x[j+9], 5,0x21E1CDE6)
        d=GG(d,a,b,c,x[j+14], 9,0xC33707D6)
        c=GG(c,d,a,b, x[j+3],14,0xF4D50D87)
        b=GG(b,c,d,a, x[j+8],20,0x455A14ED)
        a=GG(a,b,c,d,x[j+13], 5,0xA9E3E905)
        d=GG(d,a,b,c, x[j+2], 9,0xFCEFA3F8)
        c=GG(c,d,a,b, x[j+7],14,0x676F02D9)
        b=GG(b,c,d,a,x[j+12],20,0x8D2A4C8A)
        a=HH(a,b,c,d, x[j+5], 4,0xFFFA3942)
        d=HH(d,a,b,c, x[j+8],11,0x8771F681)
        c=HH(c,d,a,b,x[j+11],16,0x6D9D6122)
        b=HH(b,c,d,a,x[j+14],23,0xFDE5380C)
        a=HH(a,b,c,d, x[j+1], 4,0xA4BEEA44)
        d=HH(d,a,b,c, x[j+4],11,0x4BDECFA9)
        c=HH(c,d,a,b, x[j+7],16,0xF6BB4B60)
        b=HH(b,c,d,a,x[j+10],23,0xBEBFBC70)
        a=HH(a,b,c,d,x[j+13], 4,0x289B7EC6)
        d=HH(d,a,b,c, x[j+0],11,0xEAA127FA)
        c=HH(c,d,a,b, x[j+3],16,0xD4EF3085)
        b=HH(b,c,d,a, x[j+6],23,0x4881D05)
        a=HH(a,b,c,d, x[j+9], 4,0xD9D4D039)
        d=HH(d,a,b,c,x[j+12],11,0xE6DB99E5)
        c=HH(c,d,a,b,x[j+15],16,0x1FA27CF8)
        b=HH(b,c,d,a, x[j+2],23,0xC4AC5665)
        a=II(a,b,c,d, x[j+0], 6,0xF4292244)
        d=II(d,a,b,c, x[j+7],10,0x432AFF97)
        c=II(c,d,a,b,x[j+14],15,0xAB9423A7)
        b=II(b,c,d,a, x[j+5],21,0xFC93A039)
        a=II(a,b,c,d,x[j+12], 6,0x655B59C3)
        d=II(d,a,b,c, x[j+3],10,0x8F0CCC92)
        c=II(c,d,a,b,x[j+10],15,0xFFEFF47D)
        b=II(b,c,d,a, x[j+1],21,0x85845DD1)
        a=II(a,b,c,d, x[j+8], 6,0x6FA87E4F)
        d=II(d,a,b,c,x[j+15],10,0xFE2CE6E0)
        c=II(c,d,a,b, x[j+6],15,0xA3014314)
        b=II(b,c,d,a,x[j+13],21,0x4E0811A1)
        a=II(a,b,c,d, x[j+4], 6,0xF7537E82)
        d=II(d,a,b,c,x[j+11],10,0xBD3AF235)
        c=II(c,d,a,b, x[j+2],15,0x2AD7D2BB)
        b=II(b,c,d,a, x[j+9],21,0xEB86D391)
        a=addu(a,aa)
        b=addu(b,bb)
        c=addu(c,cc)
        d=addu(d,dd)
        j+=16
    return (wordToHex(a)+wordToHex(b)+wordToHex(c)+wordToHex(d)).lower()
Cœur
  • 37,241
  • 25
  • 195
  • 267
Mick Ashton
  • 356
  • 4
  • 15
  • 2
    if you post your code, with specific questions pertaining to errors in your code then yeah, people will help you out. Right now? doesn't seem likely :) – R Nar Dec 11 '15 at 16:59
  • What JavaScript implementation of md5 are you using? How are you feeding data to the digest? Have you verified there's no implicit decode/encode happening during update? **Show your test code!** – dhke Dec 11 '15 at 17:00
  • I've added it as an edit for you guys – Mick Ashton Dec 11 '15 at 17:09
  • @Jaco I want the full code, not the hashlib module (written in C btw) – Mick Ashton Dec 11 '15 at 17:10
  • Dinu Gherman has a pure Python version [here](http://starship.python.net/~gherman/programs/md5py/md5py.py) that seems to more closely mimic the JavaScript version you posted. However, as advice from experience: Trying to refactor the inner workings of cryptographic algorithms from source code is usually a waste of time. The [RFC](https://tools.ietf.org/html/rfc1321) has a short abstract explanation, however. – dhke Dec 11 '15 at 17:16
  • @dhke this is the 2.4 version I said I didn't want... I would like a repair of my code or a 3.4 un-condensed version, but thanks for the tip. – Mick Ashton Dec 11 '15 at 17:18
  • This is not the prettiest implementation I have seen, it is going to take some time to debug this! – Alex Dec 11 '15 at 17:22
  • Why don't you debug the code? You can do it both in the js code, and in your python code. – fodma1 Dec 11 '15 at 17:29
  • @fodma1 I've tried, and still trying. I'll update this if I find my own solution – Mick Ashton Dec 11 '15 at 17:32

2 Answers2

1

I've figured it out, here is my new code. This time I tried utilising the 'Master FGHI Function' in the 2.4 version and it worked. Not sure what the true issue was with my original code, but this new method works fine in place of the FF, GG, HH, II functions.

def XX(func, a, b, c, d, x, s, ac):
    res=0
    res=res+a+func(b,c,d)
    res+=x
    res+=ac
    res=res & 0xffffffff
    res=rol(res,s)
    res=res & 0xffffffff
    res+=b
    return res & 0xffffffff

The full code for a Python 3.4 MD5 algorithm implementation is as follows:

def newArray(num):
    array=[]
    for x in range(num):
        array.append(0)
    return array

def convertToWordArray(string):
    #print(string)
    lMessageLength=len(string)
    #print(lMessageLength)
    lNumberOfWords_temp1=lMessageLength+8
    #print(lNumberOfWords_temp1)
    lNumberOfWords_temp2=(lNumberOfWords_temp1-(lNumberOfWords_temp1%64))/64
    #print(lNumberOfWords_temp2)
    lNumberOfWords=int((lNumberOfWords_temp2+1)*16)
    #print(lNumberOfWords)
    lWordArray=newArray(lNumberOfWords-1)
    lBytePosition=0
    lByteCount=0
    while lByteCount<lMessageLength:
        lWordCount=int((lByteCount-(lByteCount%4))/4)
        lBytePosition=(lByteCount%4)*8
        #print(string[int(lByteCount)])
        lWordArray[lWordCount]=(lWordArray[lWordCount]|(ord(string[int(lByteCount)])<<lBytePosition))
        lByteCount+=1
    lWordCount=int((lByteCount-(lByteCount%4))/4)
    lBytePosition=(lByteCount%4)*8
    lWordArray[lWordCount]=lWordArray[lWordCount]|(0x80<<lBytePosition)
    lWordArray[lNumberOfWords-2]=lMessageLength<<3
    lWordArray.append(lMessageLength>>29)
    return lWordArray

def F(x,y,z):
    print(x,y,x)
    return (x & y) | ((~x) & z)
def G(x,y,z):
    return (x & z) | (y & (~z))
def H(x,y,z):
    return x ^ y ^ z
def I(x,y,z):
    return y ^ (x | (~z))
def XX(func, a, b, c, d, x, s, ac):
    res=0
    res=res+a+func(b,c,d)
    res+=x
    res+=ac
    res=res & 0xffffffff
    res=rol(res,s)
    res=res & 0xffffffff
    res+=b
    return res & 0xffffffff
##    return addu(rol(addu(addu(a,q),addu(x,ac)),s),b)
##def FF(a,b,c,d,x,s,ac):
##    return C((b & c)|((~b) & d),a,b,x,s,ac)
##def GG(a,b,c,d,x,s,ac):
##    return C((b & d)|(c & (~d)),a,b,x,s,ac)
##def HH(a,b,c,d,x,s,ac):
##    return C(b ^ c ^ d,a,b,x,s,ac)
##def II(a,b,c,d,x,s,ac):
##    return C(c ^ (b|(~d)),a,b,x,s,ac)
def addu(x,y):
    ls=(x & 0xffffffff)+(y & 0xffffffff)
    return (((x>>16)+(y>>16)+(ls>>16))<<16)|(ls & 0xffffffff)
def rol(v,s):
    return (v<<s)|(v>>(32-s))
def wordToHex(lValue):
    wordToHexValue=''
    wordToHexValue_temp=''
    for lCount in range(4):
        lByte=(lValue>>(lCount*8)) & 255
        wordToHexValue_temp="0"+format(lByte, 'x')
        wordToHexValue=wordToHexValue+wordToHexValue_temp[-2:]
    return wordToHexValue

def md5hash(message):
    x=convertToWordArray(message)
    a=0x67452301
    b=0xEFCDAB89
    c=0x98BADCFE
    d=0x10325476
    xl=len(x)
    j=0
    while j<xl:
        aa=a
        bb=b
        cc=c
        dd=d
        a=XX(F,a,b,c,d, x[j+0], 7,0xD76AA478)
        d=XX(F,d,a,b,c, x[j+1],12,0xE8C7B756)
        c=XX(F,c,d,a,b, x[j+2],17,0x242070DB)
        b=XX(F,b,c,d,a, x[j+3],22,0xC1BDCEEE)
        a=XX(F,a,b,c,d, x[j+4], 7,0xF57C0FAF)
        d=XX(F,d,a,b,c, x[j+5],12,0x4787C62A)
        c=XX(F,c,d,a,b, x[j+6],17,0xA8304613)
        b=XX(F,b,c,d,a, x[j+7],22,0xFD469501)
        a=XX(F,a,b,c,d, x[j+8], 7,0x698098D8)
        d=XX(F,d,a,b,c, x[j+9],12,0x8B44F7AF)
        c=XX(F,c,d,a,b,x[j+10],17,0xFFFF5BB1)
        b=XX(F,b,c,d,a,x[j+11],22,0x895CD7BE)
        a=XX(F,a,b,c,d,x[j+12], 7,0x6B901122)
        d=XX(F,d,a,b,c,x[j+13],12,0xFD987193)
        c=XX(F,c,d,a,b,x[j+14],17,0xA679438E)
        b=XX(F,b,c,d,a,x[j+15],22,0x49B40821)
        a=XX(G,a,b,c,d, x[j+1], 5,0xF61E2562)
        d=XX(G,d,a,b,c, x[j+6], 9,0xC040B340)
        c=XX(G,c,d,a,b,x[j+11],14,0x265E5A51)
        b=XX(G,b,c,d,a, x[j+0],20,0xE9B6C7AA)
        a=XX(G,a,b,c,d, x[j+5], 5,0xD62F105D)
        d=XX(G,d,a,b,c,x[j+10], 9,0x2441453)
        c=XX(G,c,d,a,b,x[j+15],14,0xD8A1E681)
        b=XX(G,b,c,d,a, x[j+4],20,0xE7D3FBC8)
        a=XX(G,a,b,c,d, x[j+9], 5,0x21E1CDE6)
        d=XX(G,d,a,b,c,x[j+14], 9,0xC33707D6)
        c=XX(G,c,d,a,b, x[j+3],14,0xF4D50D87)
        b=XX(G,b,c,d,a, x[j+8],20,0x455A14ED)
        a=XX(G,a,b,c,d,x[j+13], 5,0xA9E3E905)
        d=XX(G,d,a,b,c, x[j+2], 9,0xFCEFA3F8)
        c=XX(G,c,d,a,b, x[j+7],14,0x676F02D9)
        b=XX(G,b,c,d,a,x[j+12],20,0x8D2A4C8A)
        a=XX(H,a,b,c,d, x[j+5], 4,0xFFFA3942)
        d=XX(H,d,a,b,c, x[j+8],11,0x8771F681)
        c=XX(H,c,d,a,b,x[j+11],16,0x6D9D6122)
        b=XX(H,b,c,d,a,x[j+14],23,0xFDE5380C)
        a=XX(H,a,b,c,d, x[j+1], 4,0xA4BEEA44)
        d=XX(H,d,a,b,c, x[j+4],11,0x4BDECFA9)
        c=XX(H,c,d,a,b, x[j+7],16,0xF6BB4B60)
        b=XX(H,b,c,d,a,x[j+10],23,0xBEBFBC70)
        a=XX(H,a,b,c,d,x[j+13], 4,0x289B7EC6)
        d=XX(H,d,a,b,c, x[j+0],11,0xEAA127FA)
        c=XX(H,c,d,a,b, x[j+3],16,0xD4EF3085)
        b=XX(H,b,c,d,a, x[j+6],23,0x4881D05)
        a=XX(H,a,b,c,d, x[j+9], 4,0xD9D4D039)
        d=XX(H,d,a,b,c,x[j+12],11,0xE6DB99E5)
        c=XX(H,c,d,a,b,x[j+15],16,0x1FA27CF8)
        b=XX(H,b,c,d,a, x[j+2],23,0xC4AC5665)
        a=XX(I,a,b,c,d, x[j+0], 6,0xF4292244)
        d=XX(I,d,a,b,c, x[j+7],10,0x432AFF97)
        c=XX(I,c,d,a,b,x[j+14],15,0xAB9423A7)
        b=XX(I,b,c,d,a, x[j+5],21,0xFC93A039)
        a=XX(I,a,b,c,d,x[j+12], 6,0x655B59C3)
        d=XX(I,d,a,b,c, x[j+3],10,0x8F0CCC92)
        c=XX(I,c,d,a,b,x[j+10],15,0xFFEFF47D)
        b=XX(I,b,c,d,a, x[j+1],21,0x85845DD1)
        a=XX(I,a,b,c,d, x[j+8], 6,0x6FA87E4F)
        d=XX(I,d,a,b,c,x[j+15],10,0xFE2CE6E0)
        c=XX(I,c,d,a,b, x[j+6],15,0xA3014314)
        b=XX(I,b,c,d,a,x[j+13],21,0x4E0811A1)
        a=XX(I,a,b,c,d, x[j+4], 6,0xF7537E82)
        d=XX(I,d,a,b,c,x[j+11],10,0xBD3AF235)
        c=XX(I,c,d,a,b, x[j+2],15,0x2AD7D2BB)
        b=XX(I,b,c,d,a, x[j+9],21,0xEB86D391)
        a=addu(a,aa)
        b=addu(b,bb)
        c=addu(c,cc)
        d=addu(d,dd)
        j+=16
    return (wordToHex(a)+wordToHex(b)+wordToHex(c)+wordToHex(d)).lower()
Mick Ashton
  • 356
  • 4
  • 15
  • 1
    Idk, maybe someone will find this useful. I just wanted to make it since it didn't really exist for this version (at least not easily accessible.) – Mick Ashton Aug 01 '16 at 17:32
  • it's a bit late but your code is not givng the correct output I checked it with hashlib library, please check – joedenly Apr 18 '20 at 12:36
0

I tried the new code but it didn't give the correct output, so I tried this code instead (which did work):

def addu(x,y):
    return (x + y) & 0xffffffff