I am having trouble understanding this code and converting it from JavaScript to Python. The problem seems to lie in the Buffer method used in JavaScript, which produces a different hash output than my Python code does. The main goal is to get the merkleRoot of the transactions ["a", "b"].
JavaScript: (The individual hashes of "a" and "b" are the same as those produced by a Python SHA-256 implementation. The difference apparently comes from the call Buffer.concat([hashA, hashB]), but I cannot figure out how to implement it in Python. In Python I get a merkleRoot of "ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d", which is not correct. The correct merkleRoot is posted below.)
// Return the raw SHA-256 digest of `tx` as a Buffer (not a hex string).
function sha256(tx) {
  const hasher = crypto.createHash("sha256");
  hasher.update(tx);
  return hasher.digest();
}

// Hash two digests as one input: the Buffers are joined byte-for-byte
// and the result is passed to `hashFunction` (SHA-256 by default).
function hashPair(hashA, hashB, hashFunction = sha256) {
  const joined = Buffer.concat([hashA, hashB]);
  return hashFunction(joined);
}

// Leaf digests for the two transactions.
const a = sha256("a");
const b = sha256("b");

// Hex form of the parent (root) digest of the two leaves.
hashPair(a, b).toString("hex");
e5a01fee14e0ed5c48714f22180f25ad8365b53f9779f79dc4a3d7e93963f94a
├─ ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb
└─ 3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d
I have tried several approaches, such as base64 and different encodings, but because of my limited knowledge of cryptography I cannot figure out the right one. My approach in Python was:
- Get SHA256 of the string "a"
- Get SHA256 of the string "b"
- Get SHA256 of the concatenated hashes of "a"+"b": ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d
Here is the Python Implementation from: https://www.geeksforgeeks.org/introduction-to-merkle-tree/
Python:
# Python code for implementing a Merkle tree
from typing import List
import hashlib
class Node:
    """One vertex of the Merkle tree.

    Attributes:
        left, right: child nodes; both are ``None`` for a leaf.
        value: the node's hash. Trees built by ``MerkleTree`` store a
            hex-encoded SHA-256 digest here.
        content: the input value for a leaf, or the children's contents
            joined with ``+`` for an inner node.
        is_copied: ``True`` when the node was produced by ``copy()`` to pad
            an odd-sized level.
    """

    def __init__(self, left, right, value: str, content, is_copied=False) -> None:
        self.left: Node = left
        self.right: Node = right
        self.value = value
        self.content = content
        self.is_copied = is_copied

    @staticmethod
    def hash(val: str) -> str:
        """Return the hex SHA-256 digest of *val* encoded as UTF-8."""
        return hashlib.sha256(val.encode('utf-8')).hexdigest()

    @staticmethod
    def hash_pair(left: str, right: str, binary_concat: bool = False) -> str:
        """Return the hex SHA-256 digest of two child digests combined.

        Args:
            left: hex digest of the left child.
            right: hex digest of the right child.
            binary_concat: when ``False`` (default) the two hex strings are
                joined as text and that text is hashed. When ``True`` each
                hex digest is first decoded to its raw bytes and the bytes
                are joined, which is what hashing
                ``Buffer.concat([hashA, hashB])`` does in Node.js.

        Raises:
            ValueError: if ``binary_concat`` is set and an argument is not
                valid hexadecimal.
        """
        if binary_concat:
            payload = bytes.fromhex(left) + bytes.fromhex(right)
        else:
            payload = (left + right).encode('utf-8')
        return hashlib.sha256(payload).hexdigest()

    def __str__(self):
        return (str(self.value))

    def copy(self):
        """Return a shallow duplicate of this node flagged as padding."""
        return Node(self.left, self.right, self.value, self.content, True)


class MerkleTree:
    """Binary hash tree over a list of string values.

    Args:
        values: the inputs to hash into leaves; must not be empty.
        binary_concat: keyword-only. Selects how two child digests are
            combined before hashing (see ``Node.hash_pair``). The default
            ``False`` hashes the concatenated hex text; ``True`` hashes the
            concatenated raw digest bytes.

    Raises:
        ValueError: if *values* is empty.
    """

    def __init__(self, values: List[str], *, binary_concat: bool = False) -> None:
        # Must be set before building: the recursive builder reads it.
        self.binary_concat = binary_concat
        self.__buildTree(values)

    def __buildTree(self, values: List[str]) -> None:
        """Hash every value into a leaf and build the tree above the leaves."""
        leaves: List[Node] = [Node(None, None, Node.hash(e), e) for e in values]
        if not leaves:
            # Without this guard the recursive builder never reaches its
            # two-node base case and recurses until RecursionError.
            raise ValueError("cannot build a Merkle tree from an empty list of values")
        if len(leaves) % 2 == 1:
            leaves.append(leaves[-1].copy())  # duplicate last elem if odd number of elements
        self.root: Node = self.__buildTreeRec(leaves)

    def __buildTreeRec(self, nodes: List[Node]) -> Node:
        """Return the root of the subtree spanning *nodes* (non-empty)."""
        if len(nodes) % 2 == 1:
            # Pad on a new list so the caller's list is left untouched.
            nodes = nodes + [nodes[-1].copy()]
        if len(nodes) == 2:
            left, right = nodes
        else:
            half: int = len(nodes) // 2
            left = self.__buildTreeRec(nodes[:half])
            right = self.__buildTreeRec(nodes[half:])
        value: str = Node.hash_pair(left.value, right.value, self.binary_concat)
        content: str = f'{left.content}+{right.content}'
        return Node(left, right, value, content)

    def printTree(self) -> None:
        """Print every node of the tree, parents before children."""
        self.__printTreeRec(self.root)

    def __printTreeRec(self, node: Node) -> None:
        """Print *node*, then its left subtree, then its right subtree."""
        if node is None:
            return
        if node.left is not None:
            print("Left: "+str(node.left))
            print("Right: "+str(node.right))
        else:
            print("Input")
        if node.is_copied:
            print('(Padding)')
        print("Value: "+str(node.value))
        print("Content: "+str(node.content))
        print("")
        self.__printTreeRec(node.left)
        self.__printTreeRec(node.right)

    def getRootHash(self) -> str:
        """Return the hash stored in the root node."""
        return self.root.value
def mixmerkletree() -> None:
    """Build a Merkle tree over the sample inputs and print it.

    Prints the inputs, then the root hash, then every node of the tree.
    """
    # Two inputs, i.e. an even count, so no padding leaf is added here.
    inputs = ["a", "b"]
    print("Inputs: ")
    print(" | ".join(inputs))
    print("")
    tree = MerkleTree(inputs)
    print(f"Root Hash: {tree.getRootHash()}\n")
    tree.printTree()


mixmerkletree()
#This code was contributed by Pranay Arora (TSEC-2023).
Python Output:
Inputs:
a | b
Root Hash: 62af5c3cb8da3e4f25061e829ebeea5c7513c54949115b1acc225930a90154da
Left: ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb
Right: 3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d
Value: 62af5c3cb8da3e4f25061e829ebeea5c7513c54949115b1acc225930a90154da
Content: a+b
So my main question is: how can I correctly reproduce the JavaScript Buffer method in Python, so that I get the same hash when combining the hashes of "a" and "b"? The correct merkleRoot, as shown above, should be: e5a01fee14e0ed5c48714f22180f25ad8365b53f9779f79dc4a3d7e93963f94a