My intention is to calculate a TRON wallet address from a private key. I am able to do this on the CPU, but I cannot get the right address on the GPU. I think the problem in the GPU version lies in the keccak256 part.
# CPU Version
def derive_tron_addr(private_key):
    """Derive the Base58Check TRON address for a hex-encoded private key.

    Args:
        private_key: secp256k1 private key as a hex string (it is decoded
            with ``bytes.fromhex``).

    Returns:
        The address as ``str``: Base58 of
        ``0x41 || last 20 bytes of Keccak-256(public key) || checksum``,
        where the checksum is the first 4 bytes of a double SHA-256 over
        the 21-byte prefixed address.
    """
    sk = ecdsa.SigningKey.from_string(bytes.fromhex(private_key), curve=ecdsa.SECP256k1)
    # to_string() is hashed as-is: no 0x04 marker is prepended and then
    # sliced off again, so nothing but the key bytes reaches the hash.
    public_key = sk.get_verifying_key().to_string()

    # Only the Keccak digest feeds the address; the hashlib.sha3_256 digest
    # that used to be computed first was overwritten before any use.
    keccak_hash = keccak.new(digest_bits=256)
    keccak_hash.update(public_key)
    # 40 hex characters == the trailing 20 bytes of the digest.
    address = b"\x41" + bytes.fromhex(keccak_hash.hexdigest()[-40:])

    checksum = hashlib.sha256(hashlib.sha256(address).digest()).digest()[:4]
    return base58.b58encode(address + checksum).decode()
The function above derives the address correctly.
# GPU Version
# Read the CUDA sources that sit next to this script; they are pasted
# verbatim into the kernel source below.
script_dir = os.path.dirname(__file__)
sha3_path = os.path.join(script_dir, 'sha3.cu')
sha256_path = os.path.join(script_dir, 'sha256.cu')
with open(sha3_path, "r") as f:
    sha3_cu_code = f.read()
with open(sha256_path, "r") as f:
    sha256_cu_code = f.read()

# Kernel contract: one thread per key. Every key owns one 57-byte record in
# `output`: bytes 0..31 hold the keccak256 digest, bytes 32..56 hold the
# 21-byte prefixed address followed by its 4-byte checksum.
# NOTE(review): keccak256() and sha256() come from sha3.cu / sha256.cu, which
# are not shown here. Confirm that keccak256() uses the original Keccak
# padding rather than SHA3 padding, and that sha256() tolerates the same
# buffer being used as input and output.
cuda_code = f"""
#include <stdint.h>
{sha256_cu_code}
{sha3_cu_code}
extern "C" {{
__global__ void hash_keys(char *input, char *output, int n, int key_len) {{
    const int max_key_len = 64;   // capacity of the local key buffer
    const int hash_len = 32;      // keccak256 digest size
    const int payload_len = 25;   // 1 prefix + 20 address + 4 checksum bytes
    const int record_len = hash_len + payload_len;

    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) {{
        return;
    }}

    // Both the digest and the payload are written relative to this base, so
    // records of neighbouring keys never overlap.
    char *record = output + idx * record_len;

    // A key longer than the local buffer would overflow it; emit an all-zero
    // record instead of corrupting memory.
    if (key_len < 0 || key_len > max_key_len) {{
        memset(record, 0, record_len);
        return;
    }}

    char key[64];
    memcpy(key, input + idx * key_len, key_len);

    char hash[32];
    keccak256(key, key_len, (unsigned char*)hash, 32);
    memcpy(record, hash, hash_len);

    // Address = 0x41 prefix + last 20 bytes of the digest.
    uint8_t address[21];
    address[0] = 0x41;
    memcpy(address + 1, hash + 12, 20);

    // Checksum = first 4 bytes of sha256(sha256(address)).
    uint8_t checksum[32];
    sha256((const uint8_t*)address, 21, checksum);
    sha256(checksum, 32, checksum);

    uint8_t address_with_checksum[25];
    memcpy(address_with_checksum, address, 21);
    memcpy(address_with_checksum + 21, checksum, 4);
    memcpy(record + hash_len, address_with_checksum, payload_len);
}}
}}
"""
mod = SourceModule(cuda_code)
hash_keys = mod.get_function("hash_keys")
def derive_tron_address_from_key(private_key):
    """Derive a TRON address for one private key using the ``hash_keys`` kernel.

    The public key is computed on the host with ``ecdsa``; hashing and
    checksum generation are delegated to the GPU kernel.

    Args:
        private_key: secp256k1 private key as a hex string (it is decoded
            with ``bytes.fromhex``).

    Returns:
        Whatever ``base58_encode`` returns for the 25-byte
        address-plus-checksum payload read back from the device.
        NOTE(review): ``base58_encode`` is defined outside this block;
        presumably it returns the Base58 string -- confirm.
    """
    hash_len = 32
    # This function allocates and reads 57 bytes per key: the first 32 are
    # taken as the digest, the remaining 25 as the address payload.
    record_len = 57

    sk = ecdsa.SigningKey.from_string(bytes.fromhex(private_key), curve=ecdsa.SECP256k1)
    # to_string() is uploaded as-is: no marker byte is prepended and sliced
    # off again, so the key stays within the kernel's 64-byte key buffer.
    public_key = sk.get_verifying_key().to_string()

    # Shape (1, key_len): one row per key, as the kernel indexes by row.
    keys_np = np.array([list(public_key)], dtype=np.uint8)
    key_count, key_len = keys_np.shape

    keys_gpu = cuda.mem_alloc(keys_np.nbytes)
    result_gpu = cuda.mem_alloc(key_count * record_len)
    cuda.memcpy_htod(keys_gpu, keys_np)

    block_size = 256
    grid_size = (key_count + block_size - 1) // block_size  # ceiling division
    hash_keys(
        keys_gpu,
        result_gpu,
        np.int32(key_count),
        np.int32(key_len),
        block=(block_size, 1, 1),
        grid=(grid_size, 1),
    )

    result = np.empty(key_count * record_len, dtype=np.uint8)
    cuda.memcpy_dtoh(result, result_gpu)

    # Skip the leading digest; only the address payload is encoded.
    address_with_checksum = result[hash_len:record_len].tobytes()
    return base58_encode(address_with_checksum)
I need to calculate the TRON wallet address on the GPU and compare the efficiency of the two methods.