currently, I am studying the textbook “handbook of floating point arithmetic”. Im reading the chapter about the software implementation of floating point multiplication and so far i have implemented the pseudo code in c++.
here is my implementation:
#include <cstdint>
#include <iostream>
#include <bitset>
uint32_t maxu(uint32_t A, uint32_t B) {
return A > B ? A : B;
}
uint32_t mul(uint32_t A, uint32_t B) {
uint64_t t0, t1, t2;
t0 = A;
t1 = B;
t2 = (t0*t1) >> 32;
return t2;
}
uint32_t mullow(uint32_t A, uint32_t B) {
uint64_t t0, t1, t2;
t0 = A;
t1 = B;
t2 = (t0 * t1) & 0xFFFFFFFF; // Mask to extract lower 32 bits
return t2;
}
uint32_t nlz(uint32_t x) {
uint32_t z = 0;
if (x == 0) return 32;
if (x <= 0x0000FFFF) {
z = z + 16;
x = x << 16;
}
if (x <= 0x00FFFFFF) {
z = z + 8;
x = x << 8;
}
if (x <= 0x0FFFFFFF) {
z = z + 4;
x = x << 4;
}
if (x <= 0x3FFFFFFF) {
z = z + 2;
x = x << 2;
}
if (x <= 0x7FFFFFFF) {
z = z + 1;
}
return z;
}
uint32_t multiply(uint32_t x, uint32_t y) {
uint32_t absx, ex;
absx = x & 0x7FFFFFFF;
ex = absx >> 23; // exponent of x
uint32_t absy, ey;
absy = y & 0x7FFFFFFF;
ey = absy >> 23; //exponent of y
uint32_t nx, ny;
nx = absx >= 0x800000; // is normal bit for x
ny = absy >= 0x800000;// is normal bit for y
uint32_t mx, my, lambday, lambdax;
mx = maxu(nlz(absx), 8);
lambdax = mx - 8; //leading zeros of significand x
my = maxu(nlz(absy), 8);
lambday = my - 8; // PP
uint32_t dm1;
uint32_t mpx, mpy, highs, m, lows, c, g, hight, lowt, morlowt, b;
mpx = (x << mx) | 0x80000000; // normalize significand x
mpy = (y << my) | 0x80000000; // normalize significand y
highs = mul(mpx, mpy);
c = highs >= 0x80000000;
lows = mullow(mpx, mpy);
lowt = (lows != 0);
m = highs >> (7 + c);
morlowt = m | lowt;
g = (highs >> (6 + c)) & 1;
hight = (highs << (26 - c)) != 0;
b = g & (morlowt | hight);
dm1 = (((ex - nx) + (ey - ny)) - ((mx + my) + 110)) + c; // biased exponent
uint32_t sr = (x ^ y) & 0x80000000;
uint32_t result = ((sr | (dm1 << 23)) + m) + b;
//result = *reinterpret_cast<_Float16*>(&result);
return result;
}
int main() {
uint32_t x = 3.0;
uint32_t y = 5.0;
//function
uint32_t result = multiply(x, y);
uint32_t number = 15;
// Convert result to uint16_t to print its bits
//uint16_t result_bits = *reinterpret_cast<uint16_t*>(&result);
//
//Print the bits of the result
std::cout << "Bits of multiply(3.0, 5.0): " << std::bitset<32>(result) << std::endl;
std::cout << std::bitset<32>(number) << std::endl;
std::cout << mullow(5.0, 3.0) << std::endl;
std::cout << "result: " << result << std::endl;
return 0;
}
im trying to return the result which is a uint32_t
for now but the bit string for the result in main is 10101100011100000000000000000000
which isnt 15.
i would also like if you give me an idea of how i can make the function multiply
round to _Float16
. i dont know where in my implementation to fix this.
thanks