Question
Create a C++ program that transforms decimal numbers into floating-point format. The user has to input the exponent E and the mantissa F, given that F > E. When the conversion is complete, the program has to round the resulting floating-point value.
The code must be able to add and multiply two numbers in the floating-point format. It also has to calculate the double-precision error.
It has to have a GUI interface. Be creative! :) Have fun.
Explanation / Answer
// Branch-free float32 <-> float16 (IEEE 754 binary16) conversion.
//
// All the work is done on the raw bit patterns of the float: the class
// reinterprets a float's bits as int32, then uses masked XOR tricks of the
// form `x ^= (y ^ x) & -cond` (a branchless "if (cond) x = y") to clamp
// normals, saturate to infinity, map NaNs, and rescale subnormals.
//
// NOTE(review): reading a different union member than the one last written
// is technically UB in C++ (it is the classic type-punning idiom and works
// on all mainstream compilers); std::memcpy or C++20 std::bit_cast would be
// the strictly conforming alternative.
class Float16Compressor
{
    // Overlay of the three views of one 32-bit value.
    union Bits
    {
        float f;
        int32_t si;
        uint32_t ui;
    };

    static int const shift = 13;      // mantissa width difference: 23 - 10
    static int const shiftSign = 16;  // sign bit position difference: 31 - 15

    static int32_t const infN  = 0x7F800000; // flt32 infinity
    static int32_t const maxN  = 0x477FE000; // max flt16 normal as a flt32
    static int32_t const minN  = 0x38800000; // min flt16 normal as a flt32
    static int32_t const signN = 0x80000000; // flt32 sign bit

    static int32_t const infC = infN >> shift;
    static int32_t const nanN = (infC + 1) << shift; // minimum flt16 NaN as a flt32
    static int32_t const maxC = maxN >> shift;
    static int32_t const minC = minN >> shift;
    static int32_t const signC = signN >> shiftSign; // flt16 sign bit

    static int32_t const mulN = 0x52000000; // (1 << 23) / minN
    static int32_t const mulC = 0x33800000; // minN / (1 << (23 - shift))

    static int32_t const subC = 0x003FF; // max flt32 subnormal down shifted
    static int32_t const norC = 0x00400; // min flt32 normal down shifted

    static int32_t const maxD = infC - maxC - 1; // exponent rebias for overflow clamp
    static int32_t const minD = minC - subC - 1; // exponent rebias for normal range

public:

    /// Compress a 32-bit float into its nearest 16-bit half-float bit pattern.
    /// Handles normals, subnormals, +/-0, +/-infinity and NaN (truncating
    /// mantissa bits; values above the half range saturate to infinity).
    static uint16_t compress(float value)
    {
        Bits v, s;
        v.f = value;
        uint32_t sign = v.si & signN; // extract and strip the sign bit
        v.si ^= sign;
        sign >>= shiftSign; // logical shift: move sign to flt16 position
        s.si = mulN;
        s.si = s.f * v.f; // correct subnormals: rescale so they survive the shift
        v.si ^= (s.si ^ v.si) & -(minN > v.si);                  // use rescaled value if below min normal
        v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN)); // saturate overflow to infinity
        v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN)); // map NaN payloads into flt16 NaN space
        v.ui >>= shift; // logical shift: drop the low 13 mantissa bits
        v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC); // rebias exponent for inf/NaN range
        v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC); // rebias exponent for normal range
        return v.ui | sign;
    }

    /// Decompress a 16-bit half-float bit pattern back into a 32-bit float.
    /// Exact for every representable half value (the inverse of compress on
    /// values that round-trip).
    static float decompress(uint16_t value)
    {
        Bits v;
        v.ui = value;
        int32_t sign = v.si & signC; // extract and strip the sign bit
        v.si ^= sign;
        sign <<= shiftSign; // move sign back to flt32 position
        v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC); // undo normal-range rebias
        v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC); // undo inf/NaN rebias
        Bits s;
        s.si = mulC;
        s.f *= v.si; // rescale subnormals back to flt32 magnitude
        int32_t mask = -(norC > v.si); // all-ones when the input was subnormal
        v.si <<= shift; // restore mantissa position
        v.si ^= (s.si ^ v.si) & mask; // pick the rescaled value for subnormals
        v.si |= sign;
        return v.f;
    }
};