diff --git a/include/universal/number/rational/numeric_limits.hpp b/include/universal/number/rational/numeric_limits.hpp index af346f08..4b55dbc7 100644 --- a/include/universal/number/rational/numeric_limits.hpp +++ b/include/universal/number/rational/numeric_limits.hpp @@ -25,43 +25,48 @@ A class that is a literal type is a class (defined with class, struct or union) template class numeric_limits< sw::universal::rational > { + using RationalType = rational; public: static constexpr bool is_specialized = true; - static constexpr long min() { return 1; } // return minimum value - static constexpr uint64_t max() { return INT64_MAX; } // return maximum value - static constexpr int64_t lowest() { return -INT64_MAX; } // return most negative value - static constexpr long epsilon() { // return smallest effective increment from 1.0 - return long(1); + static constexpr RationalType min() { return RationalType().minpos(); } // return minimum value + static constexpr RationalType max() { return RationalType().maxpos(); } // return maximum value + static constexpr RationalType lowest() { return RationalType().maxneg(); } // return most negative value + static constexpr RationalType epsilon() { // return smallest effective increment from 1.0 + // the pattern is the ratio 0.....1 / 01.....0: bit 0 of the numerator over bit (nbits - 2) of the denominator + RationalType r(0, 0); + r.setnbit(0); + r.setdbit(nbits - 2); + return r; } - static constexpr long round_error() { // return largest rounding error + static constexpr RationalType round_error() { // return largest rounding error return long(0.5); } - static constexpr long denorm_min() { // return minimum denormalized value - return 1; + static constexpr RationalType denorm_min() { // return minimum denormalized value + return RationalType().minpos(); } - static constexpr uint64_t infinity() { // return positive infinity + static constexpr RationalType infinity() { // return positive infinity return INT64_MAX; } - static constexpr uint64_t quiet_NaN() { // return non-signaling NaN - 
return INT64_MAX; + static constexpr RationalType quiet_NaN() { // return non-signaling NaN + return RationalType(0, 0); } - static constexpr uint64_t signaling_NaN() { // return signaling NaN - return INT64_MAX; + static constexpr RationalType signaling_NaN() { // return signaling NaN + return RationalType(0, 0); } - static constexpr int digits = 3333333; - static constexpr int digits10 = 1000000; - static constexpr int max_digits10 = 1000000; + static constexpr int digits = nbits; + static constexpr int digits10 = 1000*nbits/3333; + static constexpr int max_digits10 = digits10+1; static constexpr bool is_signed = true; static constexpr bool is_integer = true; static constexpr bool is_exact = true; static constexpr int radix = 10; - static constexpr int min_exponent = 0; + static constexpr int min_exponent = 0; static constexpr int min_exponent10 = 0; - static constexpr int max_exponent = 0; + static constexpr int max_exponent = 0; static constexpr int max_exponent10 = 0; - static constexpr bool has_infinity = false; + static constexpr bool has_infinity = false; static constexpr bool has_quiet_NaN = false; static constexpr bool has_signaling_NaN = false; static constexpr float_denorm_style has_denorm = denorm_absent; diff --git a/include/universal/number/rational/rational.hpp b/include/universal/number/rational/rational.hpp index 32bddabd..5a46968c 100644 --- a/include/universal/number/rational/rational.hpp +++ b/include/universal/number/rational/rational.hpp @@ -64,15 +64,15 @@ namespace sw { namespace universal { // rational binary of 8bits - using rb8 = rational<8, uint8_t>; + using rb8 = rational<8, std::uint8_t>; // rational binary of 16bits - using rb16 = rational<16, uint16_t>; + using rb16 = rational<16, std::uint16_t>; // rational binary of 32bits - using rb32 = rational<32, uint32_t>; + using rb32 = rational<32, std::uint32_t>; // rational binary of 64bits - using rb64 = rational<64, uint64_t>; + using rb64 = rational<64, std::uint64_t>; // rational 
binary of 128bits - using rb128 = rational<128, uint32_t>; + using rb128 = rational<128, std::uint32_t>; }} diff --git a/include/universal/number/rational/rational_impl.hpp b/include/universal/number/rational/rational_impl.hpp index 8d818b45..de49cddf 100644 --- a/include/universal/number/rational/rational_impl.hpp +++ b/include/universal/number/rational/rational_impl.hpp @@ -32,26 +32,27 @@ inline rational& convert(const triple& v, rational -rational& minpos(rational& lminpos) { - return lminpos; +rational& minpos(rational& r) { + return r.minpos(); } template -rational& maxpos(rational& lmaxpos) { - return lmaxpos; +rational& maxpos(rational& r) { + return r.maxpos(); } template -rational& minneg(rational& lminneg) { - return lminneg; +rational& minneg(rational& r) { + return r.minneg(); } template -rational& maxneg(rational& lmaxneg) { - return lmaxneg; +rational& maxneg(rational& r) { + return r.maxneg(); } // template class representing a value in scientific notation, using a template size for the number of fraction bits -template +template class rational { public: + static constexpr unsigned nbits = _nbits; typedef bt BlockType; using SignedBlockBinary = blockbinary; @@ -211,8 +212,13 @@ class rational { constexpr void clear() noexcept { n = 0; d = 1; } constexpr void setzero() noexcept { n = 0; d = 1; } constexpr void setnan() noexcept { n = 0; d = 0; } - constexpr void set(const SignedBlockBinary& _n, const SignedBlockBinary& _d) noexcept { n = _n; d = _d; } + constexpr void set(const SignedBlockBinary& _n, const SignedBlockBinary& _d) noexcept { + n = _n; d = _d; + normalize(); + } constexpr void setbits(std::int64_t bits) noexcept { n = bits; d = 1; } + constexpr void setnbit(unsigned index) noexcept { n.set(index); } + constexpr void setdbit(unsigned index) noexcept { d.set(index); } // create specific number system values of interest constexpr rational& maxpos() noexcept { @@ -316,6 +322,14 @@ class rational { template::value, Real >::type> rational& 
convert_ieee754(Real rhs) noexcept { + if (std::isnan(rhs)) { + n = 0; d = 0; + return *this; + } + if (rhs == 0.0) { + n = 0; d = 1; + return *this; + } // extract components, convert mantissa to fraction with denominator 2^fbits, adjust fraction using scale, normalize uint64_t bits{ 0 }; uint64_t e{ 0 }, f{ 0 }; @@ -328,73 +342,108 @@ class rational { uint64_t a = f | ieee754_parameter::hmask; uint64_t b = ieee754_parameter::hmask; int exponent = static_cast(e - ieee754_parameter::bias); - std::cout << "exponent = " << exponent << '\n'; - std::cout << "a = " << to_binary(a) << '\n'; - std::cout << "b = " << to_binary(b) << '\n'; - if (a == b) { +// std::cout << "exponent = " << exponent << '\n'; +// std::cout << "a = " << to_binary(a) << '\n'; +// std::cout << "b = " << to_binary(b) << '\n'; + if (exponent == 0 && a == b) { n = 1; d = 1; } else { - // do we need to round the value or can we just throw the lower bits away? - // - // find the msb and shift it to the msb of the numerator - int msb = find_msb(a); - if (msb > nbits) { - int shift = 1 + msb - nbits; // one extra slot as we are shifting into a 2's complement encoding - a >>= shift; - b >>= shift; - } /* - // normalize the ratio - uint64_t r; - while (a % b > 0ull) { - r = a % b; - a = b; - b = r; - } + * two cases: + * exponent > 0 + * we need to scale the numerator + * 0000 0010 0100 0010 numerator + * 0000 0010 0000 0000 denominator + * we can shift the numerator up maximally (nbits - msb - 1) + * and after that we need to shift the denominator down maximally till the msb is on bit 0 + * + * exponent < 0 + * we need to scale the denominator + * 0000 0010 0100 0010 numerator + * 0000 0010 0000 0000 denominator + * we can shift the denominator up maximally (nbits - msb - 1) + * and after that we need to shift the numerator down maximally till the msb is on bit 0 */ - std::cout << "a = " << to_binary(a) << '\n'; - std::cout << "b = " << to_binary(b) << '\n'; - // and finally scale the ratio - msb = 
find_msb(a); - uint64_t maxUpShift = (nbits - msb - 1); + // TODO: do we need to round the value or is it ok if we just throw the lower bits away? if (exponent >= 0) { + + // find the msb of the numerator value and shift it to the msb of the numerator size of this rational + unsigned msb = find_msb(a); + if (msb >= nbits) { + unsigned shift = 1u + msb - nbits; // one extra slot as we are shifting into a 2's complement encoding + a >>= shift; + b >>= shift; + } + + //std::cout << "a = " << to_binary(a) << '\n'; + //std::cout << "b = " << to_binary(b) << '\n'; + + // and finally scale the ratio + + msb = find_msb(a); // find the msb of the numerator + uint64_t maxUpShift = (nbits - msb - 1u); // this will be 0 if we had to scale the ratio down to fit + // find the new msb of the denominator to direct how we need to scale while avoiding overflow + uint64_t maxDownShift = find_msb(b); uint64_t scale = static_cast(exponent); - // find the new msb to direct how we need to scale while avoiding overflow if (scale > maxUpShift) { - a <<= maxUpShift; - b >>= (scale - maxUpShift); + if (scale > (maxUpShift + maxDownShift)) { + // overflow, saturate to maxpos (sum form: scale - maxDownShift would wrap as unsigned) + std::cerr << "overflow: scale = " << exponent << '\n'; + maxpos(); + return *this; + } + else { + a <<= maxUpShift; + b >>= (scale - maxUpShift); + } } else { a <<= scale; } } else { + // find the msb of the denominator value and shift it to the msb of the denominator size of this rational + unsigned msb = find_msb(b); + if (msb >= nbits) { + unsigned shift = 1u + msb - nbits; // one extra slot as we are shifting into a 2's complement encoding + a >>= shift; + b >>= shift; + } + + //std::cout << "a = " << to_binary(a) << '\n'; + //std::cout << "b = " << to_binary(b) << '\n'; + + // and finally scale the ratio + + msb = find_msb(b); // find the msb of the denominator + uint64_t maxUpShift = (nbits - msb - 1u); // this will be 0 if we had to scale the ratio down to fit + // find the new msb of the numerator to direct how we 
need to scale while avoiding underflow + uint64_t maxDownShift = find_msb(a); uint64_t scale = static_cast(-exponent); - // find the new msb to direct how we need to scale while avoiding underflow - uint64_t maxDownShift = find_msb(b); - if (scale > maxDownShift) { - if (maxUpShift < (scale - maxDownShift)) { - // overflow, saturate to maxpos - std::cerr << "overflow: scale = " << scale << '\n'; - n = 0; d = 0; + if (scale > maxUpShift) { + if (scale > (maxUpShift + maxDownShift)) { + // underflow, flush to zero + std::cerr << "underflow: scale = " << exponent << '\n'; + setzero(); + return *this; } else { - a <<= maxUpShift; - b >>= maxDownShift; + b <<= maxUpShift; + a >>= (scale - maxUpShift); } } else { - b >>= scale; + b <<= scale; } } n = (s ? -static_cast(a) : static_cast(a)); - d = b; + d = static_cast(b); normalize(); - std::cout << "n = " << to_binary(n) << '\n'; - std::cout << "d = " << to_binary(d) << '\n'; +// std::cout << "n = " << to_binary(n) << '\n'; +// std::cout << "d = " << to_binary(d) << '\n'; } } return *this; @@ -451,11 +500,19 @@ inline std::string to_binary(const rational& v, bool nibbleMarker = tr /// binary logic functions template -inline bool operator==(const rational& lhs, const rational& rhs) { return false; } +inline bool operator==(const rational& lhs, const rational& rhs) { + return (lhs.d == rhs.d) && (lhs.n == rhs.n); +} template inline bool operator!=(const rational& lhs, const rational& rhs) { return !operator==(lhs, rhs); } template -inline bool operator< (const rational& lhs, const rational& rhs) { return false; } +inline bool operator< (const rational& lhs, const rational& rhs) { + // a / b is less than c / d when ad < bc + // problem is that the products ad and bc can overflow, thus destroying the logic structure + // so better is to take the hit and reduce to double, this will fail with some values but + // provides a better cover than evaluating (ad < bc) + return double(lhs) < double(rhs); +} template inline bool 
operator> (const rational& lhs, const rational& rhs) { return operator< (rhs, lhs); } template diff --git a/static/rational/conversion/assignment.cpp b/static/rational/conversion/assignment.cpp new file mode 100644 index 00000000..46cddf96 --- /dev/null +++ b/static/rational/conversion/assignment.cpp @@ -0,0 +1,172 @@ +// assignment.cpp: test suite runner for assignment conversion of floats to fixed-sized, arbitrary configuration rationals +// +// Copyright (C) 2017 Stillwater Supercomputing, Inc. +// SPDX-License-Identifier: MIT +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include + + +namespace sw { namespace universal { + + // TODO: needs a type trait to only match on rational<> type + template + int ValidateAssignment(bool reportTestCases) { + constexpr size_t nbits = RationalType::nbits; + constexpr size_t NR_ENCODINGS = (1ull << nbits); + int nrOfFailedTestCases = 0; + + RationalType a, b; + for (unsigned numerator = 0; numerator < NR_ENCODINGS; ++numerator) { + for (unsigned denominator = 0; denominator < NR_ENCODINGS; ++denominator) { + a.set(numerator, denominator); + double da = double(a); + b = da; + // std::cout << to_binary(a) << " : " << da << " vs " << b << '\n'; + if (a != b) { + if (a.isnan() && b.isnan()) continue; + ++nrOfFailedTestCases; + if (reportTestCases) ReportAssignmentError("FAIL", "=", da, b, a); + } + else { + // if (reportTestCases) ReportAssignmentSuccess("PASS", "=", da, b, a); + } + if (nrOfFailedTestCases > 9) return nrOfFailedTestCases; + } + } + + // test clipping or saturation + + return nrOfFailedTestCases; + } + +} } + +template +void GenerateBitWeightTable() { + using namespace sw::universal; +// constexpr size_t minNormalExponent = static_cast(-ieee754_parameter ::minNormalExp); + constexpr size_t minSubnormalExponent = static_cast(-ieee754_parameter::minSubnormalExp); + + TargetFloat multiplier = ieee754_parameter::minSubnormal; + for 
(size_t i = 0; i < minSubnormalExponent; ++i) { + std::cout << i << ' ' << to_binary(multiplier) << ' ' << multiplier << '\n'; + multiplier *= 2.0f; // these are error free multiplies + } +} + +template +void Ranges(Real v) { + using namespace sw::universal; + using rb10 = rational<10, std::uint16_t>; + using rb12 = rational<12, std::uint16_t>; + using rb14 = rational<14, std::uint16_t>; + using rb20 = rational<20, std::uint32_t>; + using rb24 = rational<24, std::uint32_t>; + + rb8 r8{ v }; + rb10 r10{ v }; + rb12 r12{ v }; + rb14 r14{ v }; + rb16 r16{ v }; + rb20 r20{ v }; + rb24 r24{ v }; + + std::cout << symmetry_range(r8) << '\n' << to_binary(r8) << " : " << r8 << '\n'; + std::cout << symmetry_range(r10) << '\n' << to_binary(r10) << " : " << r10 << '\n'; + std::cout << symmetry_range(r12) << '\n' << to_binary(r12) << " : " << r12 << '\n'; + std::cout << symmetry_range(r14) << '\n' << to_binary(r14) << " : " << r14 << '\n'; + std::cout << symmetry_range(r16) << '\n' << to_binary(r16) << " : " << r16 << '\n'; + std::cout << symmetry_range(r20) << '\n' << to_binary(r20) << " : " << r20 << '\n'; + std::cout << symmetry_range(r24) << '\n' << to_binary(r24) << " : " << r24 << '\n'; +} + +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. 
+//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + +int main() +try { + using namespace sw::universal; + + std::string test_suite = "rational float assignment validation"; + std::string test_tag = "assignment"; + bool reportTestCases = true; + int nrOfFailedTestCases = 0; + + ReportTestSuiteHeader(test_suite, reportTestCases); + +#if MANUAL_TESTING + + rb8 a,b; + a.set(0x02, 0x0A); + double da = double(a); + b = da; + std::cout << a << '\n'; + std::cout << b << '\n'; + + return 0; + Ranges(1.0f); + + // manual exhaustive test + // + nrOfFailedTestCases += ReportTestResult(ValidateAssignment(reportTestCases), type_tag(rb8()), test_tag); + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return EXIT_SUCCESS; +#else + +#if REGRESSION_LEVEL_1 + nrOfFailedTestCases += ReportTestResult(ValidateAssignment< rational<4, std::uint8_t> >(reportTestCases), type_tag(rational<4, std::uint8_t>()), test_tag); + + nrOfFailedTestCases += ReportTestResult(ValidateAssignment< rational<8, std::uint8_t> >(reportTestCases), type_tag(rational<8, std::uint8_t>()), test_tag); +#endif + +#if REGRESSION_LEVEL_2 +#endif + +#if REGRESSION_LEVEL_3 +#endif + +#if REGRESSION_LEVEL_4 +#endif + + ReportTestSuiteResults(test_suite, nrOfFailedTestCases); + return (nrOfFailedTestCases > 0 ? 
EXIT_FAILURE : EXIT_SUCCESS); + +#endif // MANUAL_TESTING +} +catch (char const* msg) { + std::cerr << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Caught unexpected universal arithmetic exception : " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Caught unexpected universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Uncaught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +}