diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eda5329..55b347b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,6 +35,7 @@ jobs: - run: cargo clippy --version - run: cargo clippy --features i64 - run: cargo clippy --all-targets --features i64 + - run: cargo clippy --all-targets --features i64,i128 - run: cargo clippy --all-targets --all-features test: @@ -44,12 +45,12 @@ jobs: - run: rustc --version - run: cargo test --features i64 - run: cargo test --features i128 - - run: cargo test --no-default-features --lib --features i64 - - run: cargo test --no-default-features --lib --features i128 - - run: cargo test --no-default-features --lib --features std,i64 - - run: cargo test --no-default-features --lib --features serde,i64 - - run: cargo test --no-default-features --lib --features i64,parity - - run: cargo test --no-default-features --lib --features i128,parity + - run: cargo test --no-default-features --lib --test it --features i64 + - run: cargo test --no-default-features --lib --test it --features i128 + - run: cargo test --no-default-features --lib --test it --features std,i64 + - run: cargo test --no-default-features --lib --test it --features serde,i64 + - run: cargo test --no-default-features --lib --test it --features i64,parity + - run: cargo test --no-default-features --lib --test it --features i128,parity - run: cargo test --all-features run-example: diff --git a/Cargo.toml b/Cargo.toml index 6ddeafb..345f31e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ std = ["derive_more/error"] i16 = [] i32 = [] i64 = [] -i128 = [] +i128 = ["dep:i256"] serde = ["dep:serde"] schemars = ["dep:schemars"] parity = ["parity-scale-codec"] @@ -46,16 +46,16 @@ quick-xml = ["serde?/derive", "serde?/alloc"] # FIXME: quick-xml#473 serde = { version = "1.0", default-features = false, optional = true } schemars = { version = "0.8", default-features = false, optional = true } typenum = "1.12.0" -derive_more = { version = "0.99.9", default-features = false } parity-scale-codec = { version = "3", default-features = false, optional = true } static_assertions = "1.1.0" itoa = "1.0.1" +i256 = { version = "=0.1.1", default-features = false, optional = true } [dev-dependencies] anyhow = { version = "1.0.38", default-features = false } colored = "2.0.0" criterion = "0.5" -derive_more = "0.99.9" +derive_more = { version = "1.0.0", features = ["full"] } trybuild = "1.0.85" serde_json = "1" proptest = "1.0.0" diff --git a/benches/README.md b/benches/README.md index 2a2b8a4..13ea43d 100644 --- a/benches/README.md +++ b/benches/README.md @@ -1,92 +1,92 @@ # Benchmarks -Benchmarks were performed on an [AMD Ryzen 7 4800HS CPU](https://en.wikichip.org/wiki/amd/ryzen_9/3900). +Benchmarks were performed on an Intel Core i9-14900K CPU. ```sh $ cargo bench --bench --features -$ critcmp new | tail +3 | sort | sed 's# ? ?/sec##' +$ critcmp new | tail +3 | sort | sed 's# ? 
?/sec##' | sed 's# 1.00##' ``` ## ops 64-bit FP with precision = 9: ``` -F64p9/cadd (~1e4) 1.00 1.9±0.01ns -F64p9/from_decimal(12345, -3) 1.00 1.6±0.00ns -F64p9/next_power_of_ten 1.00 3.6±0.01ns -F64p9/rdiv (~1e5/~1e4, Ceil) 1.00 1.9±0.01ns -F64p9/rdiv (~1e5/~1e4, Floor) 1.00 1.9±0.01ns -F64p9/rdiv (~1e5/~1e4, Nearest) 1.00 1.9±0.00ns -F64p9/rmul (~1e4, Ceil) 1.00 1.9±0.01ns -F64p9/rmul (~1e4, Floor) 1.00 1.9±0.03ns -F64p9/rmul (~1e4, Nearest) 1.00 1.9±0.00ns -F64p9/rsqrt (~1e4, Ceil) 1.00 43.7±0.29ns -F64p9/rsqrt (~1e4, Floor) 1.00 42.5±0.17ns -F64p9/rsqrt (~1e4, Nearest) 1.00 47.0±0.19ns -F64p9/rsqrt (adaptive, Ceil) 1.00 98.0±0.33ns -F64p9/rsqrt (adaptive, Floor) 1.00 94.4±1.45ns -F64p9/rsqrt (adaptive, Nearest) 1.00 99.6±0.67ns -F64p9/rsqrt (MAX, Ceil) 1.00 102.3±0.50ns -F64p9/rsqrt (MAX, Floor) 1.00 100.2±0.50ns -F64p9/rsqrt (MAX, Nearest) 1.00 102.7±0.80ns -F64p9/to_decimal(0) (12.345) 1.00 9.1±0.02ns -F64p9/to_decimal(i32::MAX) (12.345) 1.00 9.1±0.01ns -F64p9/try_from(f64) (~0.1) 1.00 64.8±0.33ns -F64p9/try_from(f64) (~1e-12) 1.00 132.5±0.46ns -F64p9/try_from(f64) (~1e6) 1.00 24.9±0.14ns -F64p9/try_from(f64) (MAX) 1.00 5.9±0.01µs -F64p9/try_from(f64) (MIN_POSITIVE) 1.00 1872.9±4.12ns +F64p9/cadd (~1e4) 1.0±0.03ns +F64p9/from_decimal(12345, -3) 1.0±0.01ns +F64p9/next_power_of_ten 1.6±0.03ns +F64p9/rdiv (~1e5/~1e4, Ceil) 1.0±0.03ns +F64p9/rdiv (~1e5/~1e4, Floor) 1.0±0.04ns +F64p9/rdiv (~1e5/~1e4, Nearest) 1.0±0.04ns +F64p9/rmul (~1e4, Ceil) 1.0±0.03ns +F64p9/rmul (~1e4, Floor) 1.0±0.04ns +F64p9/rmul (~1e4, Nearest) 1.0±0.05ns +F64p9/rsqrt (~1e4, Ceil) 1.0±0.02ns +F64p9/rsqrt (~1e4, Floor) 1.0±0.02ns +F64p9/rsqrt (~1e4, Nearest) 1.0±0.03ns +F64p9/rsqrt (adaptive, Ceil) 5.4±0.02ns +F64p9/rsqrt (adaptive, Floor) 4.9±0.01ns +F64p9/rsqrt (adaptive, Nearest) 5.5±0.02ns +F64p9/rsqrt (MAX, Ceil) 1.0±0.01ns +F64p9/rsqrt (MAX, Floor) 1.0±0.01ns +F64p9/rsqrt (MAX, Nearest) 1.0±0.01ns +F64p9/to_decimal(0) (12.345) 5.0±0.01ns +F64p9/to_decimal(i32::MAX) (12.345) 5.0±0.02ns +F64p9/try_from(f64) (~0.1) 33.2±0.08ns +F64p9/try_from(f64) (~1e-12) 61.9±0.20ns +F64p9/try_from(f64) (~1e6) 16.2±0.05ns +F64p9/try_from(f64) (MAX) 1263.8±2.26ns +F64p9/try_from(f64) (MIN_POSITIVE) 693.4±2.38ns ``` 128-bit FP with precision = 18: ``` -F128p18/cadd (~1e4) 1.00 2.8±0.00ns -F128p18/from_decimal(12345, -3) 1.00 9.1±0.03ns -F128p18/next_power_of_ten 1.00 6.3±0.03ns -F128p18/rdiv (~1e5/~1e4, Ceil) 1.00 157.3±0.51ns -F128p18/rdiv (~1e5/~1e4, Floor) 1.00 154.2±1.19ns -F128p18/rdiv (~1e5/~1e4, Nearest) 1.00 159.4±1.05ns -F128p18/rmul (~1e4, Ceil) 1.00 132.5±0.61ns -F128p18/rmul (~1e4, Floor) 1.00 132.3±0.79ns -F128p18/rmul (~1e4, Nearest) 1.00 134.1±0.79ns -F128p18/rsqrt (~1e4, Ceil) 1.00 428.3±7.08ns -F128p18/rsqrt (~1e4, Floor) 1.00 403.9±1.24ns -F128p18/rsqrt (~1e4, Nearest) 1.00 475.3±1.03ns -F128p18/rsqrt (adaptive, Ceil) 1.00 1469.3±3.05ns -F128p18/rsqrt (adaptive, Floor) 1.00 1436.2±1.98ns -F128p18/rsqrt (adaptive, Nearest) 1.00 1530.6±1.97ns -F128p18/rsqrt (MAX, Ceil) 1.00 1393.2±9.68ns -F128p18/rsqrt (MAX, Floor) 1.00 1335.9±10.01ns -F128p18/rsqrt (MAX, Nearest) 1.00 1441.7±11.63ns -F128p18/to_decimal(0) (12.345) 1.00 263.8±25.35ns -F128p18/to_decimal(i32::MAX) (12.345) 1.00 263.2±0.13ns -F128p18/try_from(f64) (~0.1) 1.00 59.3±0.36ns -F128p18/try_from(f64) (~1e-12) 1.00 133.0±0.14ns -F128p18/try_from(f64) (~1e6) 1.00 27.8±0.25ns -F128p18/try_from(f64) (MAX) 1.00 5.9±0.00µs -F128p18/try_from(f64) (MIN_POSITIVE) 1.00 1842.6±1.86ns +F128p18/cadd (~1e4) 1.9±0.05ns +F128p18/from_decimal(12345, -3) 4.8±0.02ns 
+F128p18/next_power_of_ten 3.1±0.04ns +F128p18/rdiv (~1e5/~1e4, Ceil) 10.7±0.15ns +F128p18/rdiv (~1e5/~1e4, Floor) 10.4±0.15ns +F128p18/rdiv (~1e5/~1e4, Nearest) 11.2±0.16ns +F128p18/rmul (~1e4, Ceil) 7.0±0.04ns +F128p18/rmul (~1e4, Floor) 7.0±0.02ns +F128p18/rmul (~1e4, Nearest) 7.2±0.06ns +F128p18/rsqrt (~1e4, Ceil) 40.0±0.24ns +F128p18/rsqrt (~1e4, Floor) 39.4±0.28ns +F128p18/rsqrt (~1e4, Nearest) 41.2±0.28ns +F128p18/rsqrt (adaptive, Ceil) 50.0±0.42ns +F128p18/rsqrt (adaptive, Floor) 49.2±0.42ns +F128p18/rsqrt (adaptive, Nearest) 50.6±0.38ns +F128p18/rsqrt (MAX, Ceil) 40.2±0.28ns +F128p18/rsqrt (MAX, Floor) 39.3±0.27ns +F128p18/rsqrt (MAX, Nearest) 41.4±0.38ns +F128p18/to_decimal(0) (12.345) 59.1±0.19ns +F128p18/to_decimal(i32::MAX) (12.345) 59.1±0.28ns +F128p18/try_from(f64) (~0.1) 28.5±1.51ns +F128p18/try_from(f64) (~1e-12) 62.1±0.20ns +F128p18/try_from(f64) (~1e6) 15.2±0.04ns +F128p18/try_from(f64) (MAX) 1264.6±4.34ns +F128p18/try_from(f64) (MIN_POSITIVE) 693.6±2.45ns ``` ## serde 64-bit FP with precision = 9: ``` -F64p9/deserialize 123.456 from f64 1.00 103.7±0.24ns -F64p9/deserialize 123.456 from string 1.00 54.8±0.18ns -F64p9/deserialize MAX from f64 1.00 59.8±0.24ns -F64p9/deserialize MAX from string 1.00 86.3±0.79ns -F64p9/serialize 123.456 to f64 1.00 48.2±0.46ns -F64p9/serialize 123.456 to string 1.00 27.5±0.29ns -F64p9/serialize MAX to f64 1.00 41.3±0.95ns -F64p9/serialize MAX to string 1.00 35.3±2.63ns +F64p9/deserialize 123.456 from f64 55.4±0.17ns +F64p9/deserialize 123.456 from string 27.1±0.34ns +F64p9/deserialize MAX from f64 44.4±0.03ns +F64p9/deserialize MAX from string 39.3±0.61ns +F64p9/serialize 123.456 to f64 27.0±0.33ns +F64p9/serialize 123.456 to string 13.1±0.21ns +F64p9/serialize MAX to f64 38.6±0.01ns +F64p9/serialize MAX to string 14.8±0.19ns ``` 128-bit FP with precision = 18: ``` -F128p18/deserialize 123.456 from f64 1.00 103.3±0.24ns -F128p18/deserialize 123.456 from string 1.00 70.8±0.09ns -F128p18/deserialize MAX from f64 1.00 56.6±0.19ns -F128p18/deserialize MAX from string 1.00 147.3±0.51ns -F128p18/serialize 123.456 to f64 1.00 67.7±0.38ns -F128p18/serialize 123.456 to string 1.00 51.7±0.64ns -F128p18/serialize MAX to f64 1.00 63.6±0.74ns -F128p18/serialize MAX to string 1.00 80.6±1.00ns +F128p18/deserialize 123.456 from f64 55.9±0.07ns +F128p18/deserialize 123.456 from string 31.5±0.74ns +F128p18/deserialize MAX from f64 40.8±0.20ns +F128p18/deserialize MAX from string 60.1±0.75ns +F128p18/serialize 123.456 to f64 30.4±0.15ns +F128p18/serialize 123.456 to string 23.6±0.29ns +F128p18/serialize MAX to f64 23.4±0.02ns +F128p18/serialize MAX to string 37.3±0.04ns ``` diff --git a/src/errors.rs b/src/errors.rs index 6d63cf4..ca487eb 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,10 +1,11 @@ use core::fmt::{Display, Formatter, Result}; +// TODO: once MSRV becomes 1.81, use `core::error::Error` instead. +// Also, enable doctests in CI checks even for no-std. #[cfg(feature = "std")] -use derive_more::Error; +use std::error::Error; /// Represents errors during arithmetic operations. -#[cfg_attr(feature = "std", derive(Error))] #[derive(Clone, Debug, PartialEq, Eq)] #[non_exhaustive] pub enum ArithmeticError { @@ -34,8 +35,10 @@ impl Display for ArithmeticError { } } +#[cfg(feature = "std")] +impl Error for ArithmeticError {} + /// Represents errors during conversions. 
-#[cfg_attr(feature = "std", derive(Error))] #[derive(Clone, Debug, PartialEq, Eq)] pub struct ConvertError { reason: &'static str, @@ -57,3 +60,6 @@ impl Display for ConvertError { f.write_str(self.as_str()) } } + +#[cfg(feature = "std")] +impl Error for ConvertError {} diff --git a/src/i256/mod.rs b/src/i256/mod.rs deleted file mode 100644 index cf55741..0000000 --- a/src/i256/mod.rs +++ /dev/null @@ -1,336 +0,0 @@ -use core::cmp::{Ordering, PartialOrd}; -use core::ops::{Add, Div, Mul, Neg, Sub}; - -use crate::ops::sqrt::Sqrt; -use crate::ops::{One, Zero}; -use crate::{ArithmeticError, ConvertError}; - -const TOTAL_BITS_COUNT: usize = 256; -const UINT_CHUNK_BITS_COUNT: usize = 64; -const UINT_CHUNKS_COUNT: usize = TOTAL_BITS_COUNT / UINT_CHUNK_BITS_COUNT; -const SIGN_MASK: u64 = 1 << (UINT_CHUNK_BITS_COUNT - 1); // MSB = 1, other are equal to 0. - -mod u256; - -use u256::U256; - -/// Signed 256-bit number. Works on top of U256 with help of two's complement. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct I256 { - inner: U256, -} - -impl I256 { - pub const I128_MAX: Self = Self::from_i128(i128::MAX); - pub const I128_MIN: Self = Self::from_i128(i128::MIN); - pub const U128_MAX: Self = Self::new(U256([u64::MAX, u64::MAX, 0, 0])); - pub const MAX: Self = Self::new(U256([u64::MAX, u64::MAX, u64::MAX, !SIGN_MASK])); - pub const MIN: Self = Self::new(U256([0, 0, 0, SIGN_MASK])); - - const fn new(x: U256) -> Self { - I256 { inner: x } - } - - pub const fn from_i128(x: i128) -> Self { - let msb = if x < 0 { u64::MAX } else { 0 }; - Self::new(U256([x as u64, (x >> 64) as u64, msb, msb])) // The only way to do it const - } - - const fn is_negative(self) -> bool { - let most_significant_chunk: u64 = self.chunks()[UINT_CHUNKS_COUNT - 1]; - most_significant_chunk & SIGN_MASK != 0 - } - - const fn chunks(&self) -> &[u64; UINT_CHUNKS_COUNT] { - &self.inner.0 - } -} - -impl Mul for I256 { - type Output = Self; - - #[inline] - fn mul(self, rhs: Self) -> Self::Output { - let lhs_was_negative = self.is_negative(); - let rhs_was_negative = rhs.is_negative(); - - let lhs = if lhs_was_negative { -self } else { self }; - let rhs = if rhs_was_negative { -rhs } else { rhs }; - - // Mustn't overflow because we're usually promoting just i128 to I256. - let result = Self::new(lhs.inner * rhs.inner); - if lhs_was_negative == rhs_was_negative { - result - } else { - -result - } - } -} - -impl Div for I256 { - type Output = Self; - - #[inline] - fn div(self, rhs: Self) -> Self::Output { - let lhs_was_negative = self.is_negative(); - let rhs_was_negative = rhs.is_negative(); - - let lhs = if lhs_was_negative { -self } else { self }; - let rhs = if rhs_was_negative { -rhs } else { rhs }; - - let result = Self::new(lhs.inner / rhs.inner); - if lhs_was_negative == rhs_was_negative { - result - } else { - -result - } - } -} - -impl Add for I256 { - type Output = Self; - - #[inline] - fn add(self, rhs: Self) -> Self::Output { - let (x, _) = self.inner.overflowing_add(rhs.inner); - Self::new(x) - } -} - -impl Sub for I256 { - type Output = Self; - - #[inline] - fn sub(self, rhs: Self) -> Self::Output { - let (x, _) = self.inner.overflowing_sub(rhs.inner); - Self::new(x) - } -} - -impl Neg for I256 { - type Output = Self; - - #[inline] - fn neg(self) -> Self::Output { - // Neg isn't defined for `I256::MIN` because on two's complement we always have one extra negative value. - debug_assert_ne!(self, Self::MIN); - // Overflow takes place when we negate zero. 
- let (x, _) = (!self.inner).overflowing_add(Self::ONE.inner); - Self::new(x) - } -} - -impl Ord for I256 { - #[inline] - fn cmp(&self, other: &Self) -> Ordering { - match (self.is_negative(), other.is_negative()) { - (true, false) => Ordering::Less, - (false, true) => Ordering::Greater, - _ => self.inner.cmp(&other.inner), - } - } -} - -impl PartialOrd for I256 { - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl From for I256 { - fn from(x: i128) -> Self { - Self::from_i128(x) - } -} - -impl TryFrom for i128 { - type Error = ArithmeticError; - - fn try_from(x: I256) -> Result { - if x > I256::I128_MAX || x < I256::I128_MIN { - return Err(ArithmeticError::Overflow); - } - Ok(i128::from(x.chunks()[0]) | (i128::from(x.chunks()[1]) << 64)) - } -} - -impl From for I256 { - fn from(x: u128) -> Self { - Self::new(x.into()) - } -} - -impl TryFrom for u128 { - type Error = ConvertError; - - fn try_from(x: I256) -> Result { - if x > I256::U128_MAX || x < I256::ZERO { - return Err(ConvertError::new("too big integer")); - } - Ok(u128::from(x.chunks()[0]) | (u128::from(x.chunks()[1]) << 64)) - } -} - -impl One for I256 { - const ONE: Self = Self::from_i128(1); -} - -impl Zero for I256 { - const ZERO: Self = Self::from_i128(0); -} - -impl Sqrt for I256 { - type Error = ArithmeticError; - - #[inline] - fn sqrt(self) -> Result { - debug_assert!(self >= Self::ZERO); - self.inner.sqrt().map(Self::new) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn min() { - assert_eq!(i128::try_from(I256::I128_MIN).unwrap(), i128::MIN); - } - - #[test] - fn max() { - assert_eq!(i128::try_from(I256::I128_MAX).unwrap(), i128::MAX); - } - - #[test] - fn cmp() { - use core::cmp::Ordering::{self, *}; - fn t(a: i128, b: i128, ord: Ordering) { - let a = I256::from(a); - let b = I256::from(b); - assert_eq!(a.cmp(&b), ord); - assert_eq!(b.cmp(&a), ord.reverse()); - } - t(5, 3, Greater); - t(-5, -5, Equal); - t(0, -5, Greater); - } - - #[test] - fn from_i128() { - fn t(x: i128) { - assert_eq!(i128::try_from(I256::from(x)).unwrap(), x); - } - t(0); - t(1); - t(-1); - t(i128::MAX); - t(i128::MAX - 1); - t(i128::MIN); - t(i128::MIN + 1); - } - - #[test] - fn neg_i128() { - fn t(x: i128) { - assert_eq!(i128::try_from(-I256::from(x)).unwrap(), -x); - assert_eq!(i128::try_from(-I256::from(-x)).unwrap(), x); - } - t(0); - t(1); - t(1234); - t(123_456_789_987); - } - - #[test] - fn neg_i256() { - fn t(value: I256, expected: I256) { - let actual: I256 = -value; - assert_eq!(actual, expected); - assert_eq!(-actual, value); - } - t(I256::MAX, I256::new(U256([1, 0, 0, SIGN_MASK]))); - t( - I256::new(U256([ - 0xa869_bc02_ecba_4436, - 0x5ef3_b3e7_5daa_96ce, - 0x369a_22b0_7ff5_955b, - 0x8aa9_fa9e_77c4_2900, - ])), - I256::new(U256([ - 0x579643fd1345bbca, - 0xa10c4c18a2556931, - 0xc965dd4f800a6aa4, - 0x75560561883bd6ff, - ])), - ); - } - - #[test] - #[should_panic] - fn neg_i256_min() { - let _x = -I256::MIN; - } - - #[test] - fn add() { - fn t(a: i128, b: i128, expected: i128) { - let a = I256::from(a); - let b = I256::from(b); - assert_eq!(i128::try_from(a + b).unwrap(), expected); - assert_eq!(i128::try_from(b + a).unwrap(), expected); - assert_eq!(i128::try_from((-a) + (-b)).unwrap(), -expected); - assert_eq!(i128::try_from((-b) + (-a)).unwrap(), -expected); - } - t(0, 0, 0); - t(1111, 3210, 4321); - t(-1111, 5432, 4321); - t(-4321, 5432, 1111); - } - - #[test] - fn sub() { - fn t(a: i128, b: i128, expected: i128) { - let a = I256::from(a); - let b = I256::from(b); - 
assert_eq!(i128::try_from(a - b).unwrap(), expected); - assert_eq!(i128::try_from(b - a).unwrap(), -expected); - assert_eq!(i128::try_from((-a) - (-b)).unwrap(), -expected); - assert_eq!(i128::try_from((-b) - (-a)).unwrap(), expected); - } - t(0, 0, 0); - t(4321, 1111, 3210); - t(4321, -1111, 5432); - t(1111, -4321, 5432); - } - - #[test] - fn mul() { - fn t(a: i128, b: i128, expected: i128) { - let a = I256::from(a); - let b = I256::from(b); - assert_eq!(i128::try_from(a * b).unwrap(), expected); - assert_eq!(i128::try_from(b * a).unwrap(), expected); - assert_eq!(i128::try_from((-a) * (-b)).unwrap(), expected); - assert_eq!(i128::try_from((-b) * (-a)).unwrap(), expected); - } - t(0, 0, 0); - t(7, 5, 35); - t(-7, 5, -35); - } - - #[test] - fn div() { - fn t(a: i128, b: i128, expected: i128) { - let a = I256::from(a); - let b = I256::from(b); - assert_eq!(i128::try_from(a / b).unwrap(), expected); - assert_eq!(i128::try_from((-a) / (-b)).unwrap(), expected); - } - t(0, 1, 0); - t(35, 5, 7); - t(-35, 5, -7); - } -} diff --git a/src/i256/u256.rs b/src/i256/u256.rs deleted file mode 100644 index 9b30cf5..0000000 --- a/src/i256/u256.rs +++ /dev/null @@ -1,869 +0,0 @@ -//! # `U256` -//! -//! Expanded unsigned 256-bit integer. -//! -//! Implementation courtesy of [`uint` crate](https://crates.io/crates/uint). - -use crate::errors::{ArithmeticError, ConvertError}; -use crate::ops::sqrt::Sqrt; -use crate::ops::Zero; - -macro_rules! impl_map_from { - ($thing:ident, $from:ty, $to:ty) => { - impl From<$from> for $thing { - fn from(value: $from) -> $thing { - From::from(value as $to) - } - } - }; -} - -macro_rules! uint_overflowing_binop { - ($name:ident, $n_words: tt, $self_expr: expr, $other: expr, $fn:expr) => {{ - let $name(ref me) = $self_expr; - let $name(ref you) = $other; - - let mut ret = [0u64; $n_words]; - let ret_ptr = &mut ret as *mut [u64; $n_words] as *mut u64; - let mut carry = 0u64; - - uint! { @unroll - for i in 0..$n_words { - if carry != 0 { - let (res1, overflow1) = ($fn)(me[i], you[i]); - let (res2, overflow2) = ($fn)(res1, carry); - - unsafe { - // SAFETY: `i` is within bounds and `i * size_of::() < isize::MAX` - #![allow(clippy::ptr_offset_with_cast)] - *ret_ptr.offset(i as _) = res2 - } - carry = (overflow1 as u8 + overflow2 as u8) as u64; - } else { - let (res, overflow) = ($fn)(me[i], you[i]); - - unsafe { - // SAFETY: `i` is within bounds and `i * size_of::() < isize::MAX` - #![allow(clippy::ptr_offset_with_cast)] - *ret_ptr.offset(i as _) = res - } - - carry = overflow as u64; - } - } - } - - ($name(ret), carry > 0) - }}; -} - -macro_rules! uint_full_mul_reg { - ($name:ident, 8, $self_expr:expr, $other:expr) => { - $crate::uint_full_mul_reg!($name, 8, $self_expr, $other, |a, b| a != 0 || b != 0); - }; - ($name:ident, $n_words:tt, $self_expr:expr, $other:expr) => { - uint_full_mul_reg!($name, $n_words, $self_expr, $other, |_, _| true) - }; - ($name:ident, $n_words:tt, $self_expr:expr, $other:expr, $check:expr) => {{ - { - #![allow(unused_assignments)] - - let $name(ref me) = $self_expr; - let $name(ref you) = $other; - let mut ret = [0u64; $n_words * 2]; - - uint! { @unroll - for i in 0..$n_words { - let mut carry = 0u64; - let b = you[i]; - - uint! 
{ @unroll - for j in 0..$n_words { - #[allow(clippy::redundant_closure_call)] - if $check(me[j], carry) { - let a = me[j]; - - let (hi, low) = Self::split_u128(a as u128 * b as u128); - - let overflow = { - let existing_low = &mut ret[i + j]; - let (low, o) = low.overflowing_add(*existing_low); - *existing_low = low; - o - }; - - carry = { - let existing_hi = &mut ret[i + j + 1]; - let hi = hi + overflow as u64; - let (hi, o0) = hi.overflowing_add(carry); - let (hi, o1) = hi.overflowing_add(*existing_hi); - *existing_hi = hi; - - (o0 | o1) as u64 - } - } - } - } - } - } - - ret - } - }}; -} - -macro_rules! uint_overflowing_mul { - ($name:ident, $n_words: tt, $self_expr: expr, $other: expr) => {{ - let ret: [u64; $n_words * 2] = uint_full_mul_reg!($name, $n_words, $self_expr, $other); - - // The safety of this is enforced by the compiler - let ret: [[u64; $n_words]; 2] = unsafe { core::mem::transmute(ret) }; - - // The compiler WILL NOT inline this if you remove this annotation. - #[inline(always)] - fn any_nonzero(arr: &[u64; $n_words]) -> bool { - uint! { @unroll - for i in 0..$n_words { - if arr[i] != 0 { - return true; - } - } - } - - false - } - - ($name(ret[0]), any_nonzero(&ret[1])) - }}; -} - -fn panic_on_overflow(flag: bool) { - if flag { - panic!("arithmetic operation overflow") - } -} - -macro_rules! impl_mul_from { - ($name: ty, $other: ident) => { - impl core::ops::Mul<$other> for $name { - type Output = $name; - - fn mul(self, other: $other) -> $name { - let bignum: $name = other.into(); - let (result, overflow) = self.overflowing_mul(bignum); - panic_on_overflow(overflow); - result - } - } - - impl<'a> core::ops::Mul<&'a $other> for $name { - type Output = $name; - - fn mul(self, other: &'a $other) -> $name { - let bignum: $name = (*other).into(); - let (result, overflow) = self.overflowing_mul(bignum); - panic_on_overflow(overflow); - result - } - } - - impl<'a> core::ops::Mul<&'a $other> for &'a $name { - type Output = $name; - - fn mul(self, other: &'a $other) -> $name { - let bignum: $name = (*other).into(); - let (result, overflow) = self.overflowing_mul(bignum); - panic_on_overflow(overflow); - result - } - } - - impl<'a> core::ops::Mul<$other> for &'a $name { - type Output = $name; - - fn mul(self, other: $other) -> $name { - let bignum: $name = other.into(); - let (result, overflow) = self.overflowing_mul(bignum); - panic_on_overflow(overflow); - result - } - } - - impl core::ops::MulAssign<$other> for $name { - fn mul_assign(&mut self, other: $other) { - let result = *self * other; - *self = result - } - } - }; -} - -macro_rules! 
impl_mul_for_primitive { - ($name: ty, $other: ident) => { - impl core::ops::Mul<$other> for $name { - type Output = $name; - - fn mul(self, other: $other) -> $name { - let (result, carry) = self.overflowing_mul_u64(other as u64); - panic_on_overflow(carry > 0); - result - } - } - - impl<'a> core::ops::Mul<&'a $other> for $name { - type Output = $name; - - fn mul(self, other: &'a $other) -> $name { - let (result, carry) = self.overflowing_mul_u64(*other as u64); - panic_on_overflow(carry > 0); - result - } - } - - impl<'a> core::ops::Mul<&'a $other> for &'a $name { - type Output = $name; - - fn mul(self, other: &'a $other) -> $name { - let (result, carry) = self.overflowing_mul_u64(*other as u64); - panic_on_overflow(carry > 0); - result - } - } - - impl<'a> core::ops::Mul<$other> for &'a $name { - type Output = $name; - - fn mul(self, other: $other) -> $name { - let (result, carry) = self.overflowing_mul_u64(other as u64); - panic_on_overflow(carry > 0); - result - } - } - - impl core::ops::MulAssign<$other> for $name { - fn mul_assign(&mut self, other: $other) { - let result = *self * (other as u64); - *self = result - } - } - }; -} - -macro_rules! uint { - ( $(#[$attr:meta])* $visibility:vis struct $name:ident (1); ) => { - uint!{ @construct $(#[$attr])* $visibility struct $name (1); } - }; - - ( $(#[$attr:meta])* $visibility:vis struct $name:ident ( $n_words:tt ); ) => { - uint! { @construct $(#[$attr])* $visibility struct $name ($n_words); } - }; - ( @construct $(#[$attr:meta])* $visibility:vis struct $name:ident ( $n_words:tt ); ) => { - /// Little-endian large integer type - #[repr(C)] - $(#[$attr])* - #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] - $visibility struct $name (pub(crate) [u64; $n_words]); - - /// Get a reference to the underlying little-endian words. - impl AsRef<[u64]> for $name { - #[inline] - fn as_ref(&self) -> &[u64] { - &self.0 - } - } - - impl $name { - const WORD_BITS: usize = 64; - - /// Low word (u64) - #[inline] - const fn low_u64(&self) -> u64 { - let &$name(ref arr) = self; - arr[0] - } - - /// Conversion to usize with overflow checking - /// - /// # Panics - /// - /// Panics if the number is larger than usize::max_value(). - #[inline] - fn as_usize(&self) -> usize { - let &$name(ref arr) = self; - if !self.fits_word() || arr[0] > usize::max_value() as u64 { - panic!("Integer overflow when casting to usize") - } - arr[0] as usize - } - - // Whether this fits u64. - #[inline] - fn fits_word(&self) -> bool { - let &$name(ref arr) = self; - for i in 1..$n_words { if arr[i] != 0 { return false; } } - return true; - } - - /// Return the least number of bits needed to represent the number - #[inline] - fn bits(&self) -> usize { - let &$name(ref arr) = self; - for i in 1..$n_words { - if arr[$n_words - i] > 0 { return (0x40 * ($n_words - i + 1)) - arr[$n_words - i].leading_zeros() as usize; } - } - 0x40 - arr[0].leading_zeros() as usize - } - - /// Zero (additive identity) of this type. 
- #[inline] - const fn zero() -> Self { - Self([0; $n_words]) - } - - fn full_shl(self, shift: u32) -> [u64; $n_words + 1] { - debug_assert!(shift < Self::WORD_BITS as u32); - let mut u = [0u64; $n_words + 1]; - let u_lo = self.0[0] << shift; - let u_hi = self >> (Self::WORD_BITS as u32 - shift); - u[0] = u_lo; - u[1..].copy_from_slice(&u_hi.0[..]); - u - } - - fn full_shr(u: [u64; $n_words + 1], shift: u32) -> Self { - debug_assert!(shift < Self::WORD_BITS as u32); - let mut res = Self::zero(); - for i in 0..$n_words { - res.0[i] = u[i] >> shift; - } - // carry - if shift > 0 { - for i in 1..=$n_words { - res.0[i - 1] |= u[i] << (Self::WORD_BITS as u32 - shift); - } - } - res - } - - fn full_mul_u64(self, by: u64) -> [u64; $n_words + 1] { - let (prod, carry) = self.overflowing_mul_u64(by); - let mut res = [0u64; $n_words + 1]; - res[..$n_words].copy_from_slice(&prod.0[..]); - res[$n_words] = carry; - res - } - - fn div_mod_small(mut self, other: u64) -> (Self, Self) { - let mut rem = 0u64; - self.0.iter_mut().rev().for_each(|d| { - let (q, r) = Self::div_mod_word(rem, *d, other); - *d = q; - rem = r; - }); - (self, rem.into()) - } - - // See Knuth, TAOCP, Volume 2, section 4.3.1, Algorithm D. - fn div_mod_knuth(self, mut v: Self, n: usize, m: usize) -> (Self, Self) { - debug_assert!(self.bits() >= v.bits() && !v.fits_word()); - debug_assert!(n + m <= $n_words); - // D1. - // Make sure 64th bit in v's highest word is set. - // If we shift both self and v, it won't affect the quotient - // and the remainder will only need to be shifted back. - let shift = v.0[n - 1].leading_zeros(); - v <<= shift; - // u will store the remainder (shifted) - let mut u = self.full_shl(shift); - - // quotient - let mut q = Self::zero(); - let v_n_1 = v.0[n - 1]; - let v_n_2 = v.0[n - 2]; - - // D2. D7. - // iterate from m downto 0 - for j in (0..=m).rev() { - let u_jn = u[j + n]; - - // D3. - // q_hat is our guess for the j-th quotient digit - // q_hat = min(b - 1, (u_{j+n} * b + u_{j+n-1}) / v_{n-1}) - // b = 1 << WORD_BITS - // Theorem B: q_hat >= q_j >= q_hat - 2 - let mut q_hat = if u_jn < v_n_1 { - let (mut q_hat, mut r_hat) = Self::div_mod_word(u_jn, u[j + n - 1], v_n_1); - // this loop takes at most 2 iterations - loop { - // check if q_hat * v_{n-2} > b * r_hat + u_{j+n-2} - let (hi, lo) = Self::split_u128(u128::from(q_hat) * u128::from(v_n_2)); - if (hi, lo) <= (r_hat, u[j + n - 2]) { - break; - } - // then iterate till it doesn't hold - q_hat -= 1; - let (new_r_hat, overflow) = r_hat.overflowing_add(v_n_1); - r_hat = new_r_hat; - // if r_hat overflowed, we're done - if overflow { - break; - } - } - q_hat - } else { - // here q_hat >= q_j >= q_hat - 1 - u64::max_value() - }; - - // ex. 20: - // since q_hat * v_{n-2} <= b * r_hat + u_{j+n-2}, - // either q_hat == q_j, or q_hat == q_j + 1 - - // D4. - // let's assume optimistically q_hat == q_j - // subtract (q_hat * v) from u[j..] - let q_hat_v = v.full_mul_u64(q_hat); - // u[j..] -= q_hat_v; - let c = Self::sub_slice(&mut u[j..], &q_hat_v[..n + 1]); - - // D6. - // actually, q_hat == q_j + 1 and u[j..] has overflowed - // highly unlikely ~ (1 / 2^63) - if c { - q_hat -= 1; - // add v to u[j..] - let c = Self::add_slice(&mut u[j..], &v.0[..n]); - u[j + n] = u[j + n].wrapping_add(u64::from(c)); - } - - // D5. - q.0[j] = q_hat; - } - - // D8. 
- let remainder = Self::full_shr(u, shift); - - (q, remainder) - } - - // Returns the least number of words needed to represent the nonzero number - fn words(bits: usize) -> usize { - debug_assert!(bits > 0); - 1 + (bits - 1) / Self::WORD_BITS - } - - /// Returns a pair `(self / other, self % other)`. - /// - /// # Panics - /// - /// Panics if `other` is zero. - fn div_mod(self, other: Self) -> (Self, Self) { - let my_bits = self.bits(); - let your_bits = other.bits(); - - assert!(your_bits != 0, "division by zero"); - - // Early return in case we are dividing by a larger number than us - if my_bits < your_bits { - return (Self::zero(), self); - } - - if your_bits <= Self::WORD_BITS { - return self.div_mod_small(other.low_u64()); - } - - let (n, m) = { - let my_words = Self::words(my_bits); - let your_words = Self::words(your_bits); - (your_words, my_words - your_words) - }; - - self.div_mod_knuth(other, n, m) - } - - /// Add with overflow. - #[inline(always)] - pub(crate) fn overflowing_add(self, other: $name) -> ($name, bool) { - uint_overflowing_binop!( - $name, - $n_words, - self, - other, - u64::overflowing_add - ) - } - - /// Subtraction which underflows and returns a flag if it does. - #[inline(always)] - pub(crate) fn overflowing_sub(self, other: $name) -> ($name, bool) { - uint_overflowing_binop!( - $name, - $n_words, - self, - other, - u64::overflowing_sub - ) - } - - /// Multiply with overflow, returning a flag if it does. - #[inline(always)] - pub(crate) fn overflowing_mul(self, other: $name) -> ($name, bool) { - uint_overflowing_mul!($name, $n_words, self, other) - } - - #[inline(always)] - fn div_mod_word(hi: u64, lo: u64, y: u64) -> (u64, u64) { - debug_assert!(hi < y); - // NOTE: this is slow (__udivti3) - // let x = (u128::from(hi) << 64) + u128::from(lo); - // let d = u128::from(d); - // ((x / d) as u64, (x % d) as u64) - // TODO: look at https://gmplib.org/~tege/division-paper.pdf - const TWO32: u64 = 1 << 32; - let s = y.leading_zeros(); - let y = y << s; - let (yn1, yn0) = Self::split(y); - let un32 = (hi << s) | lo.checked_shr(64 - s).unwrap_or(0); - let un10 = lo << s; - let (un1, un0) = Self::split(un10); - let mut q1 = un32 / yn1; - let mut rhat = un32 - q1 * yn1; - - while q1 >= TWO32 || q1 * yn0 > TWO32 * rhat + un1 { - q1 -= 1; - rhat += yn1; - if rhat >= TWO32 { - break; - } - } - - let un21 = un32.wrapping_mul(TWO32).wrapping_add(un1).wrapping_sub(q1.wrapping_mul(y)); - let mut q0 = un21 / yn1; - rhat = un21.wrapping_sub(q0.wrapping_mul(yn1)); - - while q0 >= TWO32 || q0 * yn0 > TWO32 * rhat + un0 { - q0 -= 1; - rhat += yn1; - if rhat >= TWO32 { - break; - } - } - - let rem = un21.wrapping_mul(TWO32).wrapping_add(un0).wrapping_sub(y.wrapping_mul(q0)); - (q1 * TWO32 + q0, rem >> s) - } - - #[inline(always)] - fn add_slice(a: &mut [u64], b: &[u64]) -> bool { - Self::binop_slice(a, b, u64::overflowing_add) - } - - #[inline(always)] - fn sub_slice(a: &mut [u64], b: &[u64]) -> bool { - Self::binop_slice(a, b, u64::overflowing_sub) - } - - #[inline(always)] - fn binop_slice(a: &mut [u64], b: &[u64], binop: impl Fn(u64, u64) -> (u64, bool) + Copy) -> bool { - let mut c = false; - a.iter_mut().zip(b.iter()).for_each(|(x, y)| { - let (res, carry) = Self::binop_carry(*x, *y, c, binop); - *x = res; - c = carry; - }); - c - } - - #[inline(always)] - fn binop_carry(a: u64, b: u64, c: bool, binop: impl Fn(u64, u64) -> (u64, bool)) -> (u64, bool) { - let (res1, overflow1) = b.overflowing_add(u64::from(c)); - let (res2, overflow2) = binop(a, res1); - (res2, overflow1 || 
overflow2) - } - - #[inline(always)] - const fn mul_u64(a: u64, b: u64, carry: u64) -> (u64, u64) { - let (hi, lo) = Self::split_u128(a as u128 * b as u128 + carry as u128); - (lo, hi) - } - - #[inline(always)] - const fn split(a: u64) -> (u64, u64) { - (a >> 32, a & 0xFFFF_FFFF) - } - - #[inline(always)] - const fn split_u128(a: u128) -> (u64, u64) { - ((a >> 64) as _, (a & 0xFFFFFFFFFFFFFFFF) as _) - } - - /// Overflowing multiplication by u64. - /// Returns the result and carry. - fn overflowing_mul_u64(mut self, other: u64) -> (Self, u64) { - let mut carry = 0u64; - - for d in self.0.iter_mut() { - let (res, c) = Self::mul_u64(*d, other, carry); - *d = res; - carry = c; - } - - (self, carry) - } - - fn leading_zeros(&self) -> u32 { - self.0.iter().rev().fold((0, false), |(acc, one_was_met), &chunk| { - if one_was_met { - (acc, true) - } else { - (acc + chunk.leading_zeros(), chunk != 0) - } - }).0 - } - } - - impl core::convert::From for $name { - fn from(value: u64) -> $name { - let mut ret = [0; $n_words]; - ret[0] = value; - $name(ret) - } - } - - impl core::convert::TryFrom<$name> for u128 { - type Error = ConvertError; - - fn try_from(value: $name) -> Result { - if $n_words * $name::WORD_BITS as u32 - value.leading_zeros() > 128 { - return Err(ConvertError::new("too big integer")); - } - let ret = (value.0[0] as u128) | ((value.0[1] as u128) << $name::WORD_BITS as u32); - Ok(ret) - } - } - - impl core::convert::From for $name { - fn from(value: u128) -> Self { - let mut ret = [0u64; $n_words]; - ret[0] = value as _ ; - ret[1] = (value >> 64) as _; - $name(ret) - } - } - - impl_map_from!($name, u32, u64); - - impl core::convert::From for $name { - fn from(value: i64) -> $name { - match value >= 0 { - true => From::from(value as u64), - false => { panic!("Unsigned integer can't be created from negative value"); } - } - } - } - - // all other impls - impl_mul_from!($name, $name); - impl_mul_for_primitive!($name, u64); - impl_mul_for_primitive!($name, usize); - - impl core::ops::Div for $name where T: Into<$name> { - type Output = $name; - - fn div(self, other: T) -> $name { - let other: Self = other.into(); - self.div_mod(other).0 - } - } - - impl<'a, T> core::ops::Div for &'a $name where T: Into<$name> { - type Output = $name; - - fn div(self, other: T) -> $name { - *self / other - } - } - - impl core::ops::DivAssign for $name where T: Into<$name> { - fn div_assign(&mut self, other: T) { - *self = *self / other.into(); - } - } - - impl core::ops::Not for $name { - type Output = $name; - - #[inline] - fn not(self) -> $name { - let $name(ref arr) = self; - let mut ret = [0u64; $n_words]; - for i in 0..$n_words { - ret[i] = !arr[i]; - } - $name(ret) - } - } - - impl core::ops::Shl for $name where T: Into<$name> { - type Output = $name; - - fn shl(self, shift: T) -> $name { - let shift = shift.into().as_usize(); - let $name(ref original) = self; - let mut ret = [0u64; $n_words]; - let word_shift = shift / 64; - let bit_shift = shift % 64; - - // shift - for i in word_shift..$n_words { - ret[i] = original[i - word_shift] << bit_shift; - } - // carry - if bit_shift > 0 { - for i in word_shift+1..$n_words { - ret[i] += original[i - 1 - word_shift] >> (64 - bit_shift); - } - } - $name(ret) - } - } - - impl<'a, T> core::ops::Shl for &'a $name where T: Into<$name> { - type Output = $name; - fn shl(self, shift: T) -> $name { - *self << shift - } - } - - impl core::ops::ShlAssign for $name where T: Into<$name> { - fn shl_assign(&mut self, shift: T) { - *self = *self << shift; - } - } - - impl 
core::ops::Shr for $name where T: Into<$name> { - type Output = $name; - - fn shr(self, shift: T) -> $name { - let shift = shift.into().as_usize(); - let $name(ref original) = self; - let mut ret = [0u64; $n_words]; - let word_shift = shift / 64; - let bit_shift = shift % 64; - - // shift - for i in word_shift..$n_words { - ret[i - word_shift] = original[i] >> bit_shift; - } - - // Carry - if bit_shift > 0 { - for i in word_shift+1..$n_words { - ret[i - word_shift - 1] += original[i] << (64 - bit_shift); - } - } - - $name(ret) - } - } - - impl<'a, T> core::ops::Shr for &'a $name where T: Into<$name> { - type Output = $name; - fn shr(self, shift: T) -> $name { - *self >> shift - } - } - - impl core::ops::ShrAssign for $name where T: Into<$name> { - fn shr_assign(&mut self, shift: T) { - *self = *self >> shift; - } - } - - impl core::cmp::Ord for $name { - fn cmp(&self, other: &$name) -> core::cmp::Ordering { - self.as_ref().iter().rev().cmp(other.as_ref().iter().rev()) - } - } - - impl core::cmp::PartialOrd for $name { - fn partial_cmp(&self, other: &$name) -> Option { - Some(self.cmp(other)) - } - } - - impl Zero for $name { - const ZERO: Self = Self([0; $n_words]); - } - - impl Sqrt for $name { - type Error = ArithmeticError; - - #[inline] - fn sqrt(self) -> Result { - #[inline] - fn least_significant_word_or(mut a: $name, b: u64) -> $name { - a.0[0] |= b; - a - } - - let result = match u128::try_from(self) { - Ok(x) => x.sqrt()?.into(), - Err(_) => { - let lo = (self >> 2u32).sqrt()? << 1u32; - let hi = least_significant_word_or(lo, 1); - let (hi_square, _): (U256, _) = hi.overflowing_mul(hi); - if hi_square <= self { - hi - } else { - lo - } - } - }; - Ok(result) - } - } - }; - - (@unroll for $v:ident in $start:tt..$end:tt {$($c:tt)*}) => { - #[allow(non_upper_case_globals)] - #[allow(unused_comparisons)] - { - uint!(@unroll @$v, 0, $end, { - if $v >= $start {$($c)*} - } - ); - } - }; - - (@unroll @$v:ident, $a:expr, 4, $c:block) => { - { const $v: usize = $a; $c } - { const $v: usize = $a + 1; $c } - { const $v: usize = $a + 2; $c } - { const $v: usize = $a + 3; $c } - }; -} - -uint! { - pub(crate) struct U256(4); -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn leading_zeros() { - fn t(x: U256, expected: u32) { - assert_eq!(x.leading_zeros(), expected); - } - t(U256::ZERO, 256); - t(1u128.into(), 255); - t(2u128.into(), 254); - t((1u128 << 127).into(), 128); - t(u128::MAX.into(), 128); - t((1u128 << 117).into(), 138); - t((u128::MAX >> 10).into(), 138); - t((u128::MAX >> 10).into(), 138); - } -} diff --git a/src/i256_polyfill.rs b/src/i256_polyfill.rs new file mode 100644 index 0000000..ce9419c --- /dev/null +++ b/src/i256_polyfill.rs @@ -0,0 +1,364 @@ +use core::cmp::{Ordering, PartialOrd}; +use core::ops::{Add, Div, Mul, Neg, Shl, Shr, Sub}; + +use ::i256::i256 as i256_; + +use crate::{ + layout::Promotion, + ops::{One, Zero}, + ConvertError, +}; + +/// A polyfill for i256. 
+#[allow(non_camel_case_types)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(transparent)] +pub(crate) struct i256(pub i256_); + +static_assertions::assert_eq_size!(i256, [u128; 2]); + +impl i256 { + const I128_MAX: Self = Self::from_i128(i128::MAX); + const I128_MIN: Self = Self::from_i128(i128::MIN); + const I64_MAX: Self = Self::from_i64(i64::MAX); + const I64_MIN: Self = Self::from_i64(i64::MIN); + const MIN: Self = Self(i256_::MIN); + + pub(crate) const fn from_i128(x: i128) -> Self { + Self(i256_::from_i128(x)) + } + + const fn from_i64(x: i64) -> Self { + Self(i256_::from_i64(x)) + } + + const fn from_i8(x: i8) -> Self { + Self(i256_::from_i8(x)) + } + + #[cfg(test)] + const fn new(lo: u128, hi: i128) -> Self { + Self(i256_::new(lo, hi)) + } +} + +impl Promotion for i256 { + type Layout = i128; + + #[inline] + fn as_layout(&self) -> Self::Layout { + self.0.as_i128() + } + + #[cfg(feature = "std")] + #[inline] + fn as_positive_f64(&self) -> f64 { + debug_assert!(*self >= Self::ZERO); + let hi = self.0.high() as f64; + let lo = self.0.low() as f64; + let b2p128 = 3.402823669209385e38; + hi * b2p128 + lo + } + + #[inline] + fn leading_zeros(&self) -> u32 { + self.0.leading_zeros() + } + + #[inline] + fn mul_l(&self, rhs: Self::Layout) -> Self { + Self(self.0.mul_iwide(rhs)) + } + + #[inline] + fn div_l(&self, rhs: Self::Layout) -> Self { + Self(self.0.div_iwide(rhs)) + } + + #[inline] + fn div_rem_l(&self, rhs: Self::Layout) -> (Self, Self::Layout) { + let (div, rem) = self.0.div_rem_iwide(rhs); + (Self(div), rem) + } +} + +impl One for i256 { + const ONE: Self = Self::from_i64(1); +} + +impl Zero for i256 { + const ZERO: Self = Self::from_i64(0); +} + +impl Mul for i256 { + type Output = Self; + + #[inline] + fn mul(self, rhs: Self) -> Self::Output { + Self(self.0 * rhs.0) + } +} + +impl Div for i256 { + type Output = Self; + + #[inline] + fn div(self, rhs: Self) -> Self::Output { + Self(self.0 / rhs.0) + } +} + +impl Add for i256 { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self::Output { + Self(self.0 + rhs.0) + } +} + +impl Sub for i256 { + type Output = Self; + + #[inline] + fn sub(self, rhs: Self) -> Self::Output { + Self(self.0 - rhs.0) + } +} + +impl Neg for i256 { + type Output = Self; + + #[inline] + fn neg(self) -> Self::Output { + debug_assert_ne!(self, Self::MIN); + Self(-self.0) + } +} + +impl Ord for i256 { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + self.0.cmp(&other.0) + } +} + +impl PartialOrd for i256 { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl From for i256 { + #[inline] + fn from(x: i8) -> Self { + Self::from_i8(x) + } +} + +impl From for i256 { + #[inline] + fn from(x: i64) -> Self { + Self::from_i64(x) + } +} + +impl From for i256 { + #[inline] + fn from(x: i128) -> Self { + Self::from_i128(x) + } +} + +impl TryFrom for i128 { + type Error = ConvertError; + + #[inline] + fn try_from(x: i256) -> Result { + if !(i256::I128_MIN..=i256::I128_MAX).contains(&x) { + return Err(ConvertError::new("not in range")); + } + + Ok(x.0.as_i128()) + } +} + +impl TryFrom for i64 { + type Error = ConvertError; + + #[inline] + fn try_from(x: i256) -> Result { + if !(i256::I64_MIN..=i256::I64_MAX).contains(&x) { + return Err(ConvertError::new("not in range")); + } + + Ok(x.0.as_i64()) + } +} + +impl Shl for i256 { + type Output = Self; + + #[inline] + fn shl(self, rhs: u32) -> Self::Output { + Self(self.0 << rhs) + } +} + +impl Shr for i256 { + type Output = Self; + + #[inline] + 
fn shr(self, rhs: u32) -> Self::Output { + Self(self.0 >> rhs) + } +} + +// Simple smoke tests to check that the underlying implementation is adequate. +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn bounds_i128() { + assert_eq!(i128::try_from(i256::I128_MIN).unwrap(), i128::MIN); + assert_eq!(i128::try_from(i256::I128_MAX).unwrap(), i128::MAX); + } + + #[test] + fn cmp() { + use core::cmp::Ordering::{self, *}; + fn t(a: i128, b: i128, ord: Ordering) { + let a = i256::from(a); + let b = i256::from(b); + assert_eq!(a.cmp(&b), ord); + assert_eq!(b.cmp(&a), ord.reverse()); + } + t(5, 3, Greater); + t(-5, -5, Equal); + t(0, -5, Greater); + } + + #[test] + fn from_i128() { + fn t(x: i128) { + assert_eq!(i128::try_from(i256::from(x)).unwrap(), x); + } + t(0); + t(1); + t(-1); + t(i128::MAX); + t(i128::MAX - 1); + t(i128::MIN); + t(i128::MIN + 1); + } + + #[test] + fn neg_i128() { + fn t(x: i128) { + assert_eq!(i128::try_from(-i256::from(x)).unwrap(), -x); + assert_eq!(i128::try_from(-i256::from(-x)).unwrap(), x); + } + t(0); + t(1); + t(1234); + t(123_456_789_987); + } + + #[test] + fn neg_i256() { + fn t(value: i256, expected: i256) { + let actual: i256 = -value; + assert_eq!(actual, expected); + assert_eq!(-actual, value); + } + t(i256::new(u128::MAX, i128::MAX), i256::new(1, i128::MIN)); + t( + i256::new(u128::MAX / 2, i128::MAX / 2), + i256::new(u128::MAX / 2 + 2, i128::MIN / 2), + ); + } + + #[test] + #[should_panic] + fn neg_i256_min() { + let _x = -i256::MIN; + } + + #[test] + fn add() { + fn t(a: i128, b: i128, expected: i128) { + let a = i256::from(a); + let b = i256::from(b); + assert_eq!(i128::try_from(a + b).unwrap(), expected); + assert_eq!(i128::try_from(b + a).unwrap(), expected); + assert_eq!(i128::try_from((-a) + (-b)).unwrap(), -expected); + assert_eq!(i128::try_from((-b) + (-a)).unwrap(), -expected); + } + t(0, 0, 0); + t(1111, 3210, 4321); + t(-1111, 5432, 4321); + t(-4321, 5432, 1111); + } + + #[test] + fn sub() { + fn t(a: i128, b: i128, expected: i128) { + let a = i256::from(a); + let b = i256::from(b); + assert_eq!(i128::try_from(a - b).unwrap(), expected); + assert_eq!(i128::try_from(b - a).unwrap(), -expected); + assert_eq!(i128::try_from((-a) - (-b)).unwrap(), -expected); + assert_eq!(i128::try_from((-b) - (-a)).unwrap(), expected); + } + t(0, 0, 0); + t(4321, 1111, 3210); + t(4321, -1111, 5432); + t(1111, -4321, 5432); + } + + #[test] + fn mul() { + fn t(a: i128, b: i128, expected: i128) { + let a = i256::from(a); + let b = i256::from(b); + assert_eq!(i128::try_from(a * b).unwrap(), expected); + assert_eq!(i128::try_from(b * a).unwrap(), expected); + assert_eq!(i128::try_from((-a) * (-b)).unwrap(), expected); + assert_eq!(i128::try_from((-b) * (-a)).unwrap(), expected); + } + t(0, 0, 0); + t(7, 5, 35); + t(-7, 5, -35); + } + + #[test] + fn div() { + fn t(a: i128, b: i128, expected: i128) { + let a = i256::from(a); + let b = i256::from(b); + assert_eq!(i128::try_from(a / b).unwrap(), expected); + assert_eq!(i128::try_from((-a) / (-b)).unwrap(), expected); + } + t(0, 1, 0); + t(35, 5, 7); + t(-35, 5, -7); + } + + #[cfg(feature = "std")] + #[test] + fn as_positive_f64() { + fn t(x: i256, expected: f64) { + assert_eq!(x.as_positive_f64(), expected); + } + t(0i64.into(), 0.0); + t(1i64.into(), 1.0); + t(i64::MAX.into(), 9.223372036854776e18); + t(i128::MAX.into(), 1.7014118346046923e38); + t( + i256::from(i128::MAX) * i256::from(i128::MAX), + 2.894802230932905e76, + ); + } +} diff --git a/src/layout.rs b/src/layout.rs new file mode 100644 index 0000000..8dc8eda 
--- /dev/null +++ b/src/layout.rs @@ -0,0 +1,65 @@ +use core::convert::{From, TryInto}; +use core::ops::{Add, Div, Mul, Neg, Sub}; + +#[doc(hidden)] +pub trait Promotion: + Sized + Ord + Neg + Add + Sub + Mul + Div + From + TryInto +{ + type Layout; + + fn as_layout(&self) -> Self::Layout; + #[cfg(feature = "std")] + fn as_positive_f64(&self) -> f64; + fn leading_zeros(&self) -> u32; + fn mul_l(&self, rhs: Self::Layout) -> Self; + fn div_l(&self, rhs: Self::Layout) -> Self; + fn div_rem_l(&self, rhs: Self::Layout) -> (Self, Self::Layout); +} + +#[cfg(any(feature = "i16", feature = "i32", feature = "i64"))] +macro_rules! promotion { + ($layout:ty => $prom:ty) => { + impl Promotion for $prom { + type Layout = $layout; + + #[inline] + fn as_layout(&self) -> Self::Layout { + *self as $layout + } + + #[cfg(feature = "std")] + #[inline] + fn as_positive_f64(&self) -> f64 { + *self as f64 + } + + #[inline] + fn leading_zeros(&self) -> u32 { + (*self).leading_zeros() + } + + #[inline] + fn mul_l(&self, rhs: Self::Layout) -> Self { + self * rhs as $prom + } + + #[inline] + fn div_l(&self, rhs: Self::Layout) -> Self { + self / rhs as $prom + } + + #[inline] + fn div_rem_l(&self, rhs: Self::Layout) -> (Self, Self::Layout) { + (self / rhs as $prom, (self % rhs as $prom) as Self::Layout) + } + } + }; +} + +#[cfg(feature = "i16")] +promotion!(i16 => i32); +#[cfg(feature = "i32")] +promotion!(i32 => i64); +#[cfg(feature = "i64")] +promotion!(i64 => i128); +// NOTE: i128 => i256 is implemented in the `i256_polyfill` module. diff --git a/src/lib.rs b/src/lib.rs index 5351f2c..ca48ec4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,7 +7,7 @@ //! ## Features //! Turn them on in `Cargo.toml`: //! -//! - `i128` — `i128` layout support which will be promoted to internally implemented `I256` for +//! - `i128` — `i128` layout support which will be promoted to a polyfill for `i256` for //! multiplication and division. //! - `i64` — `i64` layout support which will be promoted to `i128` for multiplication and division. //! - `i32` — `i32` layout support which will be promoted to `i64` for multiplication and division. @@ -134,13 +134,12 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] -use core::cmp::Ord; -use core::{fmt, i64, marker::PhantomData}; +use core::{cmp::Ord, fmt, marker::PhantomData}; use typenum::Unsigned; #[cfg(feature = "i128")] -use crate::i256::I256; +use crate::i256_polyfill::i256; use crate::ops::{sqrt::Sqrt, *}; use crate::string::Stringify; @@ -148,7 +147,8 @@ mod const_fn; mod errors; mod float; #[cfg(feature = "i128")] -mod i256; +mod i256_polyfill; +mod layout; mod macros; #[cfg(feature = "parity")] mod parity; @@ -172,6 +172,7 @@ mod schemars; #[doc(hidden)] pub mod _priv { pub use crate::const_fn::*; + pub use crate::layout::*; pub use crate::macros::Operand; pub use crate::ops::*; } @@ -235,9 +236,10 @@ macro_rules! impl_fixed_point { $(#[$attr:meta])? inner = $layout:tt; promoted_to = $promotion:tt; - convert = $convert:expr; try_from = [$($try_from:ty),*]; - ) => { + ) => {const _: () = { + use $crate::_priv::Promotion as _; + $(#[$attr])? impl FixedPoint<$layout, P> { /// The number of digits in the fractional part. @@ -247,7 +249,6 @@ macro_rules! impl_fixed_point { const COEF: $layout = const_fn::pow10(Self::PRECISION) as _; const NEG_COEF: $layout = -Self::COEF; - const COEF_PROMOTED: $promotion = $convert(Self::COEF) as _; } $(#[$attr])? @@ -273,19 +274,13 @@ macro_rules! 
impl_fixed_point { #[inline] fn rmul(self, rhs: Self, mode: RoundMode) -> Result { - // TODO: avoid 128bit arithmetic when possible, - // because LLVM doesn't replace 128bit division by const with multiplication. - - let value = $promotion::from(self.inner) * $promotion::from(rhs.inner); - // TODO: replace with multiplication by a constant. - let result = value / Self::COEF_PROMOTED; - let loss = value - result * Self::COEF_PROMOTED; + let value = $promotion::from(self.inner).mul_l(rhs.inner); + // `|loss| < COEF`, thus it fits in the layout. + let (result, loss) = value.div_rem_l(Self::COEF); let mut result = $layout::try_from(result).map_err(|_| ArithmeticError::Overflow)?; - // `|loss| < COEF`, thus it fits in the layout. - let loss = $layout::try_from(loss).unwrap(); let sign = self.inner.signum() * rhs.inner.signum(); let add_signed_one = if mode == RoundMode::Nearest { @@ -310,24 +305,17 @@ macro_rules! impl_fixed_point { #[inline] fn rdiv(self, rhs: Self, mode: RoundMode) -> Result { - // TODO: avoid 128bit arithmetic when possible, - // because LLVM doesn't replace 128bit division by const with multiplication. - if rhs.inner == 0 { return Err(ArithmeticError::DivisionByZero); } - let numerator = $promotion::from(self.inner) * Self::COEF_PROMOTED; - let denominator = $promotion::from(rhs.inner); - let result = numerator / denominator; - let loss = numerator - result * denominator; + let numerator = $promotion::from(self.inner).mul_l(Self::COEF); + // `|loss| < rhs`, thus it fits in the layout. + let (result, loss) = numerator.div_rem_l(rhs.inner); let mut result = $layout::try_from(result).map_err(|_| ArithmeticError::Overflow)?; - // `|loss| < denominator`, thus it fits in the layout. - let loss = $layout::try_from(loss).unwrap(); - if loss != 0 { let sign = self.inner.signum() * rhs.inner.signum(); @@ -587,8 +575,6 @@ macro_rules! impl_fixed_point { /// * `Ceil`: `S ≥ sqrt(F)` /// * `Nearest`: `Floor` or `Ceil`, which one is closer to `sqrt(F)` /// - /// The fastest mode is `Floor`. - /// /// ``` /// # #[cfg(feature = "i64")] /// # fn main() -> Result<(), Box> { @@ -617,23 +603,31 @@ macro_rules! impl_fixed_point { // At first we have `S_inner = S * COEF`. // We'd like to gain `sqrt(S) * COEF`: // `sqrt(S) * COEF = sqrt(S * COEF^2) = sqrt(S_inner * COEF)` - let squared = $promotion::from(self.inner) * Self::COEF_PROMOTED; - let lo = squared.sqrt()?; + let squared = $promotion::from(self.inner).mul_l(Self::COEF); + let lo = squared.sqrt(); let add_one = match mode { RoundMode::Floor => false, RoundMode::Nearest => { - let lo2 = lo * lo; - // (lo+1)^2 = lo^2 +2lo + 1 - let hi2 = lo2 + lo + lo + $promotion::ONE; - squared - lo2 >= hi2 - squared + // We choose to round up iff + // + // (lo+1)^2 - squared <= squared - lo^2 + // + // However, we don't want to do calculations in the promoted type, + // because it can be slow (`i128` and `i256`). So, we use modular + // arithmetic (with `2^bits(layout)` modulus) to avoid it. + + let lo2 = lo.wrapping_mul(lo); + // hi^2 = (lo+1)^2 = lo^2 + 2lo + 1 + let hi2 = lo2.wrapping_add(lo).wrapping_add(lo).wrapping_add($layout::ONE); + let squared = squared.as_layout(); + hi2.wrapping_sub(squared) <= squared.wrapping_sub(lo2) + }, + RoundMode::Ceil => { + lo.wrapping_mul(lo) != squared.as_layout() }, - RoundMode::Ceil if lo * lo == squared => false, - RoundMode::Ceil => true, }; - // `sqrt` can't take more bits than `self` already does, thus `unwrap()` is ok. 
- let lo = $layout::try_from(lo).unwrap(); let inner = if add_one { lo + $layout::ONE } else { @@ -734,12 +728,7 @@ macro_rules! impl_fixed_point { } } )* - }; -} - -#[cfg(any(feature = "i64", feature = "i32", feature = "i16"))] -const fn identity(x: T) -> T { - x + };}; } #[cfg(feature = "i16")] @@ -747,7 +736,6 @@ impl_fixed_point!( #[cfg_attr(docsrs, doc(cfg(feature = "i16")))] inner = i16; promoted_to = i32; - convert = identity; try_from = [i8, u8, i16, u16, i32, u32, i64, u64, i128, u128, isize, usize]; ); #[cfg(feature = "i32")] @@ -755,7 +743,6 @@ impl_fixed_point!( #[cfg_attr(docsrs, doc(cfg(feature = "i32")))] inner = i32; promoted_to = i64; - convert = identity; try_from = [i8, u8, i16, u16, i32, u32, i64, u64, i128, u128, isize, usize]; ); #[cfg(feature = "i64")] @@ -763,14 +750,12 @@ impl_fixed_point!( #[cfg_attr(docsrs, doc(cfg(feature = "i64")))] inner = i64; promoted_to = i128; - convert = identity; try_from = [i8, u8, i16, u16, i32, u32, i64, u64, i128, u128, isize, usize]; ); #[cfg(feature = "i128")] impl_fixed_point!( #[cfg_attr(docsrs, doc(cfg(feature = "i128")))] inner = i128; - promoted_to = I256; - convert = I256::from_i128; + promoted_to = i256; try_from = [i8, u8, i16, u16, i32, u32, i64, u64, i128, u128, isize, usize]; ); diff --git a/src/ops/sqrt.rs b/src/ops/sqrt.rs index 5a2b295..efe4a66 100644 --- a/src/ops/sqrt.rs +++ b/src/ops/sqrt.rs @@ -1,56 +1,40 @@ -use core::mem; +use crate::{layout::Promotion, ops::Zero}; -use crate::ArithmeticError; - -pub(crate) trait Sqrt: Sized { - type Error; - - /// Checked square root. - /// For given non-negative number S returns max possible number Q such that: - /// `Q ≤ sqrt(S)`. - /// Returns `Error` for negative arguments. - fn sqrt(self) -> Result; +pub(crate) trait Sqrt: Promotion { + fn sqrt(self) -> Self::Layout; } macro_rules! impl_sqrt { - ($( $int:ty ),+ $(,)?) => { - $( impl_sqrt!(@single $int); )* - }; - (@single $int:ty) => { - impl Sqrt for $int { - type Error = ArithmeticError; - + ($prom:ty) => { + impl Sqrt for $prom { /// Checked integer square root. /// Sqrt implementation courtesy of [`num` crate][num]. /// /// [num]: https://github.com/rust-num/num-integer/blob/4d166cbb754244760e28ea4ce826d54fafd3e629/src/roots.rs#L278 #[inline] - fn sqrt(self) -> Result { - #[inline] - const fn bits() -> u32 { - (mem::size_of::() * 8) as _ - } + fn sqrt(self) -> Self::Layout { + type Layout = <$prom as Promotion>::Layout; #[cfg(feature = "std")] #[inline] - fn guess(x: $int) -> $int { - (x as f64).sqrt() as $int + fn guess(v: $prom) -> Layout { + v.as_positive_f64().sqrt() as Layout } #[cfg(not(feature = "std"))] #[inline] - fn guess(x: $int) -> $int { + fn guess(v: $prom) -> Layout { #[inline] - fn log2_estimate(x: $int) -> u32 { - debug_assert!(x > 0); - bits::<$int>() - 1 - x.leading_zeros() + fn log2_estimate(v: $prom) -> u32 { + debug_assert!(v > <$prom as Zero>::ZERO); + (core::mem::size_of::<$prom>() as u32 * 8) - 1 - v.leading_zeros() } - 1 << ((log2_estimate(x) + 1) / 2) + 1 << ((log2_estimate(v) + 1) / 2) } #[inline] - fn fixpoint(mut x: $int, f: impl Fn($int) -> $int) -> $int { + fn fixpoint(mut x: Layout, f: impl Fn(Layout) -> Layout) -> Layout { let mut xn = f(x); while x < xn { x = xn; @@ -63,30 +47,25 @@ macro_rules! impl_sqrt { x } - #[allow(unused_comparisons)] - { debug_assert!(self >= 0); } + debug_assert!(self >= <$prom as Zero>::ZERO); - if bits::<$int>() > 64 { - // 128-bit division is slow, so do a recursive bitwise `sqrt` until it's small enough. 
- let result = match u64::try_from(self) { - Ok(x) => x.sqrt()? as _, - Err(_) => { - let lo = (self >> 2u32).sqrt()? << 1; - let hi = lo + 1; - if hi * hi <= self { hi } else { lo } - } - }; - return Ok(result); - } - if self < 4 { - return Ok((self > 0).into()); + if self < <$prom>::from(4i8) { + return ((self > <$prom as Zero>::ZERO) as i8).into(); } + // https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method - let next = |x: $int| (self / x + x) >> 1; - Ok(fixpoint(guess(self), next)) + let next = |x: Layout| (self.div_l(x).as_layout() + x) >> 1; + fixpoint(guess(self), next) } } - } + }; } -impl_sqrt!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128); +#[cfg(feature = "i16")] +impl_sqrt!(i32); +#[cfg(feature = "i32")] +impl_sqrt!(i64); +#[cfg(feature = "i64")] +impl_sqrt!(i128); +#[cfg(feature = "i128")] +impl_sqrt!(crate::i256); diff --git a/src/string.rs b/src/string.rs index 6e59efc..26cda7b 100644 --- a/src/string.rs +++ b/src/string.rs @@ -56,7 +56,7 @@ macro_rules! impl_for { let prec = Self::PRECISION as usize; // TODO: negative precision? if EXACT { - if fractional_str.len() > Self::PRECISION.abs() as usize { + if fractional_str.len() > Self::PRECISION.unsigned_abs() as usize { return Err(ConvertError::new("requested precision is too high")); } } diff --git a/tests/it/const_ctor/too_long_fractional.stderr b/tests/it/const_ctor/too_long_fractional.stderr index 50912d3..aa71d74 100644 --- a/tests/it/const_ctor/too_long_fractional.stderr +++ b/tests/it/const_ctor/too_long_fractional.stderr @@ -14,7 +14,7 @@ help: the constant being evaluated = note: `#[deny(long_running_const_eval)]` on by default = note: this error originates in the macro `const_assert` which comes from the expansion of the macro `fixnum_const` (in Nightly builds, run with -Z macro-backtrace for more info) -note: erroneous constant used +note: erroneous constant encountered --> tests/it/const_ctor/too_long_fractional.rs:7:36 | 7 | const VALUE: FixedPoint = fixnum_const!(0.1234567891, 9);
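
For reference, a minimal standalone sketch (assumed names, not the crate's API) of the reworked `rmul` flow this patch introduces: one promoted division/remainder pair replaces the old "divide, multiply back, subtract" loss computation. It is specialized here to the `i64` layout promoted to `i128`; `COEF` and the nearest tie-breaking rule are illustrative assumptions.

```rust
// Standalone illustration (hypothetical names, not the crate's API).
fn rmul_nearest(lhs: i64, rhs: i64) -> Option<i64> {
    // Assumes PRECISION = 9, i.e. the `F64p9` configuration from the benches.
    const COEF: i128 = 1_000_000_000;

    let value = i128::from(lhs) * i128::from(rhs);
    // Single division/remainder pair in the promoted type; the remainder is
    // the rounding loss and always fits back into the layout (|loss| < COEF).
    let (quot, loss) = (value / COEF, value % COEF);
    // Overflow check when narrowing the quotient back to the layout.
    let mut result = i64::try_from(quot).ok()?;

    // Nearest rounding driven by `loss`; the crate's exact tie-breaking may
    // differ — this only shows how the remainder feeds the adjustment.
    if 2 * loss.abs() >= COEF {
        result = result.checked_add(if value < 0 { -1 } else { 1 })?;
    }
    Some(result)
}

fn main() {
    // 1.5 * 2.5 = 3.75 at precision 9.
    assert_eq!(
        rmul_nearest(1_500_000_000, 2_500_000_000),
        Some(3_750_000_000)
    );
    // 1e-9 * 1e-9 = 1e-18 rounds to 0 at precision 9.
    assert_eq!(rmul_nearest(1, 1), Some(0));
}
```

The design point the patch leans on is visible above: computing quotient and remainder together avoids the second multiplication, the subtraction, and the extra `try_from` of the loss that the previous implementation needed in the promoted type.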