diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs
index c775d3131692..83f47883add9 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -777,29 +777,19 @@ pub fn binary_numeric_coercion(
         (_, Float32) | (Float32, _) => Some(Float32),
         // The following match arms encode the following logic: Given the two
         // integral types, we choose the narrowest possible integral type that
-        // accommodates all values of both types. Note that some information
-        // loss is inevitable when we have a signed type and a `UInt64`, in
-        // which case we use `Int64`;i.e. the widest signed integral type.
-
-        // TODO: For i64 and u64, we can use decimal or float64
-        // Postgres has no unsigned type :(
-        // DuckDB v.0.10.0 has double (double precision floating-point number (8 bytes))
-        // for largest signed (signed sixteen-byte integer) and unsigned integer (unsigned sixteen-byte integer)
+        // accommodates all values of both types. Note that, to avoid information
+        // loss when combining `UInt64` with signed integers, we use `Decimal128(20, 0)`.
+        (UInt64, Int64 | Int32 | Int16 | Int8)
+        | (Int64 | Int32 | Int16 | Int8, UInt64) => Some(Decimal128(20, 0)),
+        (UInt64, _) | (_, UInt64) => Some(UInt64),
         (Int64, _)
         | (_, Int64)
-        | (UInt64, Int8)
-        | (Int8, UInt64)
-        | (UInt64, Int16)
-        | (Int16, UInt64)
-        | (UInt64, Int32)
-        | (Int32, UInt64)
         | (UInt32, Int8)
         | (Int8, UInt32)
         | (UInt32, Int16)
         | (Int16, UInt32)
         | (UInt32, Int32)
         | (Int32, UInt32) => Some(Int64),
-        (UInt64, _) | (_, UInt64) => Some(UInt64),
         (Int32, _)
         | (_, Int32)
         | (UInt16, Int16)
@@ -928,16 +918,16 @@ pub fn get_wider_type(lhs: &DataType, rhs: &DataType) -> Result<DataType> {
 }
 
 /// Convert the numeric data type to the decimal data type.
-/// Now, we just support the signed integer type and floating-point type.
+/// We support signed and unsigned integer types and floating-point types.
 fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option<DataType> {
     use arrow::datatypes::DataType::*;
     // This conversion rule is from spark
     // https://github.com/apache/spark/blob/1c81ad20296d34f137238dadd67cc6ae405944eb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala#L127
     match numeric_type {
-        Int8 => Some(Decimal128(3, 0)),
-        Int16 => Some(Decimal128(5, 0)),
-        Int32 => Some(Decimal128(10, 0)),
-        Int64 => Some(Decimal128(20, 0)),
+        Int8 | UInt8 => Some(Decimal128(3, 0)),
+        Int16 | UInt16 => Some(Decimal128(5, 0)),
+        Int32 | UInt32 => Some(Decimal128(10, 0)),
+        Int64 | UInt64 => Some(Decimal128(20, 0)),
         // TODO if we convert the floating-point data to the decimal type, it maybe overflow.
         Float32 => Some(Decimal128(14, 7)),
         Float64 => Some(Decimal128(30, 15)),
@@ -946,16 +936,16 @@ fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option<DataType> {
 }
 
 /// Convert the numeric data type to the decimal data type.
-/// Now, we just support the signed integer type and floating-point type.
+/// We support signed and unsigned integer types and floating-point types.
 fn coerce_numeric_type_to_decimal256(numeric_type: &DataType) -> Option<DataType> {
     use arrow::datatypes::DataType::*;
     // This conversion rule is from spark
     // https://github.com/apache/spark/blob/1c81ad20296d34f137238dadd67cc6ae405944eb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala#L127
     match numeric_type {
-        Int8 => Some(Decimal256(3, 0)),
-        Int16 => Some(Decimal256(5, 0)),
-        Int32 => Some(Decimal256(10, 0)),
-        Int64 => Some(Decimal256(20, 0)),
+        Int8 | UInt8 => Some(Decimal256(3, 0)),
+        Int16 | UInt16 => Some(Decimal256(5, 0)),
+        Int32 | UInt32 => Some(Decimal256(10, 0)),
+        Int64 | UInt64 => Some(Decimal256(20, 0)),
         // TODO if we convert the floating-point data to the decimal type, it maybe overflow.
         Float32 => Some(Decimal256(14, 7)),
         Float64 => Some(Decimal256(30, 15)),
@@ -1994,6 +1984,18 @@ mod tests {
             Operator::Gt,
             DataType::UInt32
         );
+        test_coercion_binary_rule!(
+            DataType::UInt64,
+            DataType::UInt8,
+            Operator::Eq,
+            DataType::UInt64
+        );
+        test_coercion_binary_rule!(
+            DataType::UInt64,
+            DataType::Int64,
+            Operator::Eq,
+            DataType::Decimal128(20, 0)
+        );
         // numeric/decimal
         test_coercion_binary_rule!(
             DataType::Int64,
@@ -2025,6 +2027,12 @@ mod tests {
             Operator::GtEq,
             DataType::Decimal128(15, 3)
         );
+        test_coercion_binary_rule!(
+            DataType::UInt64,
+            DataType::Decimal128(20, 0),
+            Operator::Eq,
+            DataType::Decimal128(20, 0)
+        );
 
         // Binary
         test_coercion_binary_rule!(
diff --git a/datafusion/sqllogictest/test_files/coalesce.slt b/datafusion/sqllogictest/test_files/coalesce.slt
index 06460a005c20..a624d1def980 100644
--- a/datafusion/sqllogictest/test_files/coalesce.slt
+++ b/datafusion/sqllogictest/test_files/coalesce.slt
@@ -105,13 +105,13 @@ select
 ----
 2 Int64
 
-# TODO: Got types (i64, u64), casting to decimal or double or even i128 if supported
-query IT
+# i64 and u64 coerce to Decimal128(20, 0)
+query RT
 select
   coalesce(2, arrow_cast(3, 'UInt64')),
   arrow_typeof(coalesce(2, arrow_cast(3, 'UInt64')));
 ----
-2 Int64
+2 Decimal128(20, 0)
 
 statement ok
 create table t1 (a bigint, b int, c int) as values (null, null, 1), (null, 2, null);
diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt
index 804612287246..c32b4b9ea397 100644
--- a/datafusion/sqllogictest/test_files/insert.slt
+++ b/datafusion/sqllogictest/test_files/insert.slt
@@ -433,3 +433,30 @@ drop table test_column_defaults
 
 statement error DataFusion error: Error during planning: Column reference is not allowed in the DEFAULT expression : Schema error: No field named a.
 create table test_column_defaults(a int, b int default a+1)
+
+
+# test inserting UInt64 and signed integers into a bigint unsigned column
+statement ok
+create table unsigned_bigint_test (v bigint unsigned)
+
+query I
+insert into unsigned_bigint_test values (10000000000000000000), (18446744073709551615)
+----
+2
+
+query I
+insert into unsigned_bigint_test values (10000000000000000001), (1), (10000000000000000002)
+----
+3
+
+query I rowsort
+select * from unsigned_bigint_test
+----
+1
+10000000000000000000
+10000000000000000001
+10000000000000000002
+18446744073709551615
+
+statement ok
+drop table unsigned_bigint_test
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index 68426f180d99..496c6c609e45 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -1828,7 +1828,7 @@ AS VALUES
 ('BB', 6, 1),
 ('BB', 6, 1);
 
-query TII
+query TIR
 select col1, col2, coalesce(sum_col3, 0) as sum_col3
 from (select distinct col2 from tbl) AS q1
 cross join (select distinct col1 from tbl) AS q2
diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt
index 7b8992b966ad..352c01ca295c 100644
--- a/datafusion/sqllogictest/test_files/union.slt
+++ b/datafusion/sqllogictest/test_files/union.slt
@@ -413,23 +413,23 @@ explain SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggre
 logical_plan
 01)Sort: aggregate_test_100.c9 DESC NULLS FIRST, fetch=5
 02)--Union
-03)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9
+03)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Decimal128(20, 0)) AS c9
 04)------TableScan: aggregate_test_100 projection=[c1, c9]
-05)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9
+05)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Decimal128(20, 0)) AS c9
 06)------TableScan: aggregate_test_100 projection=[c1, c3]
 physical_plan
 01)SortPreservingMergeExec: [c9@1 DESC], fetch=5
 02)--UnionExec
 03)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true]
-04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9]
+04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Decimal128(20, 0)) as c9]
 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true
 07)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true]
-08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Int64) as c9]
+08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Decimal128(20, 0)) as c9]
 09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true
 
-query TI
+query TR
 SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggregate_test_100 ORDER BY c9 DESC LIMIT 5
 ----
 c 4268716378
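
Reviewer note, not part of the patch: a minimal, self-contained sketch of the coercion behavior this diff encodes. It assumes `binary_numeric_coercion` (the `pub fn` shown in the first hunk) is reachable at `datafusion_expr_common::type_coercion::binary`; the expected types simply mirror the new match arms.

use arrow::datatypes::DataType;
use datafusion_expr_common::type_coercion::binary::binary_numeric_coercion;

fn main() {
    // UInt64 combined with any signed integer now widens to Decimal128(20, 0):
    // 20 digits of precision cover both i64 (at most 19 digits plus sign) and
    // u64 (at most 20 digits), so neither operand can lose information.
    assert_eq!(
        binary_numeric_coercion(&DataType::UInt64, &DataType::Int64),
        Some(DataType::Decimal128(20, 0))
    );
    // Two unsigned integers still coerce to the wider unsigned type.
    assert_eq!(
        binary_numeric_coercion(&DataType::UInt64, &DataType::UInt8),
        Some(DataType::UInt64)
    );
    // Unsigned types narrower than 64 bits still fit in Int64, as before.
    assert_eq!(
        binary_numeric_coercion(&DataType::UInt32, &DataType::Int8),
        Some(DataType::Int64)
    );
}

This is why Decimal128(20, 0) rather than Int64 (lossy for large u64) or Float64 (inexact above 2^53) appears in the plans and slt expectations above.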