fix(expr-common): Coerce to Decimal(20, 0) when combining UInt64 with signed integers #14223

Merged · 5 commits · Jan 24, 2025
58 changes: 33 additions & 25 deletions datafusion/expr-common/src/type_coercion/binary.rs
@@ -777,29 +777,19 @@ pub fn binary_numeric_coercion(
(_, Float32) | (Float32, _) => Some(Float32),
// The following match arms encode the following logic: Given the two
// integral types, we choose the narrowest possible integral type that
-// accommodates all values of both types. Note that some information
-// loss is inevitable when we have a signed type and a `UInt64`, in
-// which case we use `Int64`;i.e. the widest signed integral type.
-
-// TODO: For i64 and u64, we can use decimal or float64
-// Postgres has no unsigned type :(
-// DuckDB v.0.10.0 has double (double precision floating-point number (8 bytes))
-// for largest signed (signed sixteen-byte integer) and unsigned integer (unsigned sixteen-byte integer)
+// accommodates all values of both types. Note that to avoid information
+// loss when combining UInt64 with signed integers we use Decimal128(20, 0).
+(UInt64, Int64 | Int32 | Int16 | Int8)

Contributor:

I think this has potentially (large) performance implications.

My understanding is that this means that Int64+Int64 will (always) result in a 128-bit result?

So even though performing int64+int64 will never overflow, all queries will pay the price of 2x space (and some time) overhead?

Member:

No, Int64+Int64 is not affected; it uses `mathematics_numerical_coercion`.

I did some validation on this PR branch.

DataFusion CLI v44.0.0

> create table test(a bigint, b bigint unsigned) as values(1,1);
0 row(s) fetched.
Elapsed 0.008 seconds.

> select arrow_typeof(a+b), arrow_typeof(a+a), arrow_typeof(a), arrow_typeof(b) from test;
+-------------------------------+-------------------------------+----------------------+----------------------+
| arrow_typeof(test.a + test.b) | arrow_typeof(test.a + test.a) | arrow_typeof(test.a) | arrow_typeof(test.b) |
+-------------------------------+-------------------------------+----------------------+----------------------+
| Int64                         | Int64                         | Int64                | UInt64               |
+-------------------------------+-------------------------------+----------------------+----------------------+
1 row(s) fetched.
Elapsed 0.009 seconds.

Contributor:

I really liked this way of verifying the behavior. I made a PR with this type of test and verified that the tests still pass with the changes in this PR.

+| (Int64 | Int32 | Int16 | Int8, UInt64) => Some(Decimal128(20, 0)),
+(UInt64, _) | (_, UInt64) => Some(UInt64),
(Int64, _)
| (_, Int64)
-| (UInt64, Int8)
-| (Int8, UInt64)
-| (UInt64, Int16)
-| (Int16, UInt64)
-| (UInt64, Int32)
-| (Int32, UInt64)
| (UInt32, Int8)
| (Int8, UInt32)
| (UInt32, Int16)
| (Int16, UInt32)
| (UInt32, Int32)
| (Int32, UInt32) => Some(Int64),
-(UInt64, _) | (_, UInt64) => Some(UInt64),
(Int32, _)
| (_, Int32)
| (UInt16, Int16)
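
A minimal sketch (not part of this PR) of how the new rules could be exercised outside the test macro. It assumes the pub fn binary_numeric_coercion(&DataType, &DataType) -> Option<DataType> signature visible in this diff, and that the crate path datafusion_expr_common::type_coercion::binary matches the file layout:

use arrow::datatypes::DataType;
// Assumed path, mirroring datafusion/expr-common/src/type_coercion/binary.rs.
use datafusion_expr_common::type_coercion::binary::binary_numeric_coercion;

fn main() {
    // UInt64 combined with a signed integer now widens to Decimal128(20, 0),
    // which can represent both i64::MIN and u64::MAX exactly.
    assert_eq!(
        binary_numeric_coercion(&DataType::UInt64, &DataType::Int64),
        Some(DataType::Decimal128(20, 0))
    );
    // Unsigned-only pairs still coerce to the widest unsigned type.
    assert_eq!(
        binary_numeric_coercion(&DataType::UInt64, &DataType::UInt8),
        Some(DataType::UInt64)
    );
    // Signed-only pairs are unaffected by this change.
    assert_eq!(
        binary_numeric_coercion(&DataType::Int64, &DataType::Int32),
        Some(DataType::Int64)
    );
}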
@@ -928,16 +918,16 @@ pub fn get_wider_type(lhs: &DataType, rhs: &DataType) -> Result<DataType> {
}

/// Convert the numeric data type to the decimal data type.
-/// Now, we just support the signed integer type and floating-point type.
+/// We support signed and unsigned integer types and floating-point type.
fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
// This conversion rule is from spark
// https://github.com/apache/spark/blob/1c81ad20296d34f137238dadd67cc6ae405944eb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala#L127
match numeric_type {
-    Int8 => Some(Decimal128(3, 0)),
-    Int16 => Some(Decimal128(5, 0)),
-    Int32 => Some(Decimal128(10, 0)),
-    Int64 => Some(Decimal128(20, 0)),
+    Int8 | UInt8 => Some(Decimal128(3, 0)),
+    Int16 | UInt16 => Some(Decimal128(5, 0)),
+    Int32 | UInt32 => Some(Decimal128(10, 0)),
+    Int64 | UInt64 => Some(Decimal128(20, 0)),
// TODO if we convert the floating-point data to the decimal type, it maybe overflow.
Float32 => Some(Decimal128(14, 7)),
Float64 => Some(Decimal128(30, 15)),
@@ -946,16 +936,16 @@ fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option<DataType> {
}

/// Convert the numeric data type to the decimal data type.
-/// Now, we just support the signed integer type and floating-point type.
+/// We support signed and unsigned integer types and floating-point type.
fn coerce_numeric_type_to_decimal256(numeric_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
// This conversion rule is from spark
// https://github.com/apache/spark/blob/1c81ad20296d34f137238dadd67cc6ae405944eb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala#L127
match numeric_type {
-    Int8 => Some(Decimal256(3, 0)),
-    Int16 => Some(Decimal256(5, 0)),
-    Int32 => Some(Decimal256(10, 0)),
-    Int64 => Some(Decimal256(20, 0)),
+    Int8 | UInt8 => Some(Decimal256(3, 0)),
+    Int16 | UInt16 => Some(Decimal256(5, 0)),
+    Int32 | UInt32 => Some(Decimal256(10, 0)),
+    Int64 | UInt64 => Some(Decimal256(20, 0)),
// TODO if we convert the floating-point data to the decimal type, it maybe overflow.
Float32 => Some(Decimal256(14, 7)),
Float64 => Some(Decimal256(30, 15)),
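
As a sanity check on the precision constants above (a standalone sketch, not code from this PR): each integral type's extreme values must fit in the chosen number of decimal digits, and u64::MAX is what forces precision 20.

fn digits(v: &str) -> usize {
    // Count decimal digits, ignoring a leading minus sign.
    v.trim_start_matches('-').len()
}

fn main() {
    assert_eq!(digits(&u8::MAX.to_string()), 3);   // 255 fits Decimal(3, 0)
    assert_eq!(digits(&i16::MIN.to_string()), 5);  // -32768 fits Decimal(5, 0)
    assert_eq!(digits(&u32::MAX.to_string()), 10); // 4294967295 fits Decimal(10, 0)
    assert_eq!(digits(&i64::MIN.to_string()), 19); // -9223372036854775808
    assert_eq!(digits(&u64::MAX.to_string()), 20); // 18446744073709551615 needs all 20 digits
}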
@@ -1994,6 +1984,18 @@ mod tests {
Operator::Gt,
DataType::UInt32
);
+test_coercion_binary_rule!(
+    DataType::UInt64,
+    DataType::UInt8,
+    Operator::Eq,
+    DataType::UInt64
+);
+test_coercion_binary_rule!(
+    DataType::UInt64,
+    DataType::Int64,
+    Operator::Eq,
+    DataType::Decimal128(20, 0)
+);
// numeric/decimal
test_coercion_binary_rule!(
DataType::Int64,
@@ -2025,6 +2027,12 @@
Operator::GtEq,
DataType::Decimal128(15, 3)
);
+test_coercion_binary_rule!(
+    DataType::UInt64,
+    DataType::Decimal128(20, 0),
+    Operator::Eq,
+    DataType::Decimal128(20, 0)
+);

// Binary
test_coercion_binary_rule!(
6 changes: 3 additions & 3 deletions datafusion/sqllogictest/test_files/coalesce.slt
@@ -105,13 +105,13 @@ select
----
2 Int64

-# TODO: Got types (i64, u64), casting to decimal or double or even i128 if supported
-query IT
+# i64 and u64, cast to decimal128(20, 0)
+query RT
select
coalesce(2, arrow_cast(3, 'UInt64')),
arrow_typeof(coalesce(2, arrow_cast(3, 'UInt64')));
----
-2 Int64
+2 Decimal128(20, 0)

statement ok
create table t1 (a bigint, b int, c int) as values (null, null, 1), (null, 2, null);
27 changes: 27 additions & 0 deletions datafusion/sqllogictest/test_files/insert.slt
@@ -433,3 +433,30 @@ drop table test_column_defaults

statement error DataFusion error: Error during planning: Column reference is not allowed in the DEFAULT expression : Schema error: No field named a.
create table test_column_defaults(a int, b int default a+1)


+# test inserting UInt64 and signed integers into a bigint unsigned column
+statement ok
+create table unsigned_bigint_test (v bigint unsigned)
+
+query I
+insert into unsigned_bigint_test values (10000000000000000000), (18446744073709551615)
+----
+2
+
+query I
+insert into unsigned_bigint_test values (10000000000000000001), (1), (10000000000000000002)
+----
+3
+
+query I rowsort
+select * from unsigned_bigint_test

Contributor:

I agree that the case of overflow on `*` of two 64-bit numbers is more likely, and automatically coercing to Decimal128 may make sense.

However, I would argue that if the user cares about avoiding overflows when doing integer arithmetic, they should use Decimal128 in their input types.

Member:

Overflow is unexpected in this case, as all these values are valid unsigned bigint.

The problem is that in `values (10000000000000000001), (1)`, `10000000000000000001` is parsed as UInt64, and `1` is parsed as Int64. They were coerced to Int64, which can't accommodate 10000000000000000001.

This is very similar to the union case mentioned above.

+----
+1
+10000000000000000000
+10000000000000000001
+10000000000000000002
+18446744073709551615
+
+statement ok
+drop table unsigned_bigint_test
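
To make the failure mode discussed above concrete (a standalone sketch, not part of the PR): 10000000000000000001 is a valid u64 but exceeds i64::MAX, so coercing the mixed VALUES list to Int64 had to overflow, while Decimal128(20, 0) is backed by i128 and holds every value of both Int64 and UInt64.

fn main() {
    // Parses as u64, but not as i64 (i64::MAX is 9223372036854775807).
    assert!("10000000000000000001".parse::<i64>().is_err());
    assert_eq!("10000000000000000001".parse::<u64>(), Ok(10_000_000_000_000_000_001));

    // i128, the physical type behind Decimal128, covers both extremes losslessly.
    assert_eq!(i128::from(i64::MIN), -9_223_372_036_854_775_808_i128);
    assert_eq!(i128::from(u64::MAX), 18_446_744_073_709_551_615_i128);
}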
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/joins.slt
@@ -1828,7 +1828,7 @@ AS VALUES
('BB', 6, 1),
('BB', 6, 1);

-query TII
+query TIR
select col1, col2, coalesce(sum_col3, 0) as sum_col3
from (select distinct col2 from tbl) AS q1
cross join (select distinct col1 from tbl) AS q2
10 changes: 5 additions & 5 deletions datafusion/sqllogictest/test_files/union.slt
@@ -413,23 +413,23 @@ explain SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggre
logical_plan
01)Sort: aggregate_test_100.c9 DESC NULLS FIRST, fetch=5
02)--Union
-03)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9
+03)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Decimal128(20, 0)) AS c9
04)------TableScan: aggregate_test_100 projection=[c1, c9]
-05)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9
+05)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Decimal128(20, 0)) AS c9

Contributor:

This seems like a regression to me (there is now 2x the space needed).

Member:

This query unions Int16 with UInt64. We need to find a common type that can accommodate all possible values of these two types, such as -1 and u64::MAX. Despite the increased space, it makes the following query possible:

create table t1(a smallint) as values(1);
create table t2(a bigint unsigned) as values(10000000000000000000);
select * from t1 union select * from t2;

06)------TableScan: aggregate_test_100 projection=[c1, c3]
physical_plan
01)SortPreservingMergeExec: [c9@1 DESC], fetch=5
02)--UnionExec
03)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true]
-04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9]
+04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Decimal128(20, 0)) as c9]
05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true
07)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true]
-08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Int64) as c9]
+08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Decimal128(20, 0)) as c9]
09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true

-query TI
+query TR
SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggregate_test_100 ORDER BY c9 DESC LIMIT 5
----
c 4268716378