Skip to content

Commit

Permalink
Still adjusting double -> (U)INT64 conversion limits
Browse files Browse the repository at this point in the history
  • Loading branch information
gaborcsardi committed Feb 4, 2025
1 parent 0552594 commit 6f290dc
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 11 deletions.
8 changes: 4 additions & 4 deletions src/RParquetOutFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1081,11 +1081,11 @@ void RParquetOutFile::write_double_int64(std::ostream &file, SEXP col,
}
if (is_signed) {
// smallest & largest double that can be put into an int64_t
double min = -9223372036854775295.0, max = 9223372036854775295.0;
double min = -9223372036854774900.0, max = 9223372036854774900.0;
for (uint64_t i = from; i < until; i++) {
double val = REAL(col)[i];
if (R_IsNA(val)) continue;
const char *w = val < min ? "small" : (val > max ? "large" : "");
const char *w = val <= min ? "small" : (val >= max ? "large" : "");
if (w[0]) {
r_call([&] {
Rf_errorcall(
Expand All @@ -1110,7 +1110,7 @@ void RParquetOutFile::write_double_int64(std::ostream &file, SEXP col,
has_minmax_value[idx] = has_minmax_value[idx] || has_min;
} else {
// largest double that can be put into an uint64_t
double max = 18446744073709550591.0;
double max = 18446744073709550592.0;
uint64_t min_value = 0, max_value = 0;
bool has_min = false, has_max = false;
bool minmax = write_minmax_values && is_minmax_supported[idx];
Expand All @@ -1121,7 +1121,7 @@ void RParquetOutFile::write_double_int64(std::ostream &file, SEXP col,
for (uint64_t i = from; i < until; i++) {
double val = REAL(col)[i];
if (R_IsNA(val)) continue;
if (val > max) {
if (val >= max) {
r_call([&] {
Rf_errorcall(
nanoparquet_call,
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/_snaps/write-parquet-3.md
Original file line number Diff line number Diff line change
Expand Up @@ -1590,15 +1590,15 @@
write_parquet(d, tmp, schema = parquet_schema("INT_64"))
Condition
Error in `write_parquet()`:
! Integer value too large for INT with bit width 64: 9223372036854775808.000000 at column 1, row 2.
! Integer value too large for INT with bit width 64: 922337203685477xxxx.000000 at column 1, row 2.

---

Code
write_parquet(d, tmp, schema = parquet_schema("INT_64"))
Condition
Error in `write_parquet()`:
! Integer value too small for INT with bit width 64: -9223372036854775808.000000 at column 1, row 2.
! Integer value too small for INT with bit width 64: -922337203685477xxxx.000000 at column 1, row 2.

---

Expand Down Expand Up @@ -1629,7 +1629,7 @@
write_parquet(d, tmp, schema = parquet_schema("UINT_64"))
Condition
Error in `write_parquet()`:
! Integer value too large for unsigned INT with bit width 64: 18446744073709551616.000000 at column 1, row 2.
! Integer value too large for unsigned INT with bit width 64: 18446744073709551616.000000 at column 1, row 1.

---

Expand Down
4 changes: 4 additions & 0 deletions tests/testthat/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,7 @@ test_write <- function(d, schema = NULL, encoding = NULL) {
as.data.frame(read_parquet(tmp))
})
}

# Snapshot transform: masks the last four digits of any number that starts
# with the 15-digit prefix "922337203685477", replacing them with "xxxx".
# `x` is a character vector; the result comes straight from `gsub()`.
redact_maxint64 <- function(x) {
  # The prefix followed by exactly four decimal digits.
  near_limit_pattern <- "922337203685477[0-9][0-9][0-9][0-9]"
  # Same prefix with the four trailing digits replaced by a fixed placeholder.
  masked_text <- "922337203685477xxxx"
  gsub(near_limit_pattern, masked_text, x)
}
8 changes: 4 additions & 4 deletions tests/testthat/test-write-parquet-3.R
Original file line number Diff line number Diff line change
Expand Up @@ -564,14 +564,14 @@ test_that("double to INT(64, *)", {
as.data.frame(read_parquet(tmp))
})

d <- data.frame(d = c(9223372036854775295, 9223372036854775296))
d <- data.frame(d = c(9223372036854774899, 9223372036854774900))
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("INT_64"))
})
d <- data.frame(d = -c(9223372036854775295, 9223372036854775296))
}, transform = redact_maxint64)
d <- data.frame(d = -c(9223372036854774899, 9223372036854774900))
expect_snapshot(error = TRUE, {
write_parquet(d, tmp, schema = parquet_schema("INT_64"))
})
}, transform = redact_maxint64)

d <- data.frame(d = as.double(c(0:5, NA)))
write_parquet(d, tmp, schema = parquet_schema("UINT_64"))
Expand Down

0 comments on commit 6f290dc

Please sign in to comment.