From 6fa71093fdaa90003735b1af2bfec2a47a52f74b Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Mon, 6 Jan 2025 16:32:31 +0100 Subject: [PATCH 01/17] feat: add `AltrepDataFrameRelation` class --- src/CMakeLists.txt | 1 + src/Makevars | 2 +- src/altrepdataframe_relation.cpp | 22 ++++++++++++++++++++++ src/include/altrepdataframe_relation.hpp | 17 +++++++++++++++++ 4 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 src/altrepdataframe_relation.cpp create mode 100644 src/include/altrepdataframe_relation.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c1f9fc98a..ca417c971 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,6 +10,7 @@ add_library( register.cpp relational.cpp reltoaltrep.cpp + altrepdataframe_relation.cpp scan.cpp statement.cpp transform.cpp diff --git a/src/Makevars b/src/Makevars index 0eff645a7..939f119ea 100644 --- a/src/Makevars +++ b/src/Makevars @@ -16,4 +16,4 @@ include Makevars.duckdb CXX_STD = CXX17 PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -DBROTLI_ENCODER_CLEANUP_ON_OOM -Iduckdb/src/include -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/fast_float -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/httplib -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -Iduckdb/third_party/libpg_query -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/lz4 -Iduckdb/third_party/brotli/include -Iduckdb/third_party/brotli/common -Iduckdb/third_party/brotli/dec -Iduckdb/third_party/brotli/enc -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/miniz -Iduckdb/third_party/pcg -Iduckdb/third_party/re2 -Iduckdb/third_party/skiplist -Iduckdb/third_party/tdigest -Iduckdb/third_party/utf8proc -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/yyjson/include -Iduckdb/third_party/zstd/include -Iduckdb/extension/parquet/include -Iduckdb/third_party/parquet -Iduckdb/third_party/thrift -Iduckdb/third_party/lz4 -Iduckdb/third_party/brotli/include -Iduckdb/third_party/brotli/common -Iduckdb/third_party/brotli/dec -Iduckdb/third_party/brotli/enc -Iduckdb/third_party/snappy -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/zstd/include -Iduckdb/extension/core_functions/include -I../inst/include -Iduckdb -DDUCKDB_EXTENSION_PARQUET_LINKED -DDUCKDB_BUILD_LIBRARY -DDUCKDB_EXTENSION_CORE_FUNCTIONS_LINKED -DDUCKDB_BUILD_LIBRARY -OBJECTS=rfuns.o database.o connection.o statement.o register.o relational.o scan.o signal.o transform.o utils.o reltoaltrep.o types.o cpp11.o $(SOURCES) +OBJECTS=rfuns.o database.o connection.o statement.o register.o relational.o scan.o signal.o transform.o utils.o reltoaltrep.o altrepdataframe_relation.o types.o cpp11.o $(SOURCES) diff --git a/src/altrepdataframe_relation.cpp b/src/altrepdataframe_relation.cpp new file mode 100644 index 000000000..c4cc21d44 --- /dev/null +++ b/src/altrepdataframe_relation.cpp @@ -0,0 +1,22 @@ +#include "altrepdataframe_relation.hpp" + +namespace duckdb { + +AltrepDataFrameRelation::AltrepDataFrameRelation(shared_ptr parent) +// TODO: which RelationType should be used? + : Relation(parent->context, RelationType::AGGREGATE_RELATION), parent(std::move(parent)) { +} + +const vector &AltrepDataFrameRelation::Columns() { + return parent->Columns(); +} + +string AltrepDataFrameRelation::ToString(idx_t depth) { + return parent->ToString(depth); +} + +bool AltrepDataFrameRelation::IsReadOnly() { + return parent->IsReadOnly(); +} + +} diff --git a/src/include/altrepdataframe_relation.hpp b/src/include/altrepdataframe_relation.hpp new file mode 100644 index 000000000..a747b2479 --- /dev/null +++ b/src/include/altrepdataframe_relation.hpp @@ -0,0 +1,17 @@ +#include "duckdb/main/relation.hpp" + +namespace duckdb { + +class AltrepDataFrameRelation final : public Relation { +public: + AltrepDataFrameRelation(shared_ptr parent); + + shared_ptr parent; +public: + const vector &Columns() override; + string ToString(idx_t depth) override; + bool IsReadOnly() override; +}; + +} + From c59db7ee82d11827f4eb2f8a3c875fee286f3147 Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Mon, 6 Jan 2025 17:46:20 +0100 Subject: [PATCH 02/17] add `rel_project2` --- R/cpp11.R | 4 ++++ R/relational.R | 13 +++++++++++++ R/rethrow-gen.R | 10 ++++++++++ src/cpp11.cpp | 8 ++++++++ src/relational.cpp | 23 +++++++++++++++++++++++ 5 files changed, 58 insertions(+) diff --git a/R/cpp11.R b/R/cpp11.R index 764ed5b71..c2d723801 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -80,6 +80,10 @@ rapi_rel_filter <- function(rel, exprs) { .Call(`_duckdb_rapi_rel_filter`, rel, exprs) } +rapi_rel_project2 <- function(df, con, exprs) { + .Call(`_duckdb_rapi_rel_project2`, df, con, exprs) +} + rapi_rel_project <- function(rel, exprs) { .Call(`_duckdb_rapi_rel_project`, rel, exprs) } diff --git a/R/relational.R b/R/relational.R index 8d04fc8b1..9ce48a14d 100644 --- a/R/relational.R +++ b/R/relational.R @@ -133,6 +133,19 @@ rel_project <- function(rel, exprs) { rethrow_rapi_rel_project(rel, exprs) } +#' Lazily project a DuckDB relation object +#' @param rel the DuckDB relation object +#' @param exprs a list of DuckDB expressions to project +#' @return the now projected `duckdb_relation` object +#' @noRd +#' @examples +#' con <- DBI::dbConnect(duckdb()) +#' rel <- rel_from_df(con, mtcars) +#' rel2 <- rel_project(rel, list(expr_reference("cyl"), expr_reference("disp"))) +rel_project2 <- function(df, con, exprs) { + rethrow_rapi_rel_project2(as.data.frame(df), con@conn_ref, exprs) +} + #' Lazily filter a DuckDB relation object #' @param rel the DuckDB relation object #' @param exprs a list of DuckDB expressions to filter by diff --git a/R/rethrow-gen.R b/R/rethrow-gen.R index f4a639c2b..a58535ff3 100644 --- a/R/rethrow-gen.R +++ b/R/rethrow-gen.R @@ -189,6 +189,15 @@ rethrow_rapi_rel_project <- function(rel, exprs, call = parent.frame(2)) { ) } +rethrow_rapi_rel_project2 <- function(df, con, exprs, call = parent.frame(2)) { + rlang::try_fetch( + rapi_rel_project2(df, con, exprs), + error = function(e) { + rethrow_error_from_rapi(e, call) + } + ) +} + rethrow_rapi_rel_aggregate <- function(rel, groups, aggregates, call = parent.frame(2)) { rlang::try_fetch( rapi_rel_aggregate(rel, groups, aggregates), @@ -580,6 +589,7 @@ rethrow_restore <- function() { rethrow_rapi_rel_from_df <<- rapi_rel_from_df rethrow_rapi_rel_filter <<- rapi_rel_filter rethrow_rapi_rel_project <<- rapi_rel_project + rethrow_rapi_rel_project2 <<- rapi_rel_project2 rethrow_rapi_rel_aggregate <<- rapi_rel_aggregate rethrow_rapi_rel_order <<- rapi_rel_order rethrow_rapi_expr_window <<- rapi_expr_window diff --git a/src/cpp11.cpp b/src/cpp11.cpp index e1c0649e4..70117e562 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -154,6 +154,13 @@ extern "C" SEXP _duckdb_rapi_rel_filter(SEXP rel, SEXP exprs) { END_CPP11 } // relational.cpp +SEXP rapi_rel_project2(data_frame df, duckdb::conn_eptr_t con, list exprs); +extern "C" SEXP _duckdb_rapi_rel_project2(SEXP df, SEXP con, SEXP exprs) { + BEGIN_CPP11 + return cpp11::as_sexp(rapi_rel_project2(cpp11::as_cpp>(df), cpp11::as_cpp>(con), cpp11::as_cpp>(exprs))); + END_CPP11 +} +// relational.cpp SEXP rapi_rel_project(duckdb::rel_extptr_t rel, list exprs); extern "C" SEXP _duckdb_rapi_rel_project(SEXP rel, SEXP exprs) { BEGIN_CPP11 @@ -499,6 +506,7 @@ static const R_CallMethodDef CallEntries[] = { {"_duckdb_rapi_rel_names", (DL_FUNC) &_duckdb_rapi_rel_names, 1}, {"_duckdb_rapi_rel_order", (DL_FUNC) &_duckdb_rapi_rel_order, 3}, {"_duckdb_rapi_rel_project", (DL_FUNC) &_duckdb_rapi_rel_project, 2}, + {"_duckdb_rapi_rel_project2", (DL_FUNC) &_duckdb_rapi_rel_project2, 3}, {"_duckdb_rapi_rel_set_alias", (DL_FUNC) &_duckdb_rapi_rel_set_alias, 2}, {"_duckdb_rapi_rel_set_diff", (DL_FUNC) &_duckdb_rapi_rel_set_diff, 2}, {"_duckdb_rapi_rel_set_intersect", (DL_FUNC) &_duckdb_rapi_rel_set_intersect, 2}, diff --git a/src/relational.cpp b/src/relational.cpp index 3642638d7..bfa59550c 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -162,6 +162,29 @@ using namespace cpp11; return make_external_prot("duckdb_relation", prot, res); } +[[cpp11::register]] SEXP rapi_rel_project2(data_frame df, duckdb::conn_eptr_t con, list exprs) { + if (exprs.size() == 0) { + warning("rel_project without projection expressions has no effect"); + return df; + } + vector> projections; + vector aliases; + + for (expr_extptr_t expr : exprs) { + auto dexpr = expr->Copy(); + aliases.push_back(dexpr->GetName()); + projections.push_back(std::move(dexpr)); + } + + duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_df(con, df, false)); + + auto res = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); + + cpp11::writable::list prot = {rel}; + + return make_external_prot("duckdb_relation", prot, res); +} + [[cpp11::register]] SEXP rapi_rel_project(duckdb::rel_extptr_t rel, list exprs) { if (exprs.size() == 0) { warning("rel_project without projection expressions has no effect"); From 65e4a646913a89605feed8566a3a3db54189b142 Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Tue, 7 Jan 2025 10:14:20 +0100 Subject: [PATCH 03/17] stop if no expressions --- src/relational.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/relational.cpp b/src/relational.cpp index bfa59550c..2218d025c 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -164,8 +164,7 @@ using namespace cpp11; [[cpp11::register]] SEXP rapi_rel_project2(data_frame df, duckdb::conn_eptr_t con, list exprs) { if (exprs.size() == 0) { - warning("rel_project without projection expressions has no effect"); - return df; + stop("expected projection expressions"); } vector> projections; vector aliases; From d3a250e4e4282a068a41e7fc10ae63eb2b868d8a Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Tue, 7 Jan 2025 10:16:12 +0100 Subject: [PATCH 04/17] remove `as.data.frame` --- R/relational.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/relational.R b/R/relational.R index 9ce48a14d..0ff570346 100644 --- a/R/relational.R +++ b/R/relational.R @@ -143,7 +143,7 @@ rel_project <- function(rel, exprs) { #' rel <- rel_from_df(con, mtcars) #' rel2 <- rel_project(rel, list(expr_reference("cyl"), expr_reference("disp"))) rel_project2 <- function(df, con, exprs) { - rethrow_rapi_rel_project2(as.data.frame(df), con@conn_ref, exprs) + rethrow_rapi_rel_project2(df, con@conn_ref, exprs) } #' Lazily filter a DuckDB relation object From 45ae5e6ad7ede810b9a98e8807c3685e3439e21d Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Tue, 7 Jan 2025 11:36:01 +0100 Subject: [PATCH 05/17] add `rel_filter2` --- R/cpp11.R | 8 ++++++-- R/relational.R | 16 +++++++++++++++- R/rethrow-gen.R | 10 ++++++++++ src/cpp11.cpp | 14 +++++++++++--- src/relational.cpp | 36 +++++++++++++++++++++++++++++------- 5 files changed, 71 insertions(+), 13 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index c2d723801..9e7c9f740 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -80,14 +80,18 @@ rapi_rel_filter <- function(rel, exprs) { .Call(`_duckdb_rapi_rel_filter`, rel, exprs) } -rapi_rel_project2 <- function(df, con, exprs) { - .Call(`_duckdb_rapi_rel_project2`, df, con, exprs) +rapi_rel_filter2 <- function(df, con, exprs) { + .Call(`_duckdb_rapi_rel_filter2`, df, con, exprs) } rapi_rel_project <- function(rel, exprs) { .Call(`_duckdb_rapi_rel_project`, rel, exprs) } +rapi_rel_project2 <- function(df, con, exprs) { + .Call(`_duckdb_rapi_rel_project2`, df, con, exprs) +} + rapi_rel_aggregate <- function(rel, groups, aggregates) { .Call(`_duckdb_rapi_rel_aggregate`, rel, groups, aggregates) } diff --git a/R/relational.R b/R/relational.R index 0ff570346..f63872e29 100644 --- a/R/relational.R +++ b/R/relational.R @@ -143,7 +143,7 @@ rel_project <- function(rel, exprs) { #' rel <- rel_from_df(con, mtcars) #' rel2 <- rel_project(rel, list(expr_reference("cyl"), expr_reference("disp"))) rel_project2 <- function(df, con, exprs) { - rethrow_rapi_rel_project2(df, con@conn_ref, exprs) + rethrow_rapi_rel_project2(as.data.frame(df), con@conn_ref, exprs) } #' Lazily filter a DuckDB relation object @@ -160,6 +160,20 @@ rel_filter <- function(rel, exprs) { rethrow_rapi_rel_filter(rel, exprs) } +#' Lazily filter a DuckDB relation object +#' @param rel the DuckDB relation object +#' @param exprs a list of DuckDB expressions to filter by +#' @return the now filtered `duckdb_relation` object +#' @noRd +#' @examples +#' con <- DBI::dbConnect(duckdb()) +#' DBI::dbExecute(con, "CREATE OR REPLACE MACRO gt(a, b) AS a > b") +#' rel <- rel_from_df(con, mtcars) +#' rel2 <- rel_filter(rel, list(expr_function("gt", list(expr_reference("cyl"), expr_constant("6"))))) +rel_filter2 <- function(df, con, exprs) { + rethrow_rapi_rel_filter2(as.data.frame(df), con@conn_ref, exprs) +} + #' Lazily aggregate a DuckDB relation object #' @param rel the DuckDB relation object #' @param groups a list of DuckDB expressions to group by diff --git a/R/rethrow-gen.R b/R/rethrow-gen.R index a58535ff3..36d1a36aa 100644 --- a/R/rethrow-gen.R +++ b/R/rethrow-gen.R @@ -180,6 +180,15 @@ rethrow_rapi_rel_filter <- function(rel, exprs, call = parent.frame(2)) { ) } +rethrow_rapi_rel_filter2 <- function(df, con, exprs, call = parent.frame(2)) { + rlang::try_fetch( + rapi_rel_filter2(df, con, exprs), + error = function(e) { + rethrow_error_from_rapi(e, call) + } + ) +} + rethrow_rapi_rel_project <- function(rel, exprs, call = parent.frame(2)) { rlang::try_fetch( rapi_rel_project(rel, exprs), @@ -588,6 +597,7 @@ rethrow_restore <- function() { rethrow_rapi_expr_tostring <<- rapi_expr_tostring rethrow_rapi_rel_from_df <<- rapi_rel_from_df rethrow_rapi_rel_filter <<- rapi_rel_filter + rethrow_rapi_rel_filter2 <<- rapi_rel_filter2 rethrow_rapi_rel_project <<- rapi_rel_project rethrow_rapi_rel_project2 <<- rapi_rel_project2 rethrow_rapi_rel_aggregate <<- rapi_rel_aggregate diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 70117e562..6e1e93a10 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -154,10 +154,10 @@ extern "C" SEXP _duckdb_rapi_rel_filter(SEXP rel, SEXP exprs) { END_CPP11 } // relational.cpp -SEXP rapi_rel_project2(data_frame df, duckdb::conn_eptr_t con, list exprs); -extern "C" SEXP _duckdb_rapi_rel_project2(SEXP df, SEXP con, SEXP exprs) { +SEXP rapi_rel_filter2(data_frame df, duckdb::conn_eptr_t con, list exprs); +extern "C" SEXP _duckdb_rapi_rel_filter2(SEXP df, SEXP con, SEXP exprs) { BEGIN_CPP11 - return cpp11::as_sexp(rapi_rel_project2(cpp11::as_cpp>(df), cpp11::as_cpp>(con), cpp11::as_cpp>(exprs))); + return cpp11::as_sexp(rapi_rel_filter2(cpp11::as_cpp>(df), cpp11::as_cpp>(con), cpp11::as_cpp>(exprs))); END_CPP11 } // relational.cpp @@ -168,6 +168,13 @@ extern "C" SEXP _duckdb_rapi_rel_project(SEXP rel, SEXP exprs) { END_CPP11 } // relational.cpp +SEXP rapi_rel_project2(data_frame df, duckdb::conn_eptr_t con, list exprs); +extern "C" SEXP _duckdb_rapi_rel_project2(SEXP df, SEXP con, SEXP exprs) { + BEGIN_CPP11 + return cpp11::as_sexp(rapi_rel_project2(cpp11::as_cpp>(df), cpp11::as_cpp>(con), cpp11::as_cpp>(exprs))); + END_CPP11 +} +// relational.cpp SEXP rapi_rel_aggregate(duckdb::rel_extptr_t rel, list groups, list aggregates); extern "C" SEXP _duckdb_rapi_rel_aggregate(SEXP rel, SEXP groups, SEXP aggregates) { BEGIN_CPP11 @@ -495,6 +502,7 @@ static const R_CallMethodDef CallEntries[] = { {"_duckdb_rapi_rel_distinct", (DL_FUNC) &_duckdb_rapi_rel_distinct, 1}, {"_duckdb_rapi_rel_explain", (DL_FUNC) &_duckdb_rapi_rel_explain, 3}, {"_duckdb_rapi_rel_filter", (DL_FUNC) &_duckdb_rapi_rel_filter, 2}, + {"_duckdb_rapi_rel_filter2", (DL_FUNC) &_duckdb_rapi_rel_filter2, 3}, {"_duckdb_rapi_rel_from_altrep_df", (DL_FUNC) &_duckdb_rapi_rel_from_altrep_df, 3}, {"_duckdb_rapi_rel_from_df", (DL_FUNC) &_duckdb_rapi_rel_from_df, 3}, {"_duckdb_rapi_rel_from_sql", (DL_FUNC) &_duckdb_rapi_rel_from_sql, 2}, diff --git a/src/relational.cpp b/src/relational.cpp index 2218d025c..aa6251638 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -162,9 +162,32 @@ using namespace cpp11; return make_external_prot("duckdb_relation", prot, res); } -[[cpp11::register]] SEXP rapi_rel_project2(data_frame df, duckdb::conn_eptr_t con, list exprs) { +[[cpp11::register]] SEXP rapi_rel_filter2(data_frame df, duckdb::conn_eptr_t con, list exprs) { + duckdb::unique_ptr filter_expr; + if (exprs.size() == 0) { // nop + stop("expected filter expressions"); + } else if (exprs.size() == 1) { + filter_expr = ((expr_extptr_t)exprs[0])->Copy(); + } else { + vector> filters; + for (expr_extptr_t expr : exprs) { + filters.push_back(expr->Copy()); + } + filter_expr = make_uniq(ExpressionType::CONJUNCTION_AND, std::move(filters)); + } + duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_df(con, df, false)); + + auto res = make_shared_ptr(rel->rel, std::move(filter_expr)); + + cpp11::writable::list prot = {rel}; + + return make_external_prot("duckdb_relation", prot, res); +} + +[[cpp11::register]] SEXP rapi_rel_project(duckdb::rel_extptr_t rel, list exprs) { if (exprs.size() == 0) { - stop("expected projection expressions"); + warning("rel_project without projection expressions has no effect"); + return rel; } vector> projections; vector aliases; @@ -175,8 +198,6 @@ using namespace cpp11; projections.push_back(std::move(dexpr)); } - duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_df(con, df, false)); - auto res = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); cpp11::writable::list prot = {rel}; @@ -184,10 +205,9 @@ using namespace cpp11; return make_external_prot("duckdb_relation", prot, res); } -[[cpp11::register]] SEXP rapi_rel_project(duckdb::rel_extptr_t rel, list exprs) { +[[cpp11::register]] SEXP rapi_rel_project2(data_frame df, duckdb::conn_eptr_t con, list exprs) { if (exprs.size() == 0) { - warning("rel_project without projection expressions has no effect"); - return rel; + stop("expected projection expressions"); } vector> projections; vector aliases; @@ -198,6 +218,8 @@ using namespace cpp11; projections.push_back(std::move(dexpr)); } + duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_df(con, df, false)); + auto res = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); cpp11::writable::list prot = {rel}; From 7e3bdb84495bc6d2056302728634ff0c12a403fe Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Tue, 7 Jan 2025 14:41:26 +0100 Subject: [PATCH 06/17] fix: remove `as.data.frame` --- R/relational.R | 4 ++-- test-old.R | 43 +++++++++++++++++++++++++++++++++++++++++++ test.R | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 test-old.R create mode 100644 test.R diff --git a/R/relational.R b/R/relational.R index f63872e29..8e6982d23 100644 --- a/R/relational.R +++ b/R/relational.R @@ -143,7 +143,7 @@ rel_project <- function(rel, exprs) { #' rel <- rel_from_df(con, mtcars) #' rel2 <- rel_project(rel, list(expr_reference("cyl"), expr_reference("disp"))) rel_project2 <- function(df, con, exprs) { - rethrow_rapi_rel_project2(as.data.frame(df), con@conn_ref, exprs) + rethrow_rapi_rel_project2(df, con@conn_ref, exprs) } #' Lazily filter a DuckDB relation object @@ -171,7 +171,7 @@ rel_filter <- function(rel, exprs) { #' rel <- rel_from_df(con, mtcars) #' rel2 <- rel_filter(rel, list(expr_function("gt", list(expr_reference("cyl"), expr_constant("6"))))) rel_filter2 <- function(df, con, exprs) { - rethrow_rapi_rel_filter2(as.data.frame(df), con@conn_ref, exprs) + rethrow_rapi_rel_filter2(df, con@conn_ref, exprs) } #' Lazily aggregate a DuckDB relation object diff --git a/test-old.R b/test-old.R new file mode 100644 index 000000000..35a67e97b --- /dev/null +++ b/test-old.R @@ -0,0 +1,43 @@ +drv <- duckdb::duckdb() +con <- DBI::dbConnect(drv) +df1 <- tibble::tibble(a = 1) + +"mutate" +#> [1] "mutate" +rel1 <- duckdb:::rel_from_df(con, df1) +"mutate" +#> [1] "mutate" +rel2 <- duckdb:::rel_project( + rel1, + list( + { + tmp_expr <- duckdb:::expr_reference("a") + duckdb:::expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb:::expr_constant(2) + duckdb:::expr_set_alias(tmp_expr, "b") + tmp_expr + } + ) +) +"filter" +#> [1] "filter" +rel3 <- duckdb:::rel_filter( + rel2, + list( + duckdb:::expr_comparison( + "==", + list( + duckdb:::expr_reference("b"), + duckdb:::expr_constant(2) + ) + ) + ) +) +rel2 +rel3 +duckdb:::rel_to_altrep(rel2) +rel2 +rel3 diff --git a/test.R b/test.R new file mode 100644 index 000000000..d66dbd44a --- /dev/null +++ b/test.R @@ -0,0 +1,48 @@ +drv <- duckdb::duckdb() +con <- DBI::dbConnect(drv) +df1 <- tibble::tibble(a = 1) + +df1 +"mutate" +rel2 <- duckdb:::rel_project2( + df1, + con, + list( + { + tmp_expr <- duckdb:::expr_reference("a") + duckdb:::expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb:::expr_constant(2) + duckdb:::expr_set_alias(tmp_expr, "b") + tmp_expr + } + ) +) + +df2 <- duckdb:::rel_to_altrep(rel2) +df1 +df2 +typeof(df1) +typeof(df2) + +"filter" +rel3 <- duckdb:::rel_filter2( + df2, + con, + list( + duckdb:::expr_comparison( + "==", + list( + duckdb:::expr_reference("b"), + duckdb:::expr_constant(2) + ) + ) + ) +) +rel2 +rel3 +duckdb:::rel_to_altrep(rel2) +rel2 +rel3 From ca1ac1723468484ca25269240a44662a2dc2cfba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Tue, 7 Jan 2025 15:08:21 +0100 Subject: [PATCH 07/17] Maybe this -- but doesn't work yet --- src/relational.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/relational.cpp b/src/relational.cpp index aa6251638..03b941598 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -162,6 +162,8 @@ using namespace cpp11; return make_external_prot("duckdb_relation", prot, res); } +SEXP rapi_rel_from_altrep_df(SEXP df, bool strict, bool allow_materialized); + [[cpp11::register]] SEXP rapi_rel_filter2(data_frame df, duckdb::conn_eptr_t con, list exprs) { duckdb::unique_ptr filter_expr; if (exprs.size() == 0) { // nop @@ -175,7 +177,7 @@ using namespace cpp11; } filter_expr = make_uniq(ExpressionType::CONJUNCTION_AND, std::move(filters)); } - duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_df(con, df, false)); + duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_altrep_df(df, false, true)); auto res = make_shared_ptr(rel->rel, std::move(filter_expr)); @@ -218,7 +220,7 @@ using namespace cpp11; projections.push_back(std::move(dexpr)); } - duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_df(con, df, false)); + duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_altrep_df( df, false, true)); auto res = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); From af84bd448b5ed63c70f17b01739481f53513beb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Tue, 7 Jan 2025 15:09:04 +0100 Subject: [PATCH 08/17] Strict --- src/relational.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/relational.cpp b/src/relational.cpp index 03b941598..18e073acd 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -177,7 +177,7 @@ SEXP rapi_rel_from_altrep_df(SEXP df, bool strict, bool allow_materialized); } filter_expr = make_uniq(ExpressionType::CONJUNCTION_AND, std::move(filters)); } - duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_altrep_df(df, false, true)); + duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_altrep_df(df, true, true)); auto res = make_shared_ptr(rel->rel, std::move(filter_expr)); @@ -220,7 +220,7 @@ SEXP rapi_rel_from_altrep_df(SEXP df, bool strict, bool allow_materialized); projections.push_back(std::move(dexpr)); } - duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_altrep_df( df, false, true)); + duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_altrep_df( df, true, true)); auto res = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); From eddf8164ccaf187b33aeebb04f8c16e595b4be6e Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Tue, 7 Jan 2025 19:52:20 +0100 Subject: [PATCH 09/17] add `rapi_rel_from_any_df` --- src/include/rapi.hpp | 2 ++ src/include/reltoaltrep.hpp | 4 ++++ src/relational.cpp | 16 ++++++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/include/rapi.hpp b/src/include/rapi.hpp index 50fe9a86b..5c3f0074d 100644 --- a/src/include/rapi.hpp +++ b/src/include/rapi.hpp @@ -220,6 +220,8 @@ SEXP rapi_record_batch(duckdb::rqry_eptr_t, int); cpp11::r_string rapi_ptr_to_str(SEXP extptr); +SEXP rapi_rel_from_df(duckdb::conn_eptr_t con, cpp11::data_frame df, bool experimental); + void duckdb_r_transform(duckdb::Vector &src_vec, SEXP dest, duckdb::idx_t dest_offset, duckdb::idx_t n, bool integer64); SEXP duckdb_r_allocate(const duckdb::LogicalType &type, duckdb::idx_t nrows); void duckdb_r_decorate(const duckdb::LogicalType &type, SEXP dest, bool integer64); diff --git a/src/include/reltoaltrep.hpp b/src/include/reltoaltrep.hpp index 57ede82ad..464971fb7 100644 --- a/src/include/reltoaltrep.hpp +++ b/src/include/reltoaltrep.hpp @@ -27,3 +27,7 @@ struct RelToAltrep { static R_altrep_class_t list_class; #endif }; + +SEXP rapi_rel_from_altrep_df(SEXP df, bool strict, bool allow_materialized); + +SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materialized); diff --git a/src/relational.cpp b/src/relational.cpp index 18e073acd..0a7f633ac 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -1,6 +1,7 @@ #include "rapi.hpp" #include "signal.hpp" #include "typesr.hpp" +#include "reltoaltrep.hpp" #include "R_ext/Random.h" @@ -141,6 +142,15 @@ using namespace cpp11; return res; } +SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materialized) { + auto rel = rapi_rel_from_altrep_df(df, false, allow_materialized); + if (rel != R_NilValue) { + return rel; + } + + return rapi_rel_from_df(con, df, false); +} + [[cpp11::register]] SEXP rapi_rel_filter(duckdb::rel_extptr_t rel, list exprs) { duckdb::unique_ptr filter_expr; if (exprs.size() == 0) { // nop @@ -162,8 +172,6 @@ using namespace cpp11; return make_external_prot("duckdb_relation", prot, res); } -SEXP rapi_rel_from_altrep_df(SEXP df, bool strict, bool allow_materialized); - [[cpp11::register]] SEXP rapi_rel_filter2(data_frame df, duckdb::conn_eptr_t con, list exprs) { duckdb::unique_ptr filter_expr; if (exprs.size() == 0) { // nop @@ -177,7 +185,7 @@ SEXP rapi_rel_from_altrep_df(SEXP df, bool strict, bool allow_materialized); } filter_expr = make_uniq(ExpressionType::CONJUNCTION_AND, std::move(filters)); } - duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_altrep_df(df, true, true)); + duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_any_df(con, df, true)); auto res = make_shared_ptr(rel->rel, std::move(filter_expr)); @@ -220,7 +228,7 @@ SEXP rapi_rel_from_altrep_df(SEXP df, bool strict, bool allow_materialized); projections.push_back(std::move(dexpr)); } - duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_altrep_df( df, true, true)); + duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_any_df(con, df, true)); auto res = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); From 889af85911782c21738284e5fe3227689a898993 Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Wed, 8 Jan 2025 12:21:10 +0100 Subject: [PATCH 10/17] use `AltrepDataFrameRelation` --- src/altrepdataframe_relation.cpp | 7 ++++++- src/include/altrepdataframe_relation.hpp | 4 +++- src/relational.cpp | 9 +++++++-- src/reltoaltrep.cpp | 2 +- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/altrepdataframe_relation.cpp b/src/altrepdataframe_relation.cpp index c4cc21d44..af8250029 100644 --- a/src/altrepdataframe_relation.cpp +++ b/src/altrepdataframe_relation.cpp @@ -5,10 +5,11 @@ namespace duckdb { AltrepDataFrameRelation::AltrepDataFrameRelation(shared_ptr parent) // TODO: which RelationType should be used? : Relation(parent->context, RelationType::AGGREGATE_RELATION), parent(std::move(parent)) { + TryBindRelation(columns); } const vector &AltrepDataFrameRelation::Columns() { - return parent->Columns(); + return columns; } string AltrepDataFrameRelation::ToString(idx_t depth) { @@ -19,4 +20,8 @@ bool AltrepDataFrameRelation::IsReadOnly() { return parent->IsReadOnly(); } +unique_ptr AltrepDataFrameRelation::GetQueryNode() { + return parent->GetQueryNode(); +} + } diff --git a/src/include/altrepdataframe_relation.hpp b/src/include/altrepdataframe_relation.hpp index a747b2479..b149c8d92 100644 --- a/src/include/altrepdataframe_relation.hpp +++ b/src/include/altrepdataframe_relation.hpp @@ -7,11 +7,13 @@ class AltrepDataFrameRelation final : public Relation { AltrepDataFrameRelation(shared_ptr parent); shared_ptr parent; + vector columns; public: + unique_ptr GetQueryNode() override; + const vector &Columns() override; string ToString(idx_t depth) override; bool IsReadOnly() override; }; } - diff --git a/src/relational.cpp b/src/relational.cpp index 0a7f633ac..d788c90fc 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -2,6 +2,7 @@ #include "signal.hpp" #include "typesr.hpp" #include "reltoaltrep.hpp" +#include "altrepdataframe_relation.hpp" #include "R_ext/Random.h" @@ -187,7 +188,9 @@ SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materiali } duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_any_df(con, df, true)); - auto res = make_shared_ptr(rel->rel, std::move(filter_expr)); + auto filter = make_shared_ptr(rel->rel, std::move(filter_expr)); + + auto res = make_shared_ptr(filter); cpp11::writable::list prot = {rel}; @@ -230,7 +233,9 @@ SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materiali duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_any_df(con, df, true)); - auto res = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); + auto projection = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); + + auto res = make_shared_ptr(projection); cpp11::writable::list prot = {rel}; diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index d5d9faa3f..2c72bb64e 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -114,7 +114,7 @@ struct AltrepRelationWrapper { } auto materialize_message = Rf_GetOption(RStrings::get().materialize_message_sym, R_BaseEnv); - if (Rf_isLogical(materialize_message) && Rf_length(materialize_message) == 1 && LOGICAL_ELT(materialize_message, 0) == true) { + if (Rf_isLogical(materialize_message) && Rf_length(materialize_message) == 1 && LOGICAL_ELT(materialize_message, 0) == true) { // Legacy Rprintf("duckplyr: materializing\n"); } From 32a0eb836c3892f719eae3b7770d911196319e28 Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Thu, 9 Jan 2025 14:08:38 +0100 Subject: [PATCH 11/17] add `EXTENSION_RELATION` type --- src/duckdb/src/common/enum_util.cpp | 7 ++++--- src/duckdb/src/common/enums/relation_type.cpp | 2 ++ .../src/include/duckdb/common/enums/relation_type.hpp | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index 23d9c6e27..a03b33d63 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -3034,19 +3034,20 @@ const StringUtil::EnumStringLiteral *GetRelationTypeValues() { { static_cast(RelationType::VIEW_RELATION), "VIEW_RELATION" }, { static_cast(RelationType::QUERY_RELATION), "QUERY_RELATION" }, { static_cast(RelationType::DELIM_JOIN_RELATION), "DELIM_JOIN_RELATION" }, - { static_cast(RelationType::DELIM_GET_RELATION), "DELIM_GET_RELATION" } + { static_cast(RelationType::DELIM_GET_RELATION), "DELIM_GET_RELATION" }, + { static_cast(RelationType::EXTENSION_RELATION), "EXTENSION_RELATION" } }; return values; } template<> const char* EnumUtil::ToChars(RelationType value) { - return StringUtil::EnumToString(GetRelationTypeValues(), 28, "RelationType", static_cast(value)); + return StringUtil::EnumToString(GetRelationTypeValues(), 29, "RelationType", static_cast(value)); } template<> RelationType EnumUtil::FromString(const char *value) { - return static_cast(StringUtil::StringToEnum(GetRelationTypeValues(), 28, "RelationType", value)); + return static_cast(StringUtil::StringToEnum(GetRelationTypeValues(), 29, "RelationType", value)); } const StringUtil::EnumStringLiteral *GetRenderModeValues() { diff --git a/src/duckdb/src/common/enums/relation_type.cpp b/src/duckdb/src/common/enums/relation_type.cpp index 4f58ed7c4..dc02b8970 100644 --- a/src/duckdb/src/common/enums/relation_type.cpp +++ b/src/duckdb/src/common/enums/relation_type.cpp @@ -61,6 +61,8 @@ string RelationTypeToString(RelationType type) { return "VIEW_RELATION"; case RelationType::QUERY_RELATION: return "QUERY_RELATION"; + case RelationType::EXTENSION_RELATION: + return "EXTENSION_RELATION"; case RelationType::INVALID_RELATION: break; } diff --git a/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp b/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp index 302b2f369..bca6af491 100644 --- a/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +++ b/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp @@ -43,7 +43,8 @@ enum class RelationType : uint8_t { VIEW_RELATION, QUERY_RELATION, DELIM_JOIN_RELATION, - DELIM_GET_RELATION + DELIM_GET_RELATION, + EXTENSION_RELATION = 255 }; string RelationTypeToString(RelationType type); From 5a8961043790026ac5a280c269f90cadc9e7b3a3 Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Thu, 9 Jan 2025 14:09:18 +0100 Subject: [PATCH 12/17] change `rapi_rel_to_altrep` to handle df --- src/altrepdataframe_relation.cpp | 39 ++++++-- src/cpp11.cpp | 8 +- src/include/altrepdataframe_relation.hpp | 15 ++- src/include/reltoaltrep.hpp | 22 +++++ src/relational.cpp | 8 +- src/reltoaltrep.cpp | 113 +++++++++++------------ 6 files changed, 127 insertions(+), 78 deletions(-) diff --git a/src/altrepdataframe_relation.cpp b/src/altrepdataframe_relation.cpp index af8250029..999748d40 100644 --- a/src/altrepdataframe_relation.cpp +++ b/src/altrepdataframe_relation.cpp @@ -1,10 +1,15 @@ #include "altrepdataframe_relation.hpp" +#include "R_ext/Random.h" + namespace duckdb { -AltrepDataFrameRelation::AltrepDataFrameRelation(shared_ptr parent) -// TODO: which RelationType should be used? - : Relation(parent->context, RelationType::AGGREGATE_RELATION), parent(std::move(parent)) { +AltrepDataFrameRelation::AltrepDataFrameRelation(duckdb::shared_ptr p, cpp11::sexp df, duckdb::conn_eptr_t con, duckdb::shared_ptr altrep) + : Relation(p->context, RelationType::EXTENSION_RELATION) + , altrep(std::move(altrep)) + , dataframe(df) + , connection(std::move(con)) + , parent(std::move(p)) { TryBindRelation(columns); } @@ -13,15 +18,37 @@ const vector &AltrepDataFrameRelation::Columns() { } string AltrepDataFrameRelation::ToString(idx_t depth) { - return parent->ToString(depth); + return GetParent().ToString(depth); } bool AltrepDataFrameRelation::IsReadOnly() { - return parent->IsReadOnly(); + return GetParent().IsReadOnly(); } unique_ptr AltrepDataFrameRelation::GetQueryNode() { - return parent->GetQueryNode(); + return GetParent().GetQueryNode(); +} + +Relation& AltrepDataFrameRelation::GetParent() { + if (altrep->HasQueryResult()) { + // here context mutex locked + return GetTableRelation(); + } else { + return *parent; + } +} + +Relation& AltrepDataFrameRelation::GetTableRelation() { + if (!table_function_relation) { + named_parameter_map_t other_params; + other_params["experimental"] = Value::BOOLEAN(false); + auto alias = StringUtil::Format("dataframe_%d_%d", (uintptr_t)(SEXP)dataframe, + (int32_t)(NumericLimits::Maximum() * unif_rand())); + + table_function_relation = connection->conn->TableFunction("r_dataframe_scan", {Value::POINTER((uintptr_t)(SEXP)dataframe)}, other_params)->Alias(alias); + } + + return *table_function_relation; } } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 6e1e93a10..58c2ae4df 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -361,10 +361,10 @@ extern "C" SEXP _duckdb_rapi_rel_insert(SEXP rel, SEXP schema_name, SEXP table_n END_CPP11 } // reltoaltrep.cpp -SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, bool allow_materialization); -extern "C" SEXP _duckdb_rapi_rel_to_altrep(SEXP rel, SEXP allow_materialization) { +SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, duckdb::conn_eptr_t con, bool allow_materialization); +extern "C" SEXP _duckdb_rapi_rel_to_altrep(SEXP rel, SEXP con, SEXP allow_materialization) { BEGIN_CPP11 - return cpp11::as_sexp(rapi_rel_to_altrep(cpp11::as_cpp>(rel), cpp11::as_cpp>(allow_materialization))); + return cpp11::as_sexp(rapi_rel_to_altrep(cpp11::as_cpp>(rel), cpp11::as_cpp>(con), cpp11::as_cpp>(allow_materialization))); END_CPP11 } // reltoaltrep.cpp @@ -520,7 +520,7 @@ static const R_CallMethodDef CallEntries[] = { {"_duckdb_rapi_rel_set_intersect", (DL_FUNC) &_duckdb_rapi_rel_set_intersect, 2}, {"_duckdb_rapi_rel_set_symdiff", (DL_FUNC) &_duckdb_rapi_rel_set_symdiff, 2}, {"_duckdb_rapi_rel_sql", (DL_FUNC) &_duckdb_rapi_rel_sql, 2}, - {"_duckdb_rapi_rel_to_altrep", (DL_FUNC) &_duckdb_rapi_rel_to_altrep, 2}, + {"_duckdb_rapi_rel_to_altrep", (DL_FUNC) &_duckdb_rapi_rel_to_altrep, 3}, {"_duckdb_rapi_rel_to_csv", (DL_FUNC) &_duckdb_rapi_rel_to_csv, 3}, {"_duckdb_rapi_rel_to_df", (DL_FUNC) &_duckdb_rapi_rel_to_df, 1}, {"_duckdb_rapi_rel_to_parquet", (DL_FUNC) &_duckdb_rapi_rel_to_parquet, 3}, diff --git a/src/include/altrepdataframe_relation.hpp b/src/include/altrepdataframe_relation.hpp index b149c8d92..b04bb02ea 100644 --- a/src/include/altrepdataframe_relation.hpp +++ b/src/include/altrepdataframe_relation.hpp @@ -1,12 +1,18 @@ #include "duckdb/main/relation.hpp" +#include "rapi.hpp" +#include "reltoaltrep.hpp" namespace duckdb { class AltrepDataFrameRelation final : public Relation { public: - AltrepDataFrameRelation(shared_ptr parent); + AltrepDataFrameRelation(duckdb::shared_ptr p, cpp11::sexp df, duckdb::conn_eptr_t con, duckdb::shared_ptr altrep); - shared_ptr parent; + shared_ptr table_function_relation; + cpp11::sexp dataframe; + duckdb::conn_eptr_t connection; + duckdb::shared_ptr altrep; + duckdb::shared_ptr parent; vector columns; public: unique_ptr GetQueryNode() override; @@ -14,6 +20,11 @@ class AltrepDataFrameRelation final : public Relation { const vector &Columns() override; string ToString(idx_t depth) override; bool IsReadOnly() override; + +private: + Relation& GetTableRelation(); + + Relation& GetParent(); }; } diff --git a/src/include/reltoaltrep.hpp b/src/include/reltoaltrep.hpp index 464971fb7..189dbbe0a 100644 --- a/src/include/reltoaltrep.hpp +++ b/src/include/reltoaltrep.hpp @@ -2,6 +2,26 @@ #include "rapi.hpp" +namespace duckdb { + +struct AltrepRelationWrapper { + AltrepRelationWrapper(rel_extptr_t rel_, bool allow_materialization_); + + static AltrepRelationWrapper *Get(SEXP x); + + bool HasQueryResult() const; + + MaterializedQueryResult *GetQueryResult(); + + bool allow_materialization; + + rel_extptr_t rel_eptr; + duckdb::shared_ptr rel; + duckdb::unique_ptr res; +}; + +} + struct RelToAltrep { static void Initialize(DllInfo *dll); static R_xlen_t RownamesLength(SEXP x); @@ -31,3 +51,5 @@ struct RelToAltrep { SEXP rapi_rel_from_altrep_df(SEXP df, bool strict, bool allow_materialized); SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materialized); + +SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, duckdb::conn_eptr_t con, bool allow_materialization); diff --git a/src/relational.cpp b/src/relational.cpp index d788c90fc..9c386be83 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -190,11 +190,9 @@ SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materiali auto filter = make_shared_ptr(rel->rel, std::move(filter_expr)); - auto res = make_shared_ptr(filter); - cpp11::writable::list prot = {rel}; - return make_external_prot("duckdb_relation", prot, res); + return rapi_rel_to_altrep(make_external_prot("duckdb_relation", prot, filter), con, true); } [[cpp11::register]] SEXP rapi_rel_project(duckdb::rel_extptr_t rel, list exprs) { @@ -235,11 +233,9 @@ SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materiali auto projection = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); - auto res = make_shared_ptr(projection); - cpp11::writable::list prot = {rel}; - return make_external_prot("duckdb_relation", prot, res); + return rapi_rel_to_altrep(make_external_prot("duckdb_relation", prot, projection), con, true); } [[cpp11::register]] SEXP rapi_rel_aggregate(duckdb::rel_extptr_t rel, list groups, list aggregates) { diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index 2c72bb64e..a19ae0e13 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -5,6 +5,7 @@ #include "reltoaltrep.hpp" #include "signal.hpp" #include "cpp11/declarations.hpp" +#include "altrepdataframe_relation.hpp" #include "httplib.hpp" #include @@ -87,78 +88,69 @@ static T *GetFromExternalPtr(SEXP x) { return wrapper; } -struct AltrepRelationWrapper { - - static AltrepRelationWrapper *Get(SEXP x) { - return GetFromExternalPtr(x); - } +AltrepRelationWrapper *AltrepRelationWrapper::Get(SEXP x) { + return GetFromExternalPtr(x); +} - AltrepRelationWrapper(rel_extptr_t rel_, bool allow_materialization_) - : allow_materialization(allow_materialization_), rel_eptr(rel_), rel(rel_->rel) { - } +AltrepRelationWrapper::AltrepRelationWrapper(rel_extptr_t rel_, bool allow_materialization_) + : allow_materialization(allow_materialization_), rel_eptr(rel_), rel(rel_->rel) { +} - bool HasQueryResult() const { - return (bool)res; - } +bool AltrepRelationWrapper::HasQueryResult() const { + return (bool)res; +} - MaterializedQueryResult *GetQueryResult() { - if (!res) { - if (!allow_materialization) { - cpp11::stop("Materialization is disabled, use collect() or as_tibble() to materialize"); - } +MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() { + if (!res) { + if (!allow_materialization) { + cpp11::stop("Materialization is disabled, use collect() or as_tibble() to materialize"); + } - auto materialize_callback = Rf_GetOption(RStrings::get().materialize_callback_sym, R_BaseEnv); - if (Rf_isFunction(materialize_callback)) { - sexp call = Rf_lang2(materialize_callback, rel_eptr); - Rf_eval(call, R_BaseEnv); - } + auto materialize_callback = Rf_GetOption(RStrings::get().materialize_callback_sym, R_BaseEnv); + if (Rf_isFunction(materialize_callback)) { + sexp call = Rf_lang2(materialize_callback, rel_eptr); + Rf_eval(call, R_BaseEnv); + } - auto materialize_message = Rf_GetOption(RStrings::get().materialize_message_sym, R_BaseEnv); - if (Rf_isLogical(materialize_message) && Rf_length(materialize_message) == 1 && LOGICAL_ELT(materialize_message, 0) == true) { - // Legacy - Rprintf("duckplyr: materializing\n"); - } + auto materialize_message = Rf_GetOption(RStrings::get().materialize_message_sym, R_BaseEnv); + if (Rf_isLogical(materialize_message) && Rf_length(materialize_message) == 1 && LOGICAL_ELT(materialize_message, 0) == true) { + // Legacy + Rprintf("duckplyr: materializing\n"); + } - ScopedInterruptHandler signal_handler(rel->context->GetContext()); + ScopedInterruptHandler signal_handler(rel->context->GetContext()); - // We need to temporarily allow a deeper execution stack - // https://github.com/duckdb/duckdb-r/issues/101 - auto old_depth = rel->context->GetContext()->config.max_expression_depth; - rel->context->GetContext()->config.max_expression_depth = old_depth * 2; - duckdb_httplib::detail::scope_exit reset_max_expression_depth( - [&]() { rel->context->GetContext()->config.max_expression_depth = old_depth; }); + // We need to temporarily allow a deeper execution stack + // https://github.com/duckdb/duckdb-r/issues/101 + auto old_depth = rel->context->GetContext()->config.max_expression_depth; + rel->context->GetContext()->config.max_expression_depth = old_depth * 2; + duckdb_httplib::detail::scope_exit reset_max_expression_depth( + [&]() { rel->context->GetContext()->config.max_expression_depth = old_depth; }); - res = rel->Execute(); + res = rel->Execute(); - // FIXME: Use std::experimental::scope_exit - if (rel->context->GetContext()->config.max_expression_depth != old_depth * 2) { - Rprintf("Internal error: max_expression_depth was changed from %" PRIu64 " to %" PRIu64 "\n", - old_depth * 2, rel->context->GetContext()->config.max_expression_depth); - } - rel->context->GetContext()->config.max_expression_depth = old_depth; - reset_max_expression_depth.release(); + // FIXME: Use std::experimental::scope_exit + if (rel->context->GetContext()->config.max_expression_depth != old_depth * 2) { + Rprintf("Internal error: max_expression_depth was changed from %" PRIu64 " to %" PRIu64 "\n", + old_depth * 2, rel->context->GetContext()->config.max_expression_depth); + } + rel->context->GetContext()->config.max_expression_depth = old_depth; + reset_max_expression_depth.release(); - if (signal_handler.HandleInterrupt()) { - cpp11::stop("Query execution was interrupted"); - } + if (signal_handler.HandleInterrupt()) { + cpp11::stop("Query execution was interrupted"); + } - signal_handler.Disable(); + signal_handler.Disable(); - if (res->HasError()) { - cpp11::stop("Error evaluating duckdb query: %s", res->GetError().c_str()); - } - D_ASSERT(res->type == QueryResultType::MATERIALIZED_RESULT); + if (res->HasError()) { + cpp11::stop("Error evaluating duckdb query: %s", res->GetError().c_str()); } - D_ASSERT(res); - return (MaterializedQueryResult *)res.get(); + D_ASSERT(res->type == QueryResultType::MATERIALIZED_RESULT); } - - bool allow_materialization; - - rel_extptr_t rel_eptr; - duckdb::shared_ptr rel; - duckdb::unique_ptr res; -}; + D_ASSERT(res); + return (MaterializedQueryResult *)res.get(); +} struct AltrepRownamesWrapper { @@ -348,7 +340,7 @@ static R_altrep_class_t LogicalTypeToAltrepType(const LogicalType &type) { } } -[[cpp11::register]] SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, bool allow_materialization) { +[[cpp11::register]] SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, duckdb::conn_eptr_t con, bool allow_materialization) { D_ASSERT(rel && rel->rel); auto drel = rel->rel; auto ncols = drel->Columns().size(); @@ -381,7 +373,8 @@ static R_altrep_class_t LogicalTypeToAltrepType(const LogicalType &type) { // Row names cpp11::external_pointer ptr(new AltrepRownamesWrapper(relation_wrapper)); R_SetExternalPtrTag(ptr, RStrings::get().duckdb_row_names_sym); - cpp11::sexp row_names_sexp = R_new_altrep(RelToAltrep::rownames_class, ptr, rel); + cpp11::sexp row_names_sexp = R_new_altrep(RelToAltrep::rownames_class, ptr, make_external("duckdb_relation", + make_shared_ptr(rel->rel, data_frame, con, relation_wrapper))); install_new_attrib(data_frame, R_RowNamesSymbol, row_names_sexp); // Class From 2a0c3449ab5ac7ccde77bc0d8d0128ce3fe8b8a1 Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Thu, 9 Jan 2025 15:02:52 +0100 Subject: [PATCH 13/17] add connection --- R/cpp11.R | 4 ++-- R/cpp12.R | 4 ++-- R/relational.R | 4 ++-- R/rethrow-gen.R | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index 9e7c9f740..6ed7648b7 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -196,8 +196,8 @@ rapi_rel_insert <- function(rel, schema_name, table_name) { invisible(.Call(`_duckdb_rapi_rel_insert`, rel, schema_name, table_name)) } -rapi_rel_to_altrep <- function(rel, allow_materialization) { - .Call(`_duckdb_rapi_rel_to_altrep`, rel, allow_materialization) +rapi_rel_to_altrep <- function(rel, con, allow_materialization) { + .Call(`_duckdb_rapi_rel_to_altrep`, rel, con, allow_materialization) } rapi_rel_from_altrep_df <- function(df, strict, allow_materialized) { diff --git a/R/cpp12.R b/R/cpp12.R index b5e8e882c..0db34c931 100644 --- a/R/cpp12.R +++ b/R/cpp12.R @@ -1,4 +1,4 @@ # allow_materialization = TRUE: compatibility with duckplyr <= 0.4.1 -rapi_rel_to_altrep <- function(rel, allow_materialization = TRUE) { - .Call(`_duckdb_rapi_rel_to_altrep`, rel, allow_materialization) +rapi_rel_to_altrep <- function(rel, conn, allow_materialization = TRUE) { + .Call(`_duckdb_rapi_rel_to_altrep`, rel, conn, allow_materialization) } diff --git a/R/relational.R b/R/relational.R index 8e6982d23..0f2ca237b 100644 --- a/R/relational.R +++ b/R/relational.R @@ -439,8 +439,8 @@ rel_set_alias <- function(rel, alias) { #' con <- DBI::dbConnect(duckdb()) #' rel <- rel_from_df(con, mtcars) #' print(rel_to_altrep(rel)) -rel_to_altrep <- function(rel, allow_materialization = TRUE) { - rethrow_rapi_rel_to_altrep(rel, allow_materialization) +rel_to_altrep <- function(rel, con, allow_materialization = TRUE) { + rethrow_rapi_rel_to_altrep(rel, con@conn_ref, allow_materialization) } diff --git a/R/rethrow-gen.R b/R/rethrow-gen.R index 36d1a36aa..611d64a93 100644 --- a/R/rethrow-gen.R +++ b/R/rethrow-gen.R @@ -441,9 +441,9 @@ rethrow_rapi_rel_insert <- function(rel, schema_name, table_name, call = parent. ) } -rethrow_rapi_rel_to_altrep <- function(rel, allow_materialization, call = parent.frame(2)) { +rethrow_rapi_rel_to_altrep <- function(rel, conn, allow_materialization, call = parent.frame(2)) { rlang::try_fetch( - rapi_rel_to_altrep(rel, allow_materialization), + rapi_rel_to_altrep(rel, conn, allow_materialization), error = function(e) { rethrow_error_from_rapi(e, call) } From 4d8b8479fc27f13675d81460a3d9266cd83b1644 Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Thu, 9 Jan 2025 16:48:58 +0100 Subject: [PATCH 14/17] add rapi_rel_to_altrep2 --- R/cpp11.R | 4 ++-- R/cpp12.R | 4 ++-- R/relational.R | 2 +- R/rethrow-gen.R | 4 ++-- src/cpp11.cpp | 8 +++---- src/include/reltoaltrep.hpp | 2 +- src/relational.cpp | 4 ++-- src/reltoaltrep.cpp | 45 ++++++++++++++++++++++++++++++++++++- 8 files changed, 58 insertions(+), 15 deletions(-) diff --git a/R/cpp11.R b/R/cpp11.R index 6ed7648b7..9e7c9f740 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -196,8 +196,8 @@ rapi_rel_insert <- function(rel, schema_name, table_name) { invisible(.Call(`_duckdb_rapi_rel_insert`, rel, schema_name, table_name)) } -rapi_rel_to_altrep <- function(rel, con, allow_materialization) { - .Call(`_duckdb_rapi_rel_to_altrep`, rel, con, allow_materialization) +rapi_rel_to_altrep <- function(rel, allow_materialization) { + .Call(`_duckdb_rapi_rel_to_altrep`, rel, allow_materialization) } rapi_rel_from_altrep_df <- function(df, strict, allow_materialized) { diff --git a/R/cpp12.R b/R/cpp12.R index 0db34c931..b5e8e882c 100644 --- a/R/cpp12.R +++ b/R/cpp12.R @@ -1,4 +1,4 @@ # allow_materialization = TRUE: compatibility with duckplyr <= 0.4.1 -rapi_rel_to_altrep <- function(rel, conn, allow_materialization = TRUE) { - .Call(`_duckdb_rapi_rel_to_altrep`, rel, conn, allow_materialization) +rapi_rel_to_altrep <- function(rel, allow_materialization = TRUE) { + .Call(`_duckdb_rapi_rel_to_altrep`, rel, allow_materialization) } diff --git a/R/relational.R b/R/relational.R index 0f2ca237b..5fa1a61c4 100644 --- a/R/relational.R +++ b/R/relational.R @@ -440,7 +440,7 @@ rel_set_alias <- function(rel, alias) { #' rel <- rel_from_df(con, mtcars) #' print(rel_to_altrep(rel)) rel_to_altrep <- function(rel, con, allow_materialization = TRUE) { - rethrow_rapi_rel_to_altrep(rel, con@conn_ref, allow_materialization) + rethrow_rapi_rel_to_altrep(rel, allow_materialization) } diff --git a/R/rethrow-gen.R b/R/rethrow-gen.R index 611d64a93..36d1a36aa 100644 --- a/R/rethrow-gen.R +++ b/R/rethrow-gen.R @@ -441,9 +441,9 @@ rethrow_rapi_rel_insert <- function(rel, schema_name, table_name, call = parent. ) } -rethrow_rapi_rel_to_altrep <- function(rel, conn, allow_materialization, call = parent.frame(2)) { +rethrow_rapi_rel_to_altrep <- function(rel, allow_materialization, call = parent.frame(2)) { rlang::try_fetch( - rapi_rel_to_altrep(rel, conn, allow_materialization), + rapi_rel_to_altrep(rel, allow_materialization), error = function(e) { rethrow_error_from_rapi(e, call) } diff --git a/src/cpp11.cpp b/src/cpp11.cpp index 58c2ae4df..6e1e93a10 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -361,10 +361,10 @@ extern "C" SEXP _duckdb_rapi_rel_insert(SEXP rel, SEXP schema_name, SEXP table_n END_CPP11 } // reltoaltrep.cpp -SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, duckdb::conn_eptr_t con, bool allow_materialization); -extern "C" SEXP _duckdb_rapi_rel_to_altrep(SEXP rel, SEXP con, SEXP allow_materialization) { +SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, bool allow_materialization); +extern "C" SEXP _duckdb_rapi_rel_to_altrep(SEXP rel, SEXP allow_materialization) { BEGIN_CPP11 - return cpp11::as_sexp(rapi_rel_to_altrep(cpp11::as_cpp>(rel), cpp11::as_cpp>(con), cpp11::as_cpp>(allow_materialization))); + return cpp11::as_sexp(rapi_rel_to_altrep(cpp11::as_cpp>(rel), cpp11::as_cpp>(allow_materialization))); END_CPP11 } // reltoaltrep.cpp @@ -520,7 +520,7 @@ static const R_CallMethodDef CallEntries[] = { {"_duckdb_rapi_rel_set_intersect", (DL_FUNC) &_duckdb_rapi_rel_set_intersect, 2}, {"_duckdb_rapi_rel_set_symdiff", (DL_FUNC) &_duckdb_rapi_rel_set_symdiff, 2}, {"_duckdb_rapi_rel_sql", (DL_FUNC) &_duckdb_rapi_rel_sql, 2}, - {"_duckdb_rapi_rel_to_altrep", (DL_FUNC) &_duckdb_rapi_rel_to_altrep, 3}, + {"_duckdb_rapi_rel_to_altrep", (DL_FUNC) &_duckdb_rapi_rel_to_altrep, 2}, {"_duckdb_rapi_rel_to_csv", (DL_FUNC) &_duckdb_rapi_rel_to_csv, 3}, {"_duckdb_rapi_rel_to_df", (DL_FUNC) &_duckdb_rapi_rel_to_df, 1}, {"_duckdb_rapi_rel_to_parquet", (DL_FUNC) &_duckdb_rapi_rel_to_parquet, 3}, diff --git a/src/include/reltoaltrep.hpp b/src/include/reltoaltrep.hpp index 189dbbe0a..485016cd7 100644 --- a/src/include/reltoaltrep.hpp +++ b/src/include/reltoaltrep.hpp @@ -52,4 +52,4 @@ SEXP rapi_rel_from_altrep_df(SEXP df, bool strict, bool allow_materialized); SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materialized); -SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, duckdb::conn_eptr_t con, bool allow_materialization); +SEXP rapi_rel_to_altrep2(duckdb::rel_extptr_t rel, duckdb::conn_eptr_t con, bool allow_materialization); diff --git a/src/relational.cpp b/src/relational.cpp index 9c386be83..c2810aa47 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -192,7 +192,7 @@ SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materiali cpp11::writable::list prot = {rel}; - return rapi_rel_to_altrep(make_external_prot("duckdb_relation", prot, filter), con, true); + return rapi_rel_to_altrep2(make_external_prot("duckdb_relation", prot, filter), con, true); } [[cpp11::register]] SEXP rapi_rel_project(duckdb::rel_extptr_t rel, list exprs) { @@ -235,7 +235,7 @@ SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materiali cpp11::writable::list prot = {rel}; - return rapi_rel_to_altrep(make_external_prot("duckdb_relation", prot, projection), con, true); + return rapi_rel_to_altrep2(make_external_prot("duckdb_relation", prot, projection), con, true); } [[cpp11::register]] SEXP rapi_rel_aggregate(duckdb::rel_extptr_t rel, list groups, list aggregates) { diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index a19ae0e13..dca689284 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -340,7 +340,50 @@ static R_altrep_class_t LogicalTypeToAltrepType(const LogicalType &type) { } } -[[cpp11::register]] SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, duckdb::conn_eptr_t con, bool allow_materialization) { +[[cpp11::register]] SEXP rapi_rel_to_altrep(duckdb::rel_extptr_t rel, bool allow_materialization) { + D_ASSERT(rel && rel->rel); + auto drel = rel->rel; + auto ncols = drel->Columns().size(); + + auto relation_wrapper = make_shared_ptr(rel, allow_materialization); + + cpp11::writable::list data_frame; + data_frame.reserve(ncols); + + for (size_t col_idx = 0; col_idx < ncols; col_idx++) { + auto &column_type = drel->Columns()[col_idx].Type(); + cpp11::external_pointer ptr(new AltrepVectorWrapper(relation_wrapper, col_idx)); + R_SetExternalPtrTag(ptr, RStrings::get().duckdb_vector_sym); + + cpp11::sexp vector_sexp = R_new_altrep(LogicalTypeToAltrepType(column_type), ptr, rel); + duckdb_r_decorate(column_type, vector_sexp, false); + data_frame.push_back(vector_sexp); + } + + // convert to SEXP, with potential side effect of truncation and removal of attributes + (void)(SEXP)data_frame; + + // Names + vector names; + for (auto &col : drel->Columns()) { + names.push_back(col.Name()); + } + SET_NAMES(data_frame, StringsToSexp(names)); + + // Row names + cpp11::external_pointer ptr(new AltrepRownamesWrapper(relation_wrapper)); + R_SetExternalPtrTag(ptr, RStrings::get().duckdb_row_names_sym); + cpp11::sexp row_names_sexp = R_new_altrep(RelToAltrep::rownames_class, ptr, rel); + install_new_attrib(data_frame, R_RowNamesSymbol, row_names_sexp); + + // Class + data_frame.attr(R_ClassSymbol) = RStrings::get().dataframe_str; + + return data_frame; +} + + +SEXP rapi_rel_to_altrep2(duckdb::rel_extptr_t rel, duckdb::conn_eptr_t con, bool allow_materialization) { D_ASSERT(rel && rel->rel); auto drel = rel->rel; auto ncols = drel->Columns().size(); From dc1e31e8cf7f1203360d3cbb9cdc455e2f0de634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 9 Jan 2025 17:19:32 +0100 Subject: [PATCH 15/17] Throw exception --- src/altrepdataframe_relation.cpp | 8 +++++++- src/include/altrepdataframe_relation.hpp | 13 ++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/altrepdataframe_relation.cpp b/src/altrepdataframe_relation.cpp index 999748d40..d76efb0fc 100644 --- a/src/altrepdataframe_relation.cpp +++ b/src/altrepdataframe_relation.cpp @@ -38,7 +38,7 @@ Relation& AltrepDataFrameRelation::GetParent() { } } -Relation& AltrepDataFrameRelation::GetTableRelation() { +void AltrepDataFrameRelation::BuildTableRelation() { if (!table_function_relation) { named_parameter_map_t other_params; other_params["experimental"] = Value::BOOLEAN(false); @@ -47,6 +47,12 @@ Relation& AltrepDataFrameRelation::GetTableRelation() { table_function_relation = connection->conn->TableFunction("r_dataframe_scan", {Value::POINTER((uintptr_t)(SEXP)dataframe)}, other_params)->Alias(alias); } +} + +Relation& AltrepDataFrameRelation::GetTableRelation() { + if (!table_function_relation) { + throw RebuildRelationException(this); + } return *table_function_relation; } diff --git a/src/include/altrepdataframe_relation.hpp b/src/include/altrepdataframe_relation.hpp index b04bb02ea..d39ce40d7 100644 --- a/src/include/altrepdataframe_relation.hpp +++ b/src/include/altrepdataframe_relation.hpp @@ -21,10 +21,21 @@ class AltrepDataFrameRelation final : public Relation { string ToString(idx_t depth) override; bool IsReadOnly() override; + void BuildTableRelation(); + private: Relation& GetTableRelation(); Relation& GetParent(); }; -} +class RebuildRelationException : public std::exception { +public: + RebuildRelationException(AltrepDataFrameRelation* target_) : target(target_) { + } + +public: + AltrepDataFrameRelation* target; +}; + +} // namespace duckdb From 950f56825b3ee16c1509a8baf36934dfb0525ee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 9 Jan 2025 17:22:32 +0100 Subject: [PATCH 16/17] Catching exception, but it occurs elsewhere --- src/reltoaltrep.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/reltoaltrep.cpp b/src/reltoaltrep.cpp index dca689284..a200b3fc8 100644 --- a/src/reltoaltrep.cpp +++ b/src/reltoaltrep.cpp @@ -127,7 +127,19 @@ MaterializedQueryResult *AltrepRelationWrapper::GetQueryResult() { duckdb_httplib::detail::scope_exit reset_max_expression_depth( [&]() { rel->context->GetContext()->config.max_expression_depth = old_depth; }); - res = rel->Execute(); + // We can't build the table relation on the fly because of a deadlock situation: + // The context is locked by Execute(), + // and a lock is attempted when creating a new Relation object. + while (true) { + try { + res = rel->Execute(); + } + catch (RebuildRelationException &e) { + e.target->BuildTableRelation(); + continue; + } + break; + } // FIXME: Use std::experimental::scope_exit if (rel->context->GetContext()->config.max_expression_depth != old_depth * 2) { From 43220c812aa402cab868a57e5262810d6a2973dc Mon Sep 17 00:00:00 2001 From: Antonov548 Date: Fri, 10 Jan 2025 11:12:35 +0100 Subject: [PATCH 17/17] relation rebuild --- src/relational.cpp | 70 +++++++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/src/relational.cpp b/src/relational.cpp index c2810aa47..6a69881d1 100644 --- a/src/relational.cpp +++ b/src/relational.cpp @@ -174,21 +174,36 @@ SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materiali } [[cpp11::register]] SEXP rapi_rel_filter2(data_frame df, duckdb::conn_eptr_t con, list exprs) { - duckdb::unique_ptr filter_expr; - if (exprs.size() == 0) { // nop - stop("expected filter expressions"); - } else if (exprs.size() == 1) { - filter_expr = ((expr_extptr_t)exprs[0])->Copy(); - } else { - vector> filters; - for (expr_extptr_t expr : exprs) { - filters.push_back(expr->Copy()); + auto build_filter_expr = [](const auto& _exprs) { + duckdb::unique_ptr filter_expr; + if (_exprs.size() == 0) { // nop + stop("expected filter expressions"); + } else if (_exprs.size() == 1) { + filter_expr = ((expr_extptr_t)_exprs[0])->Copy(); + } else { + vector> filters; + for (expr_extptr_t expr : _exprs) { + filters.push_back(expr->Copy()); + } + filter_expr = make_uniq(ExpressionType::CONJUNCTION_AND, std::move(filters)); } - filter_expr = make_uniq(ExpressionType::CONJUNCTION_AND, std::move(filters)); - } + + return filter_expr; + }; duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_any_df(con, df, true)); - auto filter = make_shared_ptr(rel->rel, std::move(filter_expr)); + shared_ptr filter; + + while (true) { + try { + filter = make_shared_ptr(rel->rel, build_filter_expr(exprs)); + } + catch (RebuildRelationException &e) { + e.target->BuildTableRelation(); + continue; + } + break; + } cpp11::writable::list prot = {rel}; @@ -220,18 +235,33 @@ SEXP rapi_rel_from_any_df(duckdb::conn_eptr_t con, SEXP df, bool allow_materiali if (exprs.size() == 0) { stop("expected projection expressions"); } - vector> projections; - vector aliases; + auto build_arguments = [](const auto& _exprs) { + vector aliases; + vector> projections; + for (expr_extptr_t expr : _exprs) { + auto dexpr = expr->Copy(); + aliases.push_back(dexpr->GetName()); + projections.push_back(std::move(dexpr)); + } - for (expr_extptr_t expr : exprs) { - auto dexpr = expr->Copy(); - aliases.push_back(dexpr->GetName()); - projections.push_back(std::move(dexpr)); - } + return std::make_pair(std::move(projections), std::move(aliases)); + }; duckdb::rel_extptr_t rel = cpp11::as_cpp>(rapi_rel_from_any_df(con, df, true)); - auto projection = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); + shared_ptr projection; + + while (true) { + try { + auto&& [projections, aliases] = build_arguments(exprs); + projection = make_shared_ptr(rel->rel, std::move(projections), std::move(aliases)); + } + catch (RebuildRelationException &e) { + e.target->BuildTableRelation(); + continue; + } + break; + } cpp11::writable::list prot = {rel};