From b4718925a86e8ec98a58ece4108a975821342ccd Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Thu, 23 Jan 2025 19:26:15 +0200 Subject: [PATCH 01/10] add test_select_distinct_dimensions --- rust/cubesql/cubesql/src/compile/mod.rs | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 2e99f16eb60d4..614eb99ed6c0b 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -8205,6 +8205,42 @@ ORDER BY "source"."str0" ASC ) } + #[tokio::test] + async fn test_select_distinct_dimensions() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let logical_plan = convert_select_to_query_plan( + "SELECT DISTINCT customer_gender FROM \"KibanaSampleDataEcommerce\"".to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await + .as_logical_plan(); + + println!("logical_plan: {:?}", logical_plan); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec![]), + dimensions: Some(vec![ + // "KibanaSampleDataEcommerce.order_date".to_string(), + // "KibanaSampleDataEcommerce.last_mod".to_string(), + "KibanaSampleDataEcommerce.customer_gender".to_string(), + // "KibanaSampleDataEcommerce.notes".to_string(), + // "KibanaSampleDataEcommerce.taxful_total_price".to_string(), + // "KibanaSampleDataEcommerce.has_subscription".to_string(), + ]), + segments: Some(vec![]), + order: Some(vec![]), + ungrouped: Some(false), + ..Default::default() + } + ) + } + #[tokio::test] async fn test_sort_relations() -> Result<(), CubeError> { init_testing_logger(); From 64b0ed56f6879acefc494ce0a9994824f31165a8 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Thu, 23 Jan 2025 19:26:36 +0200 Subject: [PATCH 02/10] =?UTF-8?q?started=20doing=20transforming=5Frewrite?= =?UTF-8?q?=20=E2=86=92=20"select-distinct-dimensions"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/compile/rewrite/rules/members.rs | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index f5f7c809fc428..6766120638ed3 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -4,8 +4,8 @@ use crate::{ analysis::{ConstantFolding, LogicalPlanData, MemberNamesToExpr, OriginalExpr}, binary_expr, cast_expr, change_user_expr, column_expr, cross_join, cube_scan, cube_scan_filters_empty_tail, cube_scan_members, cube_scan_members_empty_tail, - cube_scan_order_empty_tail, dimension_expr, expr_column_name, fun_expr, join, like_expr, - limit, list_concat_pushdown_replacer, list_concat_pushup_replacer, literal_expr, + cube_scan_order_empty_tail, dimension_expr, distinct, expr_column_name, fun_expr, join, + like_expr, limit, list_concat_pushdown_replacer, list_concat_pushup_replacer, literal_expr, literal_member, measure_expr, member_pushdown_replacer, member_replacer, merged_members_replacer, original_expr_name, projection, referenced_columns, rewrite, rewriter::{CubeEGraph, CubeRewrite, RewriteRules}, @@ -262,6 +262,34 @@ impl RewriteRules for MemberRules { ), self.push_down_limit("?skip", "?fetch", "?new_skip", "?new_fetch"), ), + transforming_rewrite( + "select-distinct-dimensions", + distinct(cube_scan( + "?alias_to_cube", + "?members", + "?filters", + "?orders", + "?limit", + "?offset", + "?split", + "?can_pushdown_join", + "CubeScanWrapped:false", + "?old_ungrouped", + )), + cube_scan( + "?alias_to_cube", + "?members", + "?filters", + "?orders", + "?limit", + "?offset", + "?split", + "?can_pushdown_join", + "CubeScanWrapped:false", + "CubeScanUngrouped:false", + ), + self.select_distinct_dimensions("?members", "?limit"), + ), // MOD function to binary expr transforming_rewrite_with_root( "mod-fun-to-binary-expr", @@ -1478,6 +1506,29 @@ impl MemberRules { ) } + fn select_distinct_dimensions( + &self, + members_var: &'static str, + limit_var: &'static str, + ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { + let members_var = var!(members_var); + let limit_var = var!(limit_var); + + move |egraph, subst| { + let cube_limit = var_iter!(egraph[subst[limit_var]], CubeScanLimit) + .next() + .unwrap(); + + if cube_limit.is_some() { + return false; + } + + for members in var_list_iter!(egraph[subst[members_var]], CubeScanMembers) {} + + true + } + } + fn push_down_non_empty_aggregate( &self, alias_to_cube_var: &'static str, From 0cd9ee06f6910da7c38c2ad839f450a1eec20ba0 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 27 Jan 2025 15:35:25 +0200 Subject: [PATCH 03/10] fix tests --- rust/cubesql/cubesql/src/compile/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 614eb99ed6c0b..404150f271824 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -8235,7 +8235,6 @@ ORDER BY "source"."str0" ASC ]), segments: Some(vec![]), order: Some(vec![]), - ungrouped: Some(false), ..Default::default() } ) @@ -15695,7 +15694,6 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), ]), segments: Some(vec![]), order: Some(vec![]), - ungrouped: Some(true), ..Default::default() } ) From 833f887291e3298f9637f41462285c493f9ea3cc Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 27 Jan 2025 15:35:48 +0200 Subject: [PATCH 04/10] wip: implement self.select_distinct_dimensions() --- .../cubesql/src/compile/rewrite/rules/members.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 6766120638ed3..528e0d1231093 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -288,7 +288,7 @@ impl RewriteRules for MemberRules { "CubeScanWrapped:false", "CubeScanUngrouped:false", ), - self.select_distinct_dimensions("?members", "?limit"), + self.select_distinct_dimensions(/*"?members",*/ "?limit"), ), // MOD function to binary expr transforming_rewrite_with_root( @@ -1508,10 +1508,10 @@ impl MemberRules { fn select_distinct_dimensions( &self, - members_var: &'static str, + // members_var: &'static str, limit_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { - let members_var = var!(members_var); + // let members_var = var!(members_var); let limit_var = var!(limit_var); move |egraph, subst| { @@ -1523,7 +1523,13 @@ impl MemberRules { return false; } - for members in var_list_iter!(egraph[subst[members_var]], CubeScanMembers) {} + // for members in var_list_iter!(egraph[subst[members_var]], CubeScanMembers) { + // // TODO: check if all members in request are dimensions + // // If no - return false + // for member in members.iter() { + // println!("member: {:?}", egraph[*member]); + // } + // } true } From 55ad58553d4f2f9d6279cb078ab6c54953dd0d76 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 27 Jan 2025 15:41:37 +0200 Subject: [PATCH 05/10] fix tests --- rust/cubesql/cubesql/src/compile/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 404150f271824..bb45c58c5f568 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -15693,7 +15693,10 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), "KibanaSampleDataEcommerce.customer_gender".to_string(), ]), segments: Some(vec![]), - order: Some(vec![]), + order: Some(vec![vec![ + "KibanaSampleDataEcommerce.customer_gender".to_string(), + "asc".to_string() + ],]), ..Default::default() } ) From 684c814d8f169ab350aca282a89171c1b8e091e4 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 27 Jan 2025 17:02:13 +0200 Subject: [PATCH 06/10] some improvements --- .../src/compile/rewrite/rules/members.rs | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 528e0d1231093..585980ee1e9b9 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -269,8 +269,8 @@ impl RewriteRules for MemberRules { "?members", "?filters", "?orders", - "?limit", - "?offset", + "CubeScanLimit:None", + "CubeScanOffset:None", "?split", "?can_pushdown_join", "CubeScanWrapped:false", @@ -281,14 +281,14 @@ impl RewriteRules for MemberRules { "?members", "?filters", "?orders", - "?limit", - "?offset", + "CubeScanLimit:None", + "CubeScanOffset:None", "?split", "?can_pushdown_join", "CubeScanWrapped:false", "CubeScanUngrouped:false", ), - self.select_distinct_dimensions(/*"?members",*/ "?limit"), + self.select_distinct_dimensions(/*"?members"*/), ), // MOD function to binary expr transforming_rewrite_with_root( @@ -1509,20 +1509,10 @@ impl MemberRules { fn select_distinct_dimensions( &self, // members_var: &'static str, - limit_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { // let members_var = var!(members_var); - let limit_var = var!(limit_var); - - move |egraph, subst| { - let cube_limit = var_iter!(egraph[subst[limit_var]], CubeScanLimit) - .next() - .unwrap(); - - if cube_limit.is_some() { - return false; - } + move |_egraph, _subst| { // for members in var_list_iter!(egraph[subst[members_var]], CubeScanMembers) { // // TODO: check if all members in request are dimensions // // If no - return false From b12c6efcecfab7ac2416b1144a43e4c3bca664ed Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 27 Jan 2025 22:17:43 +0200 Subject: [PATCH 07/10] implement select_distinct_dimensions --- .../src/compile/rewrite/rules/members.rs | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 585980ee1e9b9..263de60e97c63 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -288,7 +288,7 @@ impl RewriteRules for MemberRules { "CubeScanWrapped:false", "CubeScanUngrouped:false", ), - self.select_distinct_dimensions(/*"?members"*/), + self.select_distinct_dimensions("?members"), ), // MOD function to binary expr transforming_rewrite_with_root( @@ -1508,20 +1508,26 @@ impl MemberRules { fn select_distinct_dimensions( &self, - // members_var: &'static str, + members_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { - // let members_var = var!(members_var); - - move |_egraph, _subst| { - // for members in var_list_iter!(egraph[subst[members_var]], CubeScanMembers) { - // // TODO: check if all members in request are dimensions - // // If no - return false - // for member in members.iter() { - // println!("member: {:?}", egraph[*member]); - // } - // } - - true + let members_var = var!(members_var); + let meta_context = self.meta_context.clone(); + + move |egraph, subst| { + egraph + .index(subst[members_var]) + .data + .member_name_to_expr + .as_ref() + .map_or(true, |member_names_to_expr| { + !member_names_to_expr.list.iter().any(|(_, member, _)| { + // we should allow transform only for queries with dimensions only, + // as it doesn't make sense for measures + meta_context + .find_measure_with_name(member.name().unwrap().to_string()) + .is_some() + }) + }) } } From 3316cafbe1ba8c81faa1b51fac0ca18f71a55cd2 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Mon, 27 Jan 2025 22:52:23 +0200 Subject: [PATCH 08/10] adding tests --- rust/cubesql/cubesql/src/compile/mod.rs | 120 ++++++++++++++++++++++-- 1 file changed, 114 insertions(+), 6 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index bb45c58c5f568..6169d824a864f 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -8213,7 +8213,7 @@ ORDER BY "source"."str0" ASC init_testing_logger(); let logical_plan = convert_select_to_query_plan( - "SELECT DISTINCT customer_gender FROM \"KibanaSampleDataEcommerce\"".to_string(), + "SELECT DISTINCT customer_gender FROM KibanaSampleDataEcommerce".to_string(), DatabaseProtocol::PostgreSQL, ) .await @@ -8226,12 +8226,120 @@ ORDER BY "source"."str0" ASC V1LoadRequestQuery { measures: Some(vec![]), dimensions: Some(vec![ - // "KibanaSampleDataEcommerce.order_date".to_string(), - // "KibanaSampleDataEcommerce.last_mod".to_string(), "KibanaSampleDataEcommerce.customer_gender".to_string(), - // "KibanaSampleDataEcommerce.notes".to_string(), - // "KibanaSampleDataEcommerce.taxful_total_price".to_string(), - // "KibanaSampleDataEcommerce.has_subscription".to_string(), + ]), + segments: Some(vec![]), + order: Some(vec![]), + ..Default::default() + } + ); + + let logical_plan = convert_select_to_query_plan( + "SELECT DISTINCT customer_gender FROM KibanaSampleDataEcommerce LIMIT 100".to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await + .as_logical_plan(); + + println!("logical_plan: {:?}", logical_plan); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec![]), + dimensions: Some(vec![ + "KibanaSampleDataEcommerce.customer_gender".to_string(), + ]), + segments: Some(vec![]), + order: Some(vec![]), + limit: Some(100), + ..Default::default() + } + ); + + let logical_plan = convert_select_to_query_plan( + "SELECT DISTINCT * FROM (SELECT customer_gender FROM KibanaSampleDataEcommerce LIMIT 100) q_0".to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await + .as_logical_plan(); + + println!("logical_plan: {:?}", logical_plan); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec![]), + dimensions: Some(vec![ + "KibanaSampleDataEcommerce.customer_gender".to_string(), + ]), + segments: Some(vec![]), + order: Some(vec![]), + limit: Some(100), + ungrouped: Some(true), + ..Default::default() + } + ); + + let logical_plan = convert_select_to_query_plan( + "SELECT DISTINCT customer_gender, order_date FROM KibanaSampleDataEcommerce" + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await + .as_logical_plan(); + + println!("logical_plan: {:?}", logical_plan); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec![]), + dimensions: Some(vec![ + "KibanaSampleDataEcommerce.customer_gender".to_string(), + "KibanaSampleDataEcommerce.order_date".to_string(), + ]), + segments: Some(vec![]), + order: Some(vec![]), + ..Default::default() + } + ); + + let logical_plan = convert_select_to_query_plan( + "SELECT DISTINCT MAX(maxPrice) FROM KibanaSampleDataEcommerce".to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await + .as_logical_plan(); + + println!("logical_plan: {:?}", logical_plan); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec!["KibanaSampleDataEcommerce.maxPrice".to_string(),]), + dimensions: Some(vec![]), + segments: Some(vec![]), + order: Some(vec![]), + ..Default::default() + } + ); + + let logical_plan = convert_select_to_query_plan( + "SELECT DISTINCT * FROM (SELECT customer_gender, MAX(maxPrice) FROM KibanaSampleDataEcommerce GROUP BY 1) q_0".to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await + .as_logical_plan(); + + println!("logical_plan: {:?}", logical_plan); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec!["KibanaSampleDataEcommerce.maxPrice".to_string(),]), + dimensions: Some(vec![ + "KibanaSampleDataEcommerce.customer_gender".to_string(), ]), segments: Some(vec![]), order: Some(vec![]), From 3123a9da0822cb19bcbb70d437851f4a11076106 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 28 Jan 2025 00:07:19 +0200 Subject: [PATCH 09/10] =?UTF-8?q?refactor=20a=C2=A0bit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 263de60e97c63..444f5c6cfbf0d 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -1524,7 +1524,12 @@ impl MemberRules { // we should allow transform only for queries with dimensions only, // as it doesn't make sense for measures meta_context - .find_measure_with_name(member.name().unwrap().to_string()) + .find_measure_with_name( + member + .name() + .expect("Measure should have a name") + .to_string(), + ) .is_some() }) }) From 84fdb24b6a91b3e9694d54f73aa6100b94669fa1 Mon Sep 17 00:00:00 2001 From: Konstantin Burkalev Date: Tue, 28 Jan 2025 12:12:17 +0200 Subject: [PATCH 10/10] improve select_distinct_dimensions() --- .../src/compile/rewrite/rules/members.rs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 444f5c6cfbf0d..187d4ecf60db0 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -1520,17 +1520,17 @@ impl MemberRules { .member_name_to_expr .as_ref() .map_or(true, |member_names_to_expr| { - !member_names_to_expr.list.iter().any(|(_, member, _)| { - // we should allow transform only for queries with dimensions only, + !member_names_to_expr.list.iter().all(|(_, member, _)| { + // we should allow transform for queries with dimensions only, // as it doesn't make sense for measures - meta_context - .find_measure_with_name( - member - .name() - .expect("Measure should have a name") - .to_string(), - ) - .is_some() + if let Some(name) = member.name() { + meta_context + .find_dimension_with_name(name.to_string()) + .is_some() + || meta_context.is_synthetic_field(name.to_string()) + } else { + true + } }) }) }