Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

fix(cubesql): Fix SELECT DISTINCT on pushdown #9144

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
149 changes: 147 additions & 2 deletions rust/cubesql/cubesql/src/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8205,6 +8205,149 @@ ORDER BY "source"."str0" ASC
)
}

/// Checks the `CubeScan` request produced for several `SELECT DISTINCT` queries.
///
/// The test is a no-op unless SQL push down is enabled. Every case is compiled with the
/// PostgreSQL protocol and the request of the cube scan found in the resulting logical plan
/// is compared against the expected `V1LoadRequestQuery`.
#[tokio::test]
async fn test_select_distinct_dimensions() {
    if !Rewriter::sql_push_down_enabled() {
        return;
    }
    init_testing_logger();

    // (query, expected cube scan request) pairs, checked in declaration order.
    let cases = vec![
        // DISTINCT over a single dimension.
        (
            "SELECT DISTINCT customer_gender FROM KibanaSampleDataEcommerce",
            V1LoadRequestQuery {
                measures: Some(vec![]),
                dimensions: Some(vec![
                    "KibanaSampleDataEcommerce.customer_gender".to_string(),
                ]),
                segments: Some(vec![]),
                order: Some(vec![]),
                ..Default::default()
            },
        ),
        // DISTINCT over a single dimension with an outer LIMIT.
        (
            "SELECT DISTINCT customer_gender FROM KibanaSampleDataEcommerce LIMIT 100",
            V1LoadRequestQuery {
                measures: Some(vec![]),
                dimensions: Some(vec![
                    "KibanaSampleDataEcommerce.customer_gender".to_string(),
                ]),
                segments: Some(vec![]),
                order: Some(vec![]),
                limit: Some(100),
                ..Default::default()
            },
        ),
        // DISTINCT over a limited subquery: the expected request keeps the limit
        // and is marked as ungrouped.
        (
            "SELECT DISTINCT * FROM (SELECT customer_gender FROM KibanaSampleDataEcommerce LIMIT 100) q_0",
            V1LoadRequestQuery {
                measures: Some(vec![]),
                dimensions: Some(vec![
                    "KibanaSampleDataEcommerce.customer_gender".to_string(),
                ]),
                segments: Some(vec![]),
                order: Some(vec![]),
                limit: Some(100),
                ungrouped: Some(true),
                ..Default::default()
            },
        ),
        // DISTINCT over two dimensions.
        (
            "SELECT DISTINCT customer_gender, order_date FROM KibanaSampleDataEcommerce",
            V1LoadRequestQuery {
                measures: Some(vec![]),
                dimensions: Some(vec![
                    "KibanaSampleDataEcommerce.customer_gender".to_string(),
                    "KibanaSampleDataEcommerce.order_date".to_string(),
                ]),
                segments: Some(vec![]),
                order: Some(vec![]),
                ..Default::default()
            },
        ),
        // DISTINCT over a single aggregated measure.
        (
            "SELECT DISTINCT MAX(maxPrice) FROM KibanaSampleDataEcommerce",
            V1LoadRequestQuery {
                measures: Some(vec!["KibanaSampleDataEcommerce.maxPrice".to_string()]),
                dimensions: Some(vec![]),
                segments: Some(vec![]),
                order: Some(vec![]),
                ..Default::default()
            },
        ),
        // DISTINCT over a grouped subquery with one dimension and one measure.
        (
            "SELECT DISTINCT * FROM (SELECT customer_gender, MAX(maxPrice) FROM KibanaSampleDataEcommerce GROUP BY 1) q_0",
            V1LoadRequestQuery {
                measures: Some(vec!["KibanaSampleDataEcommerce.maxPrice".to_string()]),
                dimensions: Some(vec![
                    "KibanaSampleDataEcommerce.customer_gender".to_string(),
                ]),
                segments: Some(vec![]),
                order: Some(vec![]),
                ..Default::default()
            },
        ),
    ];

    for (sql, expected_request) in cases {
        let plan = convert_select_to_query_plan(sql.to_string(), DatabaseProtocol::PostgreSQL)
            .await
            .as_logical_plan();

        println!("logical_plan: {:?}", plan);

        assert_eq!(plan.find_cube_scan().request, expected_request);
    }
}

#[tokio::test]
async fn test_sort_relations() -> Result<(), CubeError> {
init_testing_logger();
Expand Down Expand Up @@ -15658,8 +15801,10 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
"KibanaSampleDataEcommerce.customer_gender".to_string(),
]),
segments: Some(vec![]),
order: Some(vec![]),
ungrouped: Some(true),
order: Some(vec![vec![
"KibanaSampleDataEcommerce.customer_gender".to_string(),
"asc".to_string()
],]),
..Default::default()
}
)
Expand Down
62 changes: 60 additions & 2 deletions rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
analysis::{ConstantFolding, LogicalPlanData, MemberNamesToExpr, OriginalExpr},
binary_expr, cast_expr, change_user_expr, column_expr, cross_join, cube_scan,
cube_scan_filters_empty_tail, cube_scan_members, cube_scan_members_empty_tail,
cube_scan_order_empty_tail, dimension_expr, expr_column_name, fun_expr, join, like_expr,
limit, list_concat_pushdown_replacer, list_concat_pushup_replacer, literal_expr,
cube_scan_order_empty_tail, dimension_expr, distinct, expr_column_name, fun_expr, join,
like_expr, limit, list_concat_pushdown_replacer, list_concat_pushup_replacer, literal_expr,
literal_member, measure_expr, member_pushdown_replacer, member_replacer,
merged_members_replacer, original_expr_name, projection, referenced_columns, rewrite,
rewriter::{CubeEGraph, CubeRewrite, RewriteRules},
Expand Down Expand Up @@ -262,6 +262,34 @@
),
self.push_down_limit("?skip", "?fetch", "?new_skip", "?new_fetch"),
),
transforming_rewrite(
"select-distinct-dimensions",
distinct(cube_scan(
"?alias_to_cube",
"?members",
"?filters",
"?orders",
"CubeScanLimit:None",
"CubeScanOffset:None",
"?split",
"?can_pushdown_join",
"CubeScanWrapped:false",
"?old_ungrouped",
)),
cube_scan(
"?alias_to_cube",
"?members",
"?filters",
"?orders",
"CubeScanLimit:None",
"CubeScanOffset:None",
"?split",
"?can_pushdown_join",
"CubeScanWrapped:false",
"CubeScanUngrouped:false",
),
self.select_distinct_dimensions("?members"),
),
// MOD function to binary expr
transforming_rewrite_with_root(
"mod-fun-to-binary-expr",
Expand Down Expand Up @@ -1478,6 +1506,36 @@
)
}

/// Builds the condition closure for the `select-distinct-dimensions` rewrite.
///
/// The returned closure looks up the `member_name_to_expr` analysis data of the e-class
/// bound to `members_var` and returns `true` (the rewrite may be applied) only when every
/// named member is either a dimension found in the meta context or a synthetic field.
/// Members without a name are accepted, and missing analysis data does not block the
/// rewrite.
fn select_distinct_dimensions(
    &self,
    members_var: &'static str,
) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool {
    let members_var = var!(members_var);
    let meta_context = self.meta_context.clone();

    move |egraph, subst| {
        egraph
            .index(subst[members_var])
            .data
            .member_name_to_expr
            .as_ref()
            .map_or(true, |member_names_to_expr| {
                // We should allow the transform for queries with dimensions only,
                // as it doesn't make sense for measures. The check therefore must hold
                // for *all* members; negating it would enable the rewrite exactly for
                // the queries that contain a non-dimension member.
                member_names_to_expr.list.iter().all(|(_, member, _)| {
                    member.name().map_or(true, |name| {
                        meta_context
                            .find_dimension_with_name(name.to_string())
                            .is_some()
                            || meta_context.is_synthetic_field(name.to_string())
                    })
                })
            })
    }
}

fn push_down_non_empty_aggregate(
&self,
alias_to_cube_var: &'static str,
Expand Down
Loading