From b1e34353d341d74288c17bcd1cca8a6c55957524 Mon Sep 17 00:00:00 2001 From: Michael Simons Date: Mon, 20 Jan 2025 11:54:41 +0100 Subject: [PATCH] refactor(translator): Optimize `LIKE` queries to not always use `=~`. When possible, use `CONTAINS`, `STARTS WITH` or `ENDS WITH`. Signed-off-by: Michael Simons --- .../jdbc/translator/impl/SqlToCypher.java | 46 +++++++++++++++---- .../translator/impl/SqlToCypherTests.java | 22 +++++++++ .../impl/src/test/resources/predicates.adoc | 2 +- 3 files changed, 61 insertions(+), 9 deletions(-) diff --git a/neo4j-jdbc-translator/impl/src/main/java/org/neo4j/jdbc/translator/impl/SqlToCypher.java b/neo4j-jdbc-translator/impl/src/main/java/org/neo4j/jdbc/translator/impl/SqlToCypher.java index 12dd7a8a..d7b28160 100644 --- a/neo4j-jdbc-translator/impl/src/main/java/org/neo4j/jdbc/translator/impl/SqlToCypher.java +++ b/neo4j-jdbc-translator/impl/src/main/java/org/neo4j/jdbc/translator/impl/SqlToCypher.java @@ -33,6 +33,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiFunction; import java.util.function.Function; +import java.util.function.LongSupplier; import java.util.function.Predicate; import java.util.function.Supplier; import java.util.logging.Level; @@ -104,6 +105,7 @@ final class SqlToCypher implements Translator { static final Pattern ELEMENT_ID_PATTERN = Pattern.compile("(?i)v\\$(?:(?.+?)_)?id"); static final String ELEMENT_ID_FUNCTION_NAME = "elementId"; static final String ELEMENT_ID_ALIAS = "v$id"; + static final Pattern PERCENT_OR_UNDERSCORE = Pattern.compile("[%_]"); static { Logger.getLogger("org.jooq.Constants").setLevel(Level.WARNING); @@ -1186,14 +1188,7 @@ else if (c instanceof QOM.RowIsNotNull e) { .orElseThrow(); } else if (c instanceof QOM.Like like) { - Expression rhs; - if (like.$arg2() instanceof Param p && p.$inline() && p.getValue() instanceof String s) { - rhs = Cypher.literalOf(s.replace("%", ".*").replace("_", ".")); - } - else { - rhs = expression(like.$arg2()); - } - return expression(like.$arg1()).matches(rhs); + return like(like); } else if (c instanceof QOM.FieldCondition fc && fc.$field() instanceof Param param) { return (Boolean.TRUE.equals(param.getValue()) ? Cypher.literalTrue() : Cypher.literalFalse()) @@ -1212,6 +1207,41 @@ else if (c instanceof QOM.InList il) { } } + private Condition like(QOM.Like like) { + Expression rhs; + Expression lhs = expression(like.$arg1()); + if (like.$arg2() instanceof Param p && p.$inline() && p.getValue() instanceof String s) { + var sw = s.startsWith("%"); + var ew = s.endsWith("%"); + var length = s.length(); + var cnt = new LongSupplier() { + Long value = null; + + @Override + public long getAsLong() { + if (this.value == null) { + this.value = PERCENT_OR_UNDERSCORE.matcher(s).results().count(); + } + return this.value; + } + }; + if (sw && ew && length > 2 && cnt.getAsLong() == 2) { + return lhs.contains(Cypher.literalOf(s.substring(1, length - 1))); + } + else if (sw && length > 1 && cnt.getAsLong() == 1) { + return lhs.endsWith(Cypher.literalOf(s.substring(1))); + } + else if (ew && length > 1 && cnt.getAsLong() == 1) { + return lhs.startsWith(Cypher.literalOf(s.substring(0, length - 1))); + } + rhs = Cypher.literalOf(s.replaceAll("%+", ".*").replace("_", ".")); + } + else { + rhs = expression(like.$arg2()); + } + return lhs.matches(rhs); + } + private Condition rowCondition(Row r1, Row r2, BiFunction comp, BiFunction last) { diff --git a/neo4j-jdbc-translator/impl/src/test/java/org/neo4j/jdbc/translator/impl/SqlToCypherTests.java b/neo4j-jdbc-translator/impl/src/test/java/org/neo4j/jdbc/translator/impl/SqlToCypherTests.java index 58bed155..e0f673a3 100644 --- a/neo4j-jdbc-translator/impl/src/test/java/org/neo4j/jdbc/translator/impl/SqlToCypherTests.java +++ b/neo4j-jdbc-translator/impl/src/test/java/org/neo4j/jdbc/translator/impl/SqlToCypherTests.java @@ -407,6 +407,28 @@ void escapingShouldWork(Boolean prettyPrint, Boolean alwaysEscapeNames, String e assertThat(cypher).isEqualTo(expected.replace("$", cfg.isPrettyPrint() ? System.lineSeparator() : " ")); } + @ParameterizedTest + @CsvSource( + textBlock = """ + SELECT * FROM blub b WHERE name like '%Test%', MATCH (b:blub) WHERE b.name CONTAINS 'Test' RETURN * + SELECT * FROM blub b WHERE name like '%Test', MATCH (b:blub) WHERE b.name ENDS WITH 'Test' RETURN * + SELECT * FROM blub b WHERE name like 'Test%', MATCH (b:blub) WHERE b.name STARTS WITH 'Test' RETURN * + SELECT * FROM blub b WHERE name like 'This is _ %Test%', MATCH (b:blub) WHERE b.name =~ 'This is . .*Test.*' RETURN * + SELECT * FROM blub b WHERE name like '%', MATCH (b:blub) WHERE b.name =~ '.*' RETURN * + SELECT * FROM blub b WHERE name like '%%', MATCH (b:blub) WHERE b.name =~ '.*' RETURN * + SELECT * FROM blub b WHERE name like '%%%', MATCH (b:blub) WHERE b.name =~ '.*' RETURN * + SELECT * FROM blub b WHERE name like '_', MATCH (b:blub) WHERE b.name =~ '.' RETURN * + SELECT * FROM blub b WHERE name like '__', MATCH (b:blub) WHERE b.name =~ '..' RETURN * + SELECT * FROM blub b WHERE name like '___', MATCH (b:blub) WHERE b.name =~ '...' RETURN * + SELECT * FROM blub b WHERE name like '%_%', MATCH (b:blub) WHERE b.name =~ '.*..*' RETURN * + SELECT * FROM blub b WHERE name like '%ein%schöner%Name%', MATCH (b:blub) WHERE b.name =~ '.*ein.*schöner.*Name.*' RETURN * + """) + void likeShouldBeHandledNicely(String sql, String expected) { + + var translator = SqlToCypher.defaultTranslator(); + assertThat(translator.translate(sql)).isEqualTo(expected); + } + private static class TestDataExtractor extends Treeprocessor { private final List testData = new ArrayList<>(); diff --git a/neo4j-jdbc-translator/impl/src/test/resources/predicates.adoc b/neo4j-jdbc-translator/impl/src/test/resources/predicates.adoc index 699c1dbe..a8232ce0 100644 --- a/neo4j-jdbc-translator/impl/src/test/resources/predicates.adoc +++ b/neo4j-jdbc-translator/impl/src/test/resources/predicates.adoc @@ -186,6 +186,6 @@ will be translated into a regular expressions, replacing the `%` with `.*`: [source,cypher,id=p5_0_expected] ---- -MATCH (m:`movies`) WHERE m.title =~ '.*Matrix.*' OR m.title =~ 'M.trix' +MATCH (m:`movies`) WHERE m.title CONTAINS 'Matrix' OR m.title =~ 'M.trix' RETURN * ----