From 6dc899cb5c8aed86f70f4fee848eb681cd11c022 Mon Sep 17 00:00:00 2001 From: Jesse Jia Date: Tue, 19 Dec 2023 12:47:23 -0800 Subject: [PATCH] fix: heavy query on information_schemas cause high CPU load (#316) * fix: heavy query on information_schemas cause high CPU load * revert change * Address comment * resolve conflict --------- Co-authored-by: Jesse Jia --- .../metadata/dao/EbeanLocalAccess.java | 31 ++++++++++++++++--- .../metadata/dao/utils/SQLStatementUtils.java | 12 +++---- .../metadata/dao/EbeanLocalAccessTest.java | 10 +++++- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/EbeanLocalAccess.java b/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/EbeanLocalAccess.java index 265d1bbec..2bcef3f03 100644 --- a/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/EbeanLocalAccess.java +++ b/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/EbeanLocalAccess.java @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -72,6 +73,10 @@ public class EbeanLocalAccess implements IEbeanLocalAccess private static final String ASPECT_JSON_PLACEHOLDER = "__PLACEHOLDER__"; private static final String DEFAULT_ACTOR = "urn:li:principal:UNKNOWN"; + // key: table_name, + // value: Set(column1, column2, column3 ...) + private final Map> tableColumns = new ConcurrentHashMap<>(); + public EbeanLocalAccess(EbeanServer server, ServerConfig serverConfig, @Nonnull Class urnClass, UrnPathExtractor urnPathExtractor) { _server = server; _urnClass = urnClass; @@ -200,7 +205,9 @@ public List batchGetUnion( for (int index = position; index < end; index++) { final Urn entityUrn = aspectKeys.get(index).getUrn(); final Class aspectClass = (Class) aspectKeys.get(index).getAspectClass(); - keysToQueryMap.computeIfAbsent(aspectClass, unused -> new HashSet<>()).add(entityUrn); + if (checkColumnExists(getTableName(entityUrn), getAspectColumnName(aspectClass))) { + keysToQueryMap.computeIfAbsent(aspectClass, unused -> new HashSet<>()).add(entityUrn); + } } // each statement is for a single aspect class @@ -331,11 +338,10 @@ public ListResult list(@Nonnull Class countAggregate(@Nullable IndexFilter indexFilter, @Nonnull IndexGroupByCriterion indexGroupByCriterion) { final String tableName = SQLSchemaUtils.getTableName(_entityType); + final String groupByColumn = getGeneratedColumnName(indexGroupByCriterion.getAspect(), indexGroupByCriterion.getPath()); // first, check for existence of the column we want to GROUP BY - final String groupByColumnExistsSql = SQLStatementUtils.createGroupByColumnExistsSql(tableName, indexGroupByCriterion); - final SqlRow groupByColumnExistsResults = _server.createSqlQuery(groupByColumnExistsSql).findOne(); - if (groupByColumnExistsResults == null) { + if (!checkColumnExists(tableName, groupByColumn)) { // if we are trying to GROUP BY the results on a column that does not exist, just return an empty map return Collections.emptyMap(); } @@ -534,6 +540,23 @@ private SchemaEvolutionManager createSchemaEvolutionManager(@Nonnull ServerConfi return new FlywaySchemaEvolutionManager(config); } + /** + * Check column exists in table. + */ + public boolean checkColumnExists(@Nonnull String tableName, @Nonnull String columnName) { + // Fetch table columns on very first read and cache it in tableColumns + if (!tableColumns.containsKey(tableName)) { + final List rows = _server.createSqlQuery(SQLStatementUtils.getAllColumnForTable(tableName)).findList(); + Set columns = new HashSet<>(); + for (SqlRow row : rows) { + columns.add(row.getString("COLUMN_NAME")); + } + tableColumns.put(tableName, columns); + } + + return tableColumns.get(tableName).contains(columnName); + } + /** * SQL implementation of find the latest {@link EbeanMetadataAspect}. * @param connection {@link Connection} get from the current transaction, it should not be closed manually diff --git a/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SQLStatementUtils.java b/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SQLStatementUtils.java index d426ffd1a..f5036f45e 100644 --- a/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SQLStatementUtils.java +++ b/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/utils/SQLStatementUtils.java @@ -80,9 +80,8 @@ public class SQLStatementUtils { private static final String INDEX_GROUP_BY_CRITERION = "SELECT count(*) as COUNT, %s FROM %s"; - private static final String SQL_GROUP_BY_COLUMN_EXISTS_TEMPLATE = - "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA = database() AND TABLE_NAME = '%s' AND COLUMN_NAME = '%s'"; - + private static final String SQL_GET_ALL_COLUMNS = + "SELECT COLUMN_NAME FROM information_schema.COLUMNS WHERE TABLE_SCHEMA = database() AND TABLE_NAME = '%s'"; private static final String SQL_URN_EXIST_TEMPLATE = "SELECT urn FROM %s WHERE urn = '%s'"; private static final String INSERT_LOCAL_RELATIONSHIP = "INSERT INTO %s (metadata, source, destination, source_type, " @@ -200,8 +199,6 @@ public static String createListAspectWithPaginat } } - - /** * Create Upsert SQL statement. * @param urn entity urn @@ -274,9 +271,8 @@ public static String createGroupBySql(String tableName, @Nullable IndexFilter in return sb.toString(); } - public static String createGroupByColumnExistsSql(String tableName, @Nonnull IndexGroupByCriterion indexGroupByCriterion) { - return String.format(SQL_GROUP_BY_COLUMN_EXISTS_TEMPLATE, tableName, getGeneratedColumnName(indexGroupByCriterion.getAspect(), - indexGroupByCriterion.getPath())); + public static String getAllColumnForTable(String tableName) { + return String.format(SQL_GET_ALL_COLUMNS, tableName); } /** diff --git a/dao-impl/ebean-dao/src/test/java/com/linkedin/metadata/dao/EbeanLocalAccessTest.java b/dao-impl/ebean-dao/src/test/java/com/linkedin/metadata/dao/EbeanLocalAccessTest.java index cffade17d..24a1ae137 100644 --- a/dao-impl/ebean-dao/src/test/java/com/linkedin/metadata/dao/EbeanLocalAccessTest.java +++ b/dao-impl/ebean-dao/src/test/java/com/linkedin/metadata/dao/EbeanLocalAccessTest.java @@ -52,7 +52,7 @@ public class EbeanLocalAccessTest { private static EbeanServer _server; - private static IEbeanLocalAccess _ebeanLocalAccessFoo; + private static EbeanLocalAccess _ebeanLocalAccessFoo; private static IEbeanLocalAccess _ebeanLocalAccessBar; private static IEbeanLocalAccess _ebeanLocalAccessBurger; private static long _now; @@ -469,4 +469,12 @@ public void testGetAspectNoSoftDeleteCheck() { assertFalse(ebeanMetadataAspectList.isEmpty()); assertEquals(fooUrn.toString(), ebeanMetadataAspectList.get(0).getKey().getUrn()); } + + @Test + public void testCheckColumnExists() { + assertTrue(_ebeanLocalAccessFoo.checkColumnExists("metadata_entity_foo", "a_aspectfoo")); + assertTrue(_ebeanLocalAccessFoo.checkColumnExists("metadata_entity_foo", "i_aspectfoo$value")); + assertFalse(_ebeanLocalAccessFoo.checkColumnExists("metadata_entity_foo", "a_aspect_not_exist")); + assertFalse(_ebeanLocalAccessFoo.checkColumnExists("metadata_entity_notexist", "a_aspectfoo")); + } } \ No newline at end of file