From f118ab892b2a47b4e20672b9cd9b0915bcc6a8d7 Mon Sep 17 00:00:00 2001 From: Brian Harrington Date: Wed, 4 Dec 2024 08:19:31 -0600 Subject: [PATCH] druid: require name clause on queries Without a name clause, it can result in many broad druid queries that are quite expensive. These vague queries can be common as intermediates when building a query, but are not typically useful. --- .../src/main/resources/application.conf | 1 - .../atlas/druid/DruidDatabaseActor.scala | 46 ++++++++++++----- .../atlas/druid/DruidDatabaseActorSuite.scala | 51 +++++++++++++++++++ 3 files changed, 84 insertions(+), 14 deletions(-) diff --git a/atlas-druid/src/main/resources/application.conf b/atlas-druid/src/main/resources/application.conf index f0dd72c4..adf7bb8c 100644 --- a/atlas-druid/src/main/resources/application.conf +++ b/atlas-druid/src/main/resources/application.conf @@ -4,7 +4,6 @@ atlas { // URI for the druid service druid { //uri = "http://localhost:7103/druid/v2" - uri = "http://bdp_druid-metricsext-router.cluster.us-east-1.prod.cloud.netflix.net:7103/druid/v2" // Interval used when fetching metadata about data sources that are available metadata-interval = 21d diff --git a/atlas-druid/src/main/scala/com/netflix/atlas/druid/DruidDatabaseActor.scala b/atlas-druid/src/main/scala/com/netflix/atlas/druid/DruidDatabaseActor.scala index c997b4ac..98a7249e 100644 --- a/atlas-druid/src/main/scala/com/netflix/atlas/druid/DruidDatabaseActor.scala +++ b/atlas-druid/src/main/scala/com/netflix/atlas/druid/DruidDatabaseActor.scala @@ -297,20 +297,25 @@ class DruidDatabaseActor(config: Config, service: DruidMetadataService) } private def fetchData(ref: ActorRef, request: DataRequest): Unit = { - val druidQueryContext = toDruidQueryContext(request) - val druidQueries = request.exprs.map { expr => - fetchData(druidQueryContext, request.context, expr).map(ts => expr -> ts) - } - Source(druidQueries) - .flatMapMerge(Int.MaxValue, v => v) - .fold(Map.empty[DataExpr, List[TimeSeries]]) { (acc, t) => - acc + t - } - .runWith(Sink.head) - .onComplete { - case Success(data) => ref ! DataResponse(data) - case Failure(t) => ref ! Failure(t) + try { + request.exprs.foreach(expr => validate(expr.query)) + val druidQueryContext = toDruidQueryContext(request) + val druidQueries = request.exprs.map { expr => + fetchData(druidQueryContext, request.context, expr).map(ts => expr -> ts) } + Source(druidQueries) + .flatMapMerge(Int.MaxValue, v => v) + .fold(Map.empty[DataExpr, List[TimeSeries]]) { (acc, t) => + acc + t + } + .runWith(Sink.head) + .onComplete { + case Success(data) => ref ! DataResponse(data) + case Failure(t) => ref ! Failure(t) + } + } catch { + case e: Exception => ref ! Failure(e) + } } private def fetchData( @@ -709,4 +714,19 @@ object DruidDatabaseActor { } Query.simplify(simpleQuery) } + + private[druid] def validate(query: Query): Unit = { + Query.dnfList(query).foreach { q => + // Without a name clause, it can result in many broad druid queries that are quite + // expensive. These vague queries can be common as intermediates when building a query, + // but are not typically useful. + require(hasNameClause(q), s"query does not restrict by name: $q") + } + } + + private def hasNameClause(query: Query): Boolean = query match { + case Query.And(q1, q2) => hasNameClause(q1) || hasNameClause(q2) + case q: Query.KeyQuery => q.k == "name" + case _ => false + } } diff --git a/atlas-druid/src/test/scala/com/netflix/atlas/druid/DruidDatabaseActorSuite.scala b/atlas-druid/src/test/scala/com/netflix/atlas/druid/DruidDatabaseActorSuite.scala index ad2748f7..64d9f5b5 100644 --- a/atlas-druid/src/test/scala/com/netflix/atlas/druid/DruidDatabaseActorSuite.scala +++ b/atlas-druid/src/test/scala/com/netflix/atlas/druid/DruidDatabaseActorSuite.scala @@ -396,4 +396,55 @@ class DruidDatabaseActorSuite extends FunSuite { val mapper = createValueMapper(false, context.copy(step = 300000), expr) assertEquals(mapper(1.0), 1.0) } + + test("validate: simple expr with name") { + val query = evalQuery("app,www,:eq,name,cpu,:eq,:and") + validate(query) + } + + test("validate: simple expr without name") { + val query = evalQuery("app,www,:eq,not_name,cpu,:eq,:and") + intercept[IllegalArgumentException] { + validate(query) + } + } + + test("validate: OR with name") { + val query = evalQuery("app,www,:eq,name,cpu,:eq,:and,name,disk,:eq,:or") + validate(query) + } + + test("validate: OR one side without name") { + val query = evalQuery("app,www,:eq,not_name,cpu,:eq,:and,name,disk,:eq,:or") + intercept[IllegalArgumentException] { + validate(query) + } + } + + test("validate: name only in NOT") { + val query = evalQuery("app,www,:eq,name,cpu,:eq,:not,:and") + intercept[IllegalArgumentException] { + validate(query) + } + } + + test("validate: IN name") { + val query = evalQuery("app,www,:eq,name,(,cpu,disk,),:in,:and") + validate(query) + } + + test("validate: haskey name") { + val query = evalQuery("app,www,:eq,name,:has,:and") + validate(query) + } + + test("validate: regex name") { + val query = evalQuery("app,www,:eq,name,cpu,:re,:and") + validate(query) + } + + test("validate: greater than name") { + val query = evalQuery("app,www,:eq,name,cpu,:gt,:and") + validate(query) + } }