Skip to content

Commit

Permalink
Do not analyze deprecated resources (#3952)
Browse files Browse the repository at this point in the history
Co-authored-by: Simon Dumas <[email protected]>
  • Loading branch information
imsdu and Simon Dumas authored Jun 13, 2023
1 parent d8a6176 commit cbabe96
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
"_rev": {
"type": "long"
},
"_deprecated": {
"type": "boolean"
},
"_createdAt": {
"type": "date"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import io.circe.{Encoder, Json}
import ch.epfl.bluebrain.nexus.delta.sdk.syntax._
import ch.epfl.bluebrain.nexus.delta.sourcing.model.Identity.Subject
import ch.epfl.bluebrain.nexus.delta.sourcing.model.ProjectRef
import io.circe.syntax.EncoderOps
import io.circe.syntax.{EncoderOps, KeyOps}

import java.time.Instant

Expand Down Expand Up @@ -38,38 +38,63 @@ object GraphAnalyticsResult {
* The value is indexed to Elasticsearch and an update by query action is required so that the type of this resource
* is propagated to the resources pointing to it.
*
* The other fields are metadata and match the same definition as in [[ResourceState]].
*
* @see
* [[ResouceState]]
* The other fields are resource metadata.
*/
final case class Index(
final case class Index private (
project: ProjectRef,
id: Iri,
rev: Int,
deprecated: Boolean,
types: Set[Iri],
createdAt: Instant,
createdBy: Subject,
updatedAt: Instant,
updatedBy: Subject,
value: JsonLdDocument
value: Option[JsonLdDocument]
) extends GraphAnalyticsResult

object Index {
implicit val encoder: Encoder[Index] = Encoder.instance { g =>

def active(
project: ProjectRef,
id: Iri,
rev: Int,
types: Set[Iri],
createdAt: Instant,
createdBy: Subject,
updatedAt: Instant,
updatedBy: Subject,
value: JsonLdDocument
) =
new Index(project, id, rev, false, types, createdAt, createdBy, updatedAt, updatedBy, Some(value))

def deprecated(
project: ProjectRef,
id: Iri,
rev: Int,
types: Set[Iri],
createdAt: Instant,
createdBy: Subject,
updatedAt: Instant,
updatedBy: Subject
) =
new Index(project, id, rev, true, types, createdAt, createdBy, updatedAt, updatedBy, None)

implicit val encoder: Encoder[Index] = Encoder.instance { i =>
import ch.epfl.bluebrain.nexus.delta.sourcing.model.Identity.Database._
Json
.obj(
keywords.id -> g.id.asJson,
"_project" -> g.project.asJson,
"_rev" -> g.rev.asJson,
"_createdAt" -> g.createdAt.asJson,
"_createdBy" -> g.createdBy.asJson,
"_updatedAt" -> g.updatedAt.asJson,
"_updatedBy" -> g.updatedBy.asJson
keywords.id := i.id,
"_project" := i.project,
"_rev" := i.rev,
"_deprecated" := i.deprecated,
"_createdAt" := i.createdAt,
"_createdBy" := i.createdBy,
"_updatedAt" := i.updatedAt,
"_updatedBy" := i.updatedBy
)
.addIfNonEmpty(keywords.tpe, g.types)
.deepMerge(g.value.asJson)
.addIfNonEmpty(keywords.tpe, i.types)
.deepMerge(i.value.map(_.asJson).getOrElse(Json.obj()))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,15 @@ object GraphAnalyticsStream {
UpdateByQuery(s.id, s.types)
}
case Resources.entityType =>
Task.fromEither(ResourceState.serializer.codec.decodeJson(json)).flatMap { s =>
JsonLdDocument.fromExpanded(s.expanded, findRelationships(project, xas, relationshipBatch)).map { d =>
Index(s.project, s.id, s.rev, s.types, s.createdAt, s.createdBy, s.updatedAt, s.updatedBy, d)
}
Task.fromEither(ResourceState.serializer.codec.decodeJson(json)).flatMap {
case s if s.deprecated =>
Task.pure(
Index.deprecated(s.project, s.id, s.rev, s.types, s.createdAt, s.createdBy, s.updatedAt, s.updatedBy)
)
case s =>
JsonLdDocument.fromExpanded(s.expanded, findRelationships(project, xas, relationshipBatch)).map { d =>
Index.active(s.project, s.id, s.rev, s.types, s.createdAt, s.createdBy, s.updatedAt, s.updatedBy, d)
}
}
case _ => Task.pure(Noop)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
],
"_project": "myorg/myproject",
"_rev": 1,
"_deprecated": false,
"_createdAt": "1970-01-01T00:00:00Z",
"_createdBy": {
"@type": "Anonymous"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
],
"_project": "myorg/myproject",
"_rev": 1,
"_deprecated": false,
"_createdAt": "1970-01-01T00:00:00Z",
"_createdBy": {
"@type": "Anonymous"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
],
"_project": "myorg/myproject",
"_rev": 1,
"_deprecated": false,
"_createdAt": "1970-01-01T00:00:00Z",
"_createdBy": {
"@type": "Anonymous"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"@type": [
"http://schema.org/Dataset",
"https://neuroshapes.org/NeuroMorphology"
],
"@id": "http://localhost/deprecated",
"_project": "myorg/myproject",
"_rev": 1,
"_deprecated": true,
"_createdAt": "1970-01-01T00:00:00Z",
"_createdBy": {
"@type": "Anonymous"
},
"_updatedAt": "1970-01-01T00:00:00Z",
"_updatedBy": {
"@type": "Anonymous"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ch.epfl.bluebrain.nexus.delta.plugins.graph.analytics.indexing

import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.ElasticSearchClientSetup
import ch.epfl.bluebrain.nexus.delta.plugins.elasticsearch.client.IndexLabel
import ch.epfl.bluebrain.nexus.delta.plugins.graph.analytics.indexing.GraphAnalyticsResult.Index
import ch.epfl.bluebrain.nexus.delta.plugins.graph.analytics.model.JsonLdDocument
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.nxvFile
import ch.epfl.bluebrain.nexus.delta.rdf.IriOrBNode.Iri
Expand Down Expand Up @@ -36,7 +37,7 @@ class GraphAnalyticsSinkSuite

private val index = IndexLabel.unsafe("test_analytics")

private lazy val sink = new GraphAnalyticsSink(client, 2, 100.millis, index)
private lazy val sink = new GraphAnalyticsSink(client, 5, 100.millis, index)

private val project = ProjectRef.unsafe("myorg", "myproject")

Expand All @@ -50,6 +51,11 @@ class GraphAnalyticsSinkSuite
private val resource2 = iri"http://localhost/resource2"
private val expanded2 = loadExpanded("expanded/resource2.json")

// Deprecated resource
private val deprecatedResource = iri"http://localhost/deprecated"
private val deprecatedResourceTypes =
Set(iri"http://schema.org/Dataset", iri"https://neuroshapes.org/NeuroMorphology")

// Resource linked by 'resource1', resolved while indexing
private val resource3 = iri"http://localhost/resource3"
// File linked by 'resource1', resolved after an update by query
Expand Down Expand Up @@ -90,33 +96,39 @@ class GraphAnalyticsSinkSuite
SuccessElem(Resources.entityType, id, Some(project), Instant.EPOCH, Offset.start, result, 1)

test("Push index results") {
def toIndex(id: Iri, io: UIO[ExpandedJsonLd]) = {
def indexActive(id: Iri, io: UIO[ExpandedJsonLd]) = {
for {
expanded <- io
types <- getTypes(expanded)
doc <- JsonLdDocument.fromExpanded(expanded, _ => findRelationships)
} yield {
val result =
GraphAnalyticsResult.Index(project, id, 1, types, Instant.EPOCH, Anonymous, Instant.EPOCH, Anonymous, doc)
val result = Index.active(project, id, 1, types, Instant.EPOCH, Anonymous, Instant.EPOCH, Anonymous, doc)
success(id, result)
}
}

def indexDeprecated(id: Iri, types: Set[Iri]) =
success(id, Index.deprecated(project, id, 1, types, Instant.EPOCH, Anonymous, Instant.EPOCH, Anonymous))

for {
r1 <- toIndex(resource1, expanded1)
r2 <- toIndex(resource2, expanded2)
r3 = success(resource3, GraphAnalyticsResult.Noop)
chunk = Chunk.seq(List(r1, r2, r3))
active1 <- indexActive(resource1, expanded1)
active2 <- indexActive(resource2, expanded2)
discarded = success(resource3, GraphAnalyticsResult.Noop)
deprecated = indexDeprecated(deprecatedResource, deprecatedResourceTypes)
chunk = Chunk.seq(List(active1, active2, discarded, deprecated))
// We expect no error
_ <- sink(chunk).assert(chunk.map(_.void))
// 2 documents should have been indexed correctly:
_ <- sink(chunk).assert(chunk.map(_.void))
// 3 documents should have been indexed correctly:
// - `resource1` with the relationship to `resource3` resolved
// - `resource2` with no reference resolved
_ <- client.count(index.value).eventually(2L)
expected1 <- ioJsonContentOf("result/resource1.json")
expected2 <- ioJsonContentOf("result/resource2.json")
_ <- client.getSource[Json](index, resource1.toString).eventually(expected1)
_ <- client.getSource[Json](index, resource2.toString).eventually(expected2)
// - `deprecatedResource` with only metadata, resolution is skipped
_ <- client.count(index.value).eventually(3L)
expected1 <- ioJsonContentOf("result/resource1.json")
expected2 <- ioJsonContentOf("result/resource2.json")
expectedDeprecated <- ioJsonContentOf("result/resource_deprecated.json")
_ <- client.getSource[Json](index, resource1.toString).eventually(expected1)
_ <- client.getSource[Json](index, resource2.toString).eventually(expected2)
_ <- client.getSource[Json](index, deprecatedResource.toString).eventually(expectedDeprecated)
} yield ()

}
Expand Down Expand Up @@ -145,7 +157,7 @@ class GraphAnalyticsSinkSuite
_ <- client.refresh(index)
expected1 <- ioJsonContentOf("result/resource1_updated.json")
expected2 <- ioJsonContentOf("result/resource2.json")
_ <- client.count(index.value).eventually(2L)
_ <- client.count(index.value).eventually(3L)
_ <- client.getSource[Json](index, resource1.toString).eventually(expected1)
_ <- client.getSource[Json](index, resource2.toString).eventually(expected2)
} yield ()
Expand Down
18 changes: 14 additions & 4 deletions docs/src/main/paradox/docs/delta/api/graph-analytics-api.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# Graph analytics

Graph analytics are a functionality introduced by the `graph-analytics` plugin and rooted in the `/v1/graph-analytics/{org_label}/{project_label}` collection.
They provide ways to get insights about the data and their relationships in terms of types (@type field).

Graph analytics is a feature introduced by the `graph-analytics` plugin and rooted in the `/v1/graph-analytics/{org_label}/{project_label}` collection.

It runs for each project and it parses and breaks down non-deprecated resources to analyse their structure.
For each of these resources, it extracts the following information:

* Its properties: their path and the type of the associated value
* Its relationships, that is to say the other resources in the same project it points to.

@@@ note { .tip title="Authorization notes" }

Expand Down Expand Up @@ -90,7 +94,13 @@ An example of the ElasticSearch Document looks as follows:
{
"@id": "http://example.com/Anna",
"@type": "http://schema.org/Person",
"_project": "myorg/myproject",
"_rev": 4,
"_deprecated": false,
"_createdAt": "2023-06-01T00:00:00Z",
"_createdBy": { "@type": "User", "realm": "bbp", "subject": "Bob" },
"_updatedAt": "2023-06-12T00:00:00Z",
"_updatedBy": { "@type": "User", "realm": "bbp", "subject": "Alice" },
"properties": [
{
"dataType": "object",
Expand Down Expand Up @@ -132,7 +142,7 @@ An example of the ElasticSearch Document looks as follows:
"references": [
{
"found": true,
"@id": "http://example.com/Robert",
"@id": "http://example.com/Robert"
}
],
"relationships": [
Expand Down

0 comments on commit cbabe96

Please sign in to comment.